fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals

* add smart turn as provider

* chore: remove deprecations

* chore: format files

* fix: remove deprecated UserIdleProcessor

* fix: remove deprecated TranscriptProcessor

* chore: update pipecat submodule

* feat: add evals visualisation

* fix: trigger llm generation on client connected and pipeline started

* chore: update pipecat

* chore: update pipecat submodule

* Add tests

* fix: slow loading of workflow page

* chore: update pipecat submodule

* Show version after release

* Fixes #99

* fix: provider check for websocket connection

* Fixes #107

* Fix #96

* chore: fix documentation

* fix: cloudonix campaign call error

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
Abhishek 2026-01-23 18:53:59 +05:30 committed by GitHub
parent a4367bd83b
commit 911c5ed416
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
104 changed files with 16919 additions and 597 deletions

View file

@ -1 +1,2 @@
api/.env
api/.env
evals/

View file

@ -4,7 +4,7 @@ on:
release:
types: [published]
# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push
# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
@ -13,11 +13,11 @@ jobs:
build:
runs-on: ubuntu-latest
env:
COMMIT_SHA: ${{ github.sha }} # Used to tag images with short commit SHA
COMMIT_SHA: ${{ github.sha }}
strategy:
matrix:
service:
service:
- "dograh-api|api/Dockerfile|."
- "dograh-ui|ui/Dockerfile|."
@ -25,14 +25,12 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: true # Only for version check, not used in build
submodules: true
# Pipecat version check removed - now using local submodule
- name: Set up QEMU # Enables cross-platform builds (e.g., arm64)
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx # Enables multi-arch and advanced Docker builds
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to DockerHub
@ -51,48 +49,50 @@ jobs:
- name: Set build variables
id: build-vars
run: |
# Parse matrix entry and set variables early (before build)
SERVICE="${{ matrix.service }}"
IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
SHORT_SHA=${COMMIT_SHA::8}
# Export for use in subsequent steps
# Get version from release tag (removes 'dograh-' and 'v' prefixes if present)
VERSION="${{ github.event.release.tag_name }}"
VERSION="${VERSION#dograh-}"
VERSION="${VERSION#v}"
echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "service=${SERVICE}" >> $GITHUB_OUTPUT
echo "version=${VERSION}" >> $GITHUB_OUTPUT
- name: Build and Push ${{ matrix.service }}
id: docker-build
run: |
# Parse matrix entry into individual variables
SERVICE="${{ matrix.service }}"
IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
DOCKERFILE=$(echo "$SERVICE" | cut -d '|' -f2)
CONTEXT=$(echo "$SERVICE" | cut -d '|' -f3)
SHORT_SHA=${COMMIT_SHA::8}
VERSION="${{ steps.build-vars.outputs.version }}"
echo "Building and pushing image: $IMAGE_NAME"
echo "Dockerfile: $DOCKERFILE"
echo "Context: $CONTEXT"
echo "Commit SHA: $SHORT_SHA"
# Export tags for Slack notification
echo "Version: $VERSION"
echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
echo "dockerhub_tag=${{ secrets.DOCKERHUB_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "ghcr_tag=ghcr.io/${{ secrets.GHCR_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
# Build and push multi-arch Docker image to DockerHub and GHCR
docker buildx build \
-f "$DOCKERFILE" \
--platform linux/amd64,linux/arm64 \
--tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$VERSION \
--tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
--tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:latest \
--tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$VERSION \
--tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
--tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:latest \
--push "$CONTEXT"
# Success notification
- name: Send Slack notification - Success
if: success()
uses: slackapi/slack-github-action@v1.26.0
@ -101,10 +101,9 @@ jobs:
with:
payload: |
{
"text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.short_sha }}) on ${{ github.ref_name }} by ${{ github.actor }}"
"text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }}"
}
# Failure notification
- name: Send Slack notification - Failure
if: failure()
uses: slackapi/slack-github-action@v1.26.0
@ -113,5 +112,5 @@ jobs:
with:
payload: |
{
"text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.short_sha }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
"text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
}

1
.gitignore vendored
View file

@ -1,6 +1,7 @@
__pycache__
.DS_Store
.env
.env.prod
.env.test
# logs and run directory on production

View file

@ -0,0 +1,72 @@
"""add public_access_token
Revision ID: 181475b2a1a1
Revises: dc33eef8dabe
Create Date: 2026-01-23 17:37:54.449308
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "181475b2a1a1"
down_revision: Union[str, None] = "dc33eef8dabe"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply revision 181475b2a1a1.

    Besides adding ``workflow_runs.public_access_token`` this revision also
    rebuilds the ``api_keys.key_hash`` index as non-unique and creates two
    indexes on ``knowledge_base_chunks``.
    """
    # NOTE(review): the api_keys / knowledge_base_chunks index changes below are
    # not implied by the revision title ("add public_access_token"); presumably
    # autogenerate picked up model drift — confirm they are intended here.
    # ### commands auto generated by Alembic - please adjust! ###
    # Recreate the key_hash index with unique=False (downgrade restores unique=True).
    op.drop_index(op.f("ix_api_keys_key_hash"), table_name="api_keys")
    op.create_index("ix_api_keys_key_hash", "api_keys", ["key_hash"], unique=False)
    # ivfflat index over the embedding column using the vector_cosine_ops
    # operator class, created with lists=100.
    op.create_index(
        "ix_kb_chunks_embedding_ivfflat",
        "knowledge_base_chunks",
        ["embedding"],
        unique=False,
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    op.create_index(
        "ix_kb_chunks_embedding_model",
        "knowledge_base_chunks",
        ["embedding_model"],
        unique=False,
    )
    # New nullable 36-character token column on workflow_runs.
    op.add_column(
        "workflow_runs",
        sa.Column("public_access_token", sa.String(length=36), nullable=True),
    )
    # Partial unique index: uniqueness is enforced only for rows whose token is
    # not NULL, so any number of runs may have no token.
    op.create_index(
        "idx_workflow_runs_public_access_token",
        "workflow_runs",
        ["public_access_token"],
        unique=True,
        postgresql_where=sa.text("public_access_token IS NOT NULL"),
    )
    # ### end Alembic commands ###
def downgrade() -> None:
    """Revert revision 181475b2a1a1 by undoing the upgrade steps in reverse order."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Remove the token index before dropping the column it covers.
    op.drop_index(
        "idx_workflow_runs_public_access_token",
        table_name="workflow_runs",
        postgresql_where=sa.text("public_access_token IS NOT NULL"),
    )
    op.drop_column("workflow_runs", "public_access_token")
    op.drop_index("ix_kb_chunks_embedding_model", table_name="knowledge_base_chunks")
    op.drop_index(
        "ix_kb_chunks_embedding_ivfflat",
        table_name="knowledge_base_chunks",
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    # Restore the key_hash index as unique.
    # NOTE(review): presumably this fails if duplicate key_hash values were
    # stored while the index was non-unique — confirm before downgrading.
    op.drop_index("ix_api_keys_key_hash", table_name="api_keys")
    op.create_index(op.f("ix_api_keys_key_hash"), "api_keys", ["key_hash"], unique=True)
    # ### end Alembic commands ###

View file

@ -14,7 +14,6 @@ FILLER_SOUND_PROBABILITY = 0.0
VOICEMAIL_RECORDING_DURATION = 5.0
# Configuration constants
ENABLE_SMART_TURN = os.getenv("ENABLE_SMART_TURN", "false").lower() == "true"
ENABLE_TRACING = os.getenv("ENABLE_TRACING", "false").lower() == "true"
ENABLE_RNNOISE = os.getenv("ENABLE_RNNOISE", "false").lower() == "true"
@ -52,6 +51,23 @@ ENABLE_ARI_STASIS = os.getenv("ENABLE_ARI_STASIS", "false").lower() == "true"
SERIALIZE_LOG_OUTPUT = os.getenv("SERIALIZE_LOG_OUTPUT", "false").lower() == "true"
ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true"
def _get_version() -> str:
"""Read version from pyproject.toml."""
try:
import tomllib
pyproject_path = APP_ROOT_DIR / "pyproject.toml"
with open(pyproject_path, "rb") as f:
pyproject = tomllib.load(f)
return pyproject.get("project", {}).get("version", "dev")
except Exception:
return "dev"
# Application version (read from pyproject.toml)
APP_VERSION = _get_version()
# Country code mapping: ISO country code -> international dialing prefix
COUNTRY_CODES = {
"US": "1", # United States

View file

@ -360,6 +360,17 @@ class WorkflowRunModel(Base):
campaign = relationship("CampaignModel")
queued_run_id = Column(Integer, ForeignKey("queued_runs.id"), nullable=True)
queued_run = relationship("QueuedRunModel", foreign_keys=[queued_run_id])
public_access_token = Column(String(36), nullable=True)
# Indexes
__table_args__ = (
Index(
"idx_workflow_runs_public_access_token",
"public_access_token",
unique=True,
postgresql_where=text("public_access_token IS NOT NULL"),
),
)
# LoopTalk Testing Models

View file

@ -4,7 +4,7 @@ from typing import Optional
from sqlalchemy import func
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import load_only, selectinload
from api.db.base_client import BaseDBClient
from api.db.models import WorkflowDefinitionModel, WorkflowModel, WorkflowRunModel
@ -111,6 +111,70 @@ class WorkflowClient(BaseDBClient):
result = await session.execute(query)
return result.scalars().all()
async def get_all_workflows_for_listing(
    self, organization_id: int = None, status: str = None
) -> list[WorkflowModel]:
    """Fetch workflows for list views, loading only the columns a list needs.

    ``load_only`` restricts the query to ``id``, ``name``, ``status`` and
    ``created_at``; every other column of ``WorkflowModel`` is left unloaded.

    Args:
        organization_id: Restrict to this organization. A falsy value
            (``None`` or ``0``) applies no organization filter.
        status: Restrict to this status (active/archived). A falsy value
            applies no status filter.

    Returns:
        The matching ``WorkflowModel`` rows.
    """
    listing_columns = (
        WorkflowModel.id,
        WorkflowModel.name,
        WorkflowModel.status,
        WorkflowModel.created_at,
    )
    stmt = select(WorkflowModel).options(load_only(*listing_columns))

    if organization_id:
        stmt = stmt.where(WorkflowModel.organization_id == organization_id)
    if status:
        stmt = stmt.where(WorkflowModel.status == status)

    async with self.async_session() as session:
        fetched = await session.execute(stmt)
        return fetched.scalars().all()
async def get_workflow_counts(self, organization_id: int = None) -> dict[str, int]:
    """Count workflows grouped by status.

    Args:
        organization_id: Restrict to this organization. A falsy value
            (``None`` or ``0``) counts workflows of every organization.

    Returns:
        A dict that always has ``total``, ``active`` and ``archived`` keys
        (each 0 when no rows match). Any other status value found in the
        table appears under its own key and is included in ``total``.
    """
    stmt = select(
        WorkflowModel.status,
        func.count(WorkflowModel.id).label("count"),
    )
    if organization_id:
        stmt = stmt.where(WorkflowModel.organization_id == organization_id)
    stmt = stmt.group_by(WorkflowModel.status)

    async with self.async_session() as session:
        grouped = (await session.execute(stmt)).all()

    tallies = {"total": 0, "active": 0, "archived": 0}
    for status_name, how_many in grouped:
        tallies[status_name] = how_many
        tallies["total"] += how_many
    return tallies
async def get_workflow(
self, workflow_id: int, user_id: int = None, organization_id: int = None
) -> WorkflowModel | None:
@ -310,3 +374,33 @@ class WorkflowClient(BaseDBClient):
)
)
return result.scalar() or 0
async def get_workflow_run_counts(self, workflow_ids: list[int]) -> dict[int, int]:
    """Count runs for several workflows with one grouped query.

    Args:
        workflow_ids: IDs of the workflows to count runs for.

    Returns:
        A dict keyed by every requested workflow ID; workflows without runs
        map to 0. An empty ``workflow_ids`` returns ``{}`` without querying.
    """
    if not workflow_ids:
        return {}

    stmt = (
        select(
            WorkflowRunModel.workflow_id,
            func.count(WorkflowRunModel.id).label("run_count"),
        )
        .where(WorkflowRunModel.workflow_id.in_(workflow_ids))
        .group_by(WorkflowRunModel.workflow_id)
    )
    async with self.async_session() as session:
        grouped = (await session.execute(stmt)).all()

    # Seed every requested ID with 0, then overlay the counts that came back.
    totals = dict.fromkeys(workflow_ids, 0)
    totals.update({wf_id: run_total for wf_id, run_total in grouped})
    return totals

View file

@ -1,3 +1,4 @@
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
@ -414,3 +415,56 @@ class WorkflowRunClient(BaseDBClient):
organization_id = workflow_run.workflow.user.selected_organization_id
return workflow_run, organization_id
async def ensure_public_access_token(self, workflow_run_id: int) -> Optional[str]:
    """Return the run's public access token, creating one if it has none.

    The workflow run row is selected ``FOR UPDATE`` so that concurrent callers
    are serialized: without the lock, two callers could both observe an empty
    token, each generate a UUID, and the later commit would overwrite a token
    that had already been handed out.

    Args:
        workflow_run_id: The ID of the workflow run.

    Returns:
        The public access token string, or None if the workflow run is not found.

    Raises:
        Exception: Whatever the commit raises; the session is rolled back first.
    """
    async with self.async_session() as session:
        result = await session.execute(
            select(WorkflowRunModel)
            .where(WorkflowRunModel.id == workflow_run_id)
            # Lock only the workflow_runs row, so the lock clause stays valid
            # even if the select ends up joining other tables.
            .with_for_update(of=WorkflowRunModel)
        )
        run = result.scalars().first()
        if not run:
            return None

        # Return existing token if present (released lock on session close).
        if run.public_access_token:
            return run.public_access_token

        # Generate and persist a new token while the row lock is held.
        token = str(uuid.uuid4())
        run.public_access_token = token
        try:
            await session.commit()
        except Exception:
            await session.rollback()
            raise

        # The committed value is the one just assigned; no refresh round trip needed.
        return token
async def get_workflow_run_by_public_token(
    self, token: str
) -> Optional[WorkflowRunModel]:
    """Find the workflow run whose ``public_access_token`` equals ``token``.

    Args:
        token: The public access token to look up.

    Returns:
        The first matching ``WorkflowRunModel``, or None when nothing matches.
    """
    stmt = select(WorkflowRunModel).where(
        WorkflowRunModel.public_access_token == token
    )
    async with self.async_session() as session:
        matches = await session.execute(stmt)
        return matches.scalars().first()

5
api/pyproject.toml Normal file
View file

@ -0,0 +1,5 @@
[project]
name = "dograh-api"
version = "1.10.0"
description = "Backend API for Dograh voice AI platform"
requires-python = ">=3.12"

View file

@ -1,5 +1,6 @@
from fastapi import APIRouter
from loguru import logger
from pydantic import BaseModel
from api.routes.campaign import router as campaign_router
from api.routes.credentials import router as credentials_router
@ -9,6 +10,7 @@ from api.routes.looptalk import router as looptalk_router
from api.routes.organization import router as organization_router
from api.routes.organization_usage import router as organization_usage_router
from api.routes.public_agent import router as public_agent_router
from api.routes.public_download import router as public_download_router
from api.routes.public_embed import router as public_embed_router
from api.routes.reports import router as reports_router
from api.routes.s3_signed_url import router as s3_router
@ -43,11 +45,24 @@ router.include_router(reports_router)
router.include_router(webrtc_signaling_router)
router.include_router(public_embed_router)
router.include_router(public_agent_router)
router.include_router(public_download_router)
router.include_router(workflow_embed_router)
router.include_router(knowledge_base_router)
@router.get("/health")
async def health():
class HealthResponse(BaseModel):
status: str
version: str
backend_api_endpoint: str
@router.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
from api.constants import APP_VERSION, BACKEND_API_ENDPOINT
logger.debug("Health endpoint called")
return {"message": "OK"}
return HealthResponse(
status="ok",
version=APP_VERSION,
backend_api_endpoint=BACKEND_API_ENDPOINT,
)

View file

@ -0,0 +1,95 @@
"""Public download endpoints for workflow recordings and transcripts.
These endpoints provide secure, token-based public access to workflow artifacts
without requiring authentication. Tokens are generated on-demand when webhooks
are executed and included in the webhook payload.
"""
from typing import Literal
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import RedirectResponse
from loguru import logger
from api.db import db_client
from api.services.storage import get_storage_for_backend
router = APIRouter(prefix="/public/download")
@router.get("/workflow/{token}/{artifact_type}")
async def download_workflow_artifact(
    token: str,
    artifact_type: Literal["recording", "transcript"],
    inline: bool = Query(
        default=False, description="Display inline in browser instead of download"
    ),
):
    """Redirect to a signed URL for a workflow run's recording or transcript.

    Steps:
    1. Look up the workflow run by its public access token.
    2. Pick the stored path for the requested artifact.
    3. Resolve the storage backend recorded on the run.
    4. Ask the backend for a signed URL valid for one hour.
    5. Redirect the client to that URL.

    Args:
        token: The public access token identifying the workflow run.
        artifact_type: Type of artifact - "recording" or "transcript".
        inline: If true, passed to the storage backend as ``force_inline``.

    Returns:
        RedirectResponse to the signed URL (302 redirect).

    Raises:
        HTTPException 404: If no run matches the token, or the run has no
            stored path for the requested artifact.
        HTTPException 500: If the storage backend cannot be resolved or fails
            to produce a signed URL.
    """
    # 1. Lookup workflow run by token (only a token prefix is ever logged)
    workflow_run = await db_client.get_workflow_run_by_public_token(token)
    if not workflow_run:
        logger.warning(f"Invalid public access token: {token[:8]}...")
        raise HTTPException(status_code=404, detail="Invalid or expired token")

    # 2. Get file path based on artifact type
    if artifact_type == "recording":
        file_path = workflow_run.recording_url
    else:  # transcript
        file_path = workflow_run.transcript_url

    if not file_path:
        logger.warning(
            f"Artifact not found: type={artifact_type}, workflow_run_id={workflow_run.id}"
        )
        raise HTTPException(
            status_code=404,
            detail=f"No {artifact_type} available for this workflow run",
        )

    # 3. Get storage backend for this workflow run
    try:
        storage = get_storage_for_backend(workflow_run.storage_backend)
    except ValueError as e:
        logger.error(f"Invalid storage backend: {workflow_run.storage_backend}")
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(
            status_code=500, detail="Storage configuration error"
        ) from e

    # 4. Generate signed URL (1 hour expiration)
    try:
        signed_url = await storage.aget_signed_url(
            file_path=file_path,
            expiration=3600,  # 1 hour
            force_inline=inline,
        )
    except Exception as e:
        logger.error(f"Failed to generate signed URL: {e}")
        raise HTTPException(
            status_code=500, detail="Failed to generate download URL"
        ) from e

    if not signed_url:
        logger.error(f"Storage returned None for signed URL: {file_path}")
        raise HTTPException(status_code=500, detail="Failed to generate download URL")

    logger.info(
        f"Generated signed URL for {artifact_type}: workflow_run_id={workflow_run.id}, token={token[:8]}..."
    )

    # 5. Redirect to signed URL
    return RedirectResponse(url=signed_url, status_code=302)

View file

@ -97,6 +97,24 @@ class WorkflowResponse(BaseModel):
workflow_configurations: dict | None = None
class WorkflowListResponse(BaseModel):
    """Lightweight response for workflow listings (excludes large fields)."""

    # Identity and display fields copied from the workflow row.
    id: int
    name: str
    status: str
    created_at: datetime
    # Number of runs for this workflow; get_workflows fills it from the
    # batched run-count lookup and falls back to 0 when no count is present.
    total_runs: int
class WorkflowCountResponse(BaseModel):
    """Response for workflow count endpoint."""

    # Sum of the per-status counts returned by the database client.
    total: int
    # Count of workflows whose status is "active".
    active: int
    # Count of workflows whose status is "archived".
    archived: int
class WorkflowTemplateResponse(BaseModel):
id: int
template_name: str
@ -359,6 +377,26 @@ class WorkflowSummaryResponse(BaseModel):
name: str
@router.get("/count")
async def get_workflow_count(
    user: UserModel = Depends(get_user),
) -> WorkflowCountResponse:
    """Return total/active/archived workflow counts for the caller's organization.

    Only aggregate counts are queried, so clients can cheaply check whether
    any workflows exist (for example to drive redirect logic) without
    fetching workflow data.
    """
    org_id = user.selected_organization_id
    tallies = await db_client.get_workflow_counts(organization_id=org_id)
    return WorkflowCountResponse(
        **{field: tallies[field] for field in ("total", "active", "archived")}
    )
@router.get("/fetch")
async def get_workflows(
user: UserModel = Depends(get_user),
@ -366,45 +404,43 @@ async def get_workflows(
None,
description="Filter by status - can be single value (active/archived) or comma-separated (active,archived)",
),
) -> List[WorkflowResponse]:
"""Get all workflows for the authenticated user's organization"""
) -> List[WorkflowListResponse]:
"""Get all workflows for the authenticated user's organization.
Returns a lightweight response with only essential fields for listing.
Use GET /workflow/fetch/{workflow_id} to get full workflow details.
"""
# Handle comma-separated status values
if status and "," in status:
# Split comma-separated values and fetch workflows for each status
status_list = [s.strip() for s in status.split(",")]
all_workflows = []
for status_value in status_list:
workflows = await db_client.get_all_workflows(
workflows = await db_client.get_all_workflows_for_listing(
organization_id=user.selected_organization_id, status=status_value
)
all_workflows.extend(workflows)
workflows = all_workflows
else:
# Single status or no status filter
workflows = await db_client.get_all_workflows(
workflows = await db_client.get_all_workflows_for_listing(
organization_id=user.selected_organization_id, status=status
)
# Get run counts for each workflow
workflow_responses = []
for workflow in workflows:
run_count = await db_client.get_workflow_run_count(workflow.id)
workflow_responses.append(
{
"id": workflow.id,
"name": workflow.name,
"status": workflow.status,
"created_at": workflow.created_at,
"workflow_definition": workflow.workflow_definition_with_fallback,
"current_definition_id": workflow.current_definition_id,
"template_context_variables": workflow.template_context_variables,
"call_disposition_codes": workflow.call_disposition_codes,
"workflow_configurations": workflow.workflow_configurations,
"total_runs": run_count,
}
)
# Get run counts for all workflows in a single query
workflow_ids = [workflow.id for workflow in workflows]
run_counts = await db_client.get_workflow_run_counts(workflow_ids)
return workflow_responses
return [
WorkflowListResponse(
id=workflow.id,
name=workflow.name,
status=workflow.status,
created_at=workflow.created_at,
total_runs=run_counts.get(workflow.id, 0),
)
for workflow in workflows
]
@router.get("/fetch/{workflow_id}")

View file

@ -170,13 +170,6 @@ class CampaignCallDispatcher:
)
raise ValueError(f"Workflow {campaign.workflow_id} not found")
# Merge context variables (queued_run context already includes retry info if applicable)
initial_context = {
**workflow.template_context_variables,
**queued_run.context_variables,
"campaign_id": campaign.id,
}
# Extract phone number
phone_number = queued_run.context_variables.get("phone_number")
if not phone_number:
@ -186,13 +179,25 @@ class CampaignCallDispatcher:
)
raise ValueError(f"No phone number in queued run {queued_run.id}")
# Create workflow run with queued_run_id tracking
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
# Get provider first to determine the mode
provider = await self.get_telephony_provider(campaign.organization_id)
workflow_run_mode = provider.PROVIDER_NAME
logger.info(f"Provider name: {provider.PROVIDER_NAME}")
logger.info(f"Queued run context: {queued_run.context_variables}")
# Merge context variables (queued_run context already includes retry info if applicable)
initial_context = {
**workflow.template_context_variables,
**queued_run.context_variables,
"campaign_id": campaign.id,
"provider": provider.PROVIDER_NAME,
}
logger.info(f"Final initial_context: {initial_context}")
# Create workflow run with queued_run_id tracking
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
try:
workflow_run = await db_client.create_workflow_run(
name=workflow_run_name,
@ -243,6 +248,8 @@ class CampaignCallDispatcher:
to_number=phone_number,
webhook_url=webhook_url,
workflow_run_id=workflow_run.id,
workflow_id=campaign.workflow_id,
user_id=campaign.created_by,
)
# Store provider type and metadata in gathered_context

View file

@ -300,7 +300,7 @@ TTSConfig = Annotated[
###################################################### STT ########################################################################
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general"]
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
DEEPGRAM_LANGUAGES = [
"multi",
"en",

View file

@ -103,7 +103,6 @@ class LoopTalkPipelineBuilder:
# Set the context and audio_buffer after creation
engine.set_context(context)
engine.set_audio_buffer(audio_buffer)
context_aggregator = LLMContextAggregatorPair(context)

View file

@ -12,9 +12,8 @@ from pipecat.frames.frames import (
Frame,
InputAudioRawFrame,
OutputAudioRawFrame,
StartFrame,
)
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
from pipecat.serializers.base_serializer import FrameSerializer
class InternalFrameSerializer(FrameSerializer):
@ -24,15 +23,6 @@ class InternalFrameSerializer(FrameSerializer):
preventing control frames from creating infinite loops.
"""
@property
def type(self) -> FrameSerializerType:
"""Internal transport uses binary frames."""
return FrameSerializerType.BINARY
async def setup(self, frame: StartFrame):
"""No setup required for internal transport."""
pass
async def serialize(self, frame: Frame) -> bytes | None:
"""Only serialize audio frames for transmission between agents."""
# Only pass audio frames between agents

View file

@ -22,16 +22,21 @@ from pipecat.pipeline.task import PipelineTask
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
def register_transport_event_handlers(
def register_event_handlers(
task: PipelineTask,
transport,
workflow_run_id,
workflow_run_id: int,
engine: PipecatEngine,
audio_buffer: AudioBufferProcessor,
in_memory_logs_buffer: InMemoryLogsBuffer,
pipeline_metrics_aggregator: PipelineMetricsAggregator,
audio_config=AudioConfig,
):
"""Register event handlers for transport events"""
"""Register all event handlers for transport and task events.
Returns:
Tuple of (in_memory_audio_buffer, in_memory_transcript_buffer) for use by other handlers.
"""
# Initialize in-memory buffers with proper audio configuration
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
num_channels = 1 # Pipeline audio is always mono
@ -48,13 +53,35 @@ def register_transport_event_handlers(
)
in_memory_transcript_buffer = InMemoryTranscriptBuffer(workflow_run_id)
# Track both events to ensure LLM is only triggered after both occur
ready_state = {
"pipeline_started": False,
"client_connected": False,
"llm_triggered": False,
}
async def maybe_trigger_llm():
"""Trigger LLM only after both pipeline_started and client_connected events."""
if (
ready_state["pipeline_started"]
and ready_state["client_connected"]
and not ready_state["llm_triggered"]
):
ready_state["llm_triggered"] = True
logger.debug(
"Both pipeline_started and client_connected received - triggering initial LLM generation"
)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, participant):
logger.debug("In on_client_connected callback handler - initializing workflow")
async def on_client_connected(_transport, _participant):
logger.debug("In on_client_connected callback handler")
await audio_buffer.start_recording()
ready_state["client_connected"] = True
await maybe_trigger_llm()
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, participant):
async def on_client_disconnected(_transport, _participant):
call_disposed = engine.is_call_disposed()
logger.debug(
@ -69,33 +96,16 @@ def register_transport_event_handlers(
if not call_disposed:
await task.cancel()
# Return the buffers so they can be passed to other handlers
return in_memory_audio_buffer, in_memory_transcript_buffer
def register_task_event_handler(
workflow_run_id: int,
engine: PipecatEngine,
task: PipelineTask,
transport,
audio_buffer: AudioBufferProcessor,
in_memory_audio_buffer: InMemoryAudioBuffer,
in_memory_transcript_buffer: InMemoryTranscriptBuffer,
in_memory_logs_buffer: InMemoryLogsBuffer,
pipeline_metrics_aggregator: PipelineMetricsAggregator,
):
@task.event_handler("on_pipeline_started")
async def on_pipeline_started(task: PipelineTask, frame: Frame):
logger.debug(
"In on_pipeline_started callback handler - triggering initial LLM generation"
)
# Trigger initial LLM generation after pipeline has started
await engine.llm.queue_frame(LLMContextFrame(engine.context))
async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
logger.debug("In on_pipeline_started callback handler")
ready_state["pipeline_started"] = True
await maybe_trigger_llm()
@task.event_handler("on_pipeline_finished")
async def on_pipeline_finished(
task: PipelineTask,
frame: Frame,
_frame: Frame,
):
logger.debug(f"In on_pipeline_finished callback handler")
@ -207,14 +217,13 @@ def register_task_event_handler(
if workflow_run and workflow_run.campaign_id:
await campaign_call_dispatcher.release_call_slot(workflow_run_id)
# Write buffers to temp files and enqueue S3 upload
# Write buffers to temp files and enqueue combined processing task
audio_temp_path = None
transcript_temp_path = None
try:
# Only upload if buffers have content
if not in_memory_audio_buffer.is_empty:
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
await enqueue_job(
FunctionNames.UPLOAD_AUDIO_TO_S3, workflow_run_id, audio_temp_path
)
else:
logger.debug("Audio buffer is empty, skipping upload")
@ -222,11 +231,6 @@ def register_task_event_handler(
transcript_temp_path = (
await in_memory_transcript_buffer.write_to_temp_file()
)
await enqueue_job(
FunctionNames.UPLOAD_TRANSCRIPT_TO_S3,
workflow_run_id,
transcript_temp_path,
)
else:
logger.debug("Transcript buffer is empty, skipping upload")
@ -234,10 +238,18 @@ def register_task_event_handler(
logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)
await enqueue_job(FunctionNames.CALCULATE_WORKFLOW_RUN_COST, workflow_run_id)
# Combined task: uploads artifacts then runs integrations sequentially
await enqueue_job(
FunctionNames.RUN_INTEGRATIONS_POST_WORKFLOW_RUN, workflow_run_id
FunctionNames.PROCESS_WORKFLOW_COMPLETION,
workflow_run_id,
audio_temp_path,
transcript_temp_path,
)
# Return the buffers so they can be passed to other handlers
return in_memory_audio_buffer, in_memory_transcript_buffer
def register_audio_data_handler(
audio_buffer: AudioBufferProcessor,
@ -260,18 +272,26 @@ def register_audio_data_handler(
# Could implement overflow to disk here if needed
def register_transcript_handler(
transcript, workflow_run_id, in_memory_buffer: InMemoryTranscriptBuffer
def register_transcript_handlers(
user_aggregator,
assistant_aggregator,
workflow_run_id,
in_memory_buffer: InMemoryTranscriptBuffer,
):
"""Register event handler for transcript updates"""
"""Register event handlers for transcript updates on context aggregators.
@transcript.event_handler("on_transcript_update")
async def on_transcript_update(processor, frame):
transcript_text = ""
for msg in frame.messages:
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
line = f"{timestamp}{msg.role}: {msg.content}\n"
transcript_text += line
Uses the on_user_turn_stopped and on_assistant_turn_stopped events to capture
transcripts as turns complete, following the event-based pattern.
"""
# Use in-memory buffer
await in_memory_buffer.append(transcript_text)
@user_aggregator.event_handler("on_user_turn_stopped")
async def on_user_turn_stopped(aggregator, strategy, message):
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
line = f"{timestamp}user: {message.content}\n"
await in_memory_buffer.append(line)
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
async def on_assistant_turn_stopped(aggregator, message):
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
line = f"{timestamp}assistant: {message.content}\n"
await in_memory_buffer.append(line)

View file

@ -1,5 +1,4 @@
import os
from typing import TYPE_CHECKING
from loguru import logger
@ -11,14 +10,10 @@ from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.processors.transcript_processor import TranscriptProcessor
from pipecat.utils.context import turn_var
if TYPE_CHECKING:
from api.services.workflow.pipecat_engine import PipecatEngine
def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine"):
def create_pipeline_components(audio_config: AudioConfig):
"""Create and return the main pipeline components with proper audio configuration"""
logger.info(f"Creating pipeline components with audio config: {audio_config}")
@ -28,28 +23,21 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
buffer_size=audio_config.buffer_size_bytes,
)
transcript = TranscriptProcessor(
assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
)
context = LLMContext()
return audio_buffer, transcript, context
return audio_buffer, context
def build_pipeline(
transport,
stt,
transcript,
audio_buffer,
llm,
tts,
user_context_aggregator,
assistant_context_aggregator,
pipeline_engine_callback_processor,
stt_mute_filter,
pipeline_metrics_aggregator,
user_idle_disconnect,
voicemail_detector=None,
):
"""Build the main pipeline with all components.
@ -63,7 +51,7 @@ def build_pipeline(
# Build processors list with optional voicemail detection
processors = [
transport.input(), # Transport user input
stt, # STT (audio_passthrough=True by default, passes InputAudioRawFrame)
stt,
]
# Insert voicemail detector after STT if enabled
@ -76,16 +64,12 @@ def build_pipeline(
# Continue with the rest of the pipeline
processors.extend(
[
stt_mute_filter, # STTMuteFilters don't let VAD related events pass through if muted
user_idle_disconnect,
transcript.user(),
user_context_aggregator,
llm, # LLM
pipeline_engine_callback_processor,
tts, # TTS
transport.output(), # Transport bot output
audio_buffer, # AudioBufferProcessor - records both input and output audio
transcript.assistant(),
assistant_context_aggregator, # Assistant spoken responses
pipeline_metrics_aggregator,
]
@ -98,7 +82,6 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
"""Create a pipeline task with appropriate parameters"""
# Set up pipeline params with audio configuration if provided
pipeline_params = PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
send_initial_empty_metrics=False,
@ -119,6 +102,7 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
pipeline,
params=pipeline_params,
enable_tracing=ENABLE_TRACING,
enable_rtvi=False,
conversation_id=f"{workflow_run_id}",
)

View file

@ -7,12 +7,12 @@ from loguru import logger
from api.db import db_client
from api.db.models import WorkflowModel
from api.enums import WorkflowRunMode
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
from api.services.pipecat.event_handlers import (
register_audio_data_handler,
register_task_event_handler,
register_transcript_handler,
register_transport_event_handlers,
register_event_handlers,
register_transcript_handlers,
)
from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
from api.services.pipecat.pipeline_builder import (
@ -46,20 +46,25 @@ from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.workflow import WorkflowGraph
from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
from pipecat.pipeline.base_task import PipelineTaskParams
from pipecat.processors.aggregators.llm_response import (
from pipecat.processors.aggregators.llm_response_universal import (
LLMAssistantAggregatorParams,
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
)
from pipecat.processors.filters.stt_mute_filter import (
STTMuteConfig,
STTMuteFilter,
STTMuteStrategy,
)
from pipecat.processors.user_idle_processor import UserIdleProcessor
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
from pipecat.turns.user_mute import MuteUntilFirstBotCompleteUserMuteStrategy
from pipecat.turns.user_start import (
ExternalUserTurnStartStrategy,
TranscriptionUserTurnStartStrategy,
)
from pipecat.turns.user_start.vad_user_turn_start_strategy import (
VADUserTurnStartStrategy,
)
from pipecat.turns.user_stop import (
ExternalUserTurnStopStrategy,
TranscriptionUserTurnStopStrategy,
)
from pipecat.turns.user_turn_strategies import UserTurnStrategies
from pipecat.utils.context import set_current_run_id
from pipecat.utils.enums import EndTaskReason
from pipecat.utils.tracing.context_registry import ContextProviderRegistry
@ -517,12 +522,11 @@ async def _run_pipeline(
embeddings_model=embeddings_model,
)
# Create pipeline components with audio configuration and engine
audio_buffer, transcript, context = create_pipeline_components(audio_config, engine)
# Create pipeline components with audio configuration
audio_buffer, context = create_pipeline_components(audio_config)
# Set the context and audio_buffer after creation
engine.set_context(context)
engine.set_audio_buffer(audio_buffer)
# Set Stasis connection for immediate transfers (if available)
if stasis_connection:
@ -532,7 +536,31 @@ async def _run_pipeline(
expect_stripped_words=True,
correct_aggregation_callback=engine.create_aggregation_correction_callback(),
)
user_params = LLMUserAggregatorParams(enable_emulated_vad_interruptions=True)
# Configure turn strategies based on STT provider and model
# Deepgram Flux uses external turn detection (VAD + External start/stop)
# Other models use transcription-based turn detection with smart turn analyzer
is_deepgram_flux = (
user_config.stt.provider == ServiceProviders.DEEPGRAM.value
and user_config.stt.model == "flux-general-en"
)
if is_deepgram_flux:
user_turn_strategies = UserTurnStrategies(
start=[VADUserTurnStartStrategy(), ExternalUserTurnStartStrategy()],
stop=[ExternalUserTurnStopStrategy()],
)
else:
user_turn_strategies = UserTurnStrategies(
start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
stop=[TranscriptionUserTurnStopStrategy()],
)
user_params = LLMUserAggregatorParams(
user_turn_strategies=user_turn_strategies,
user_mute_strategies=[MuteUntilFirstBotCompleteUserMuteStrategy()],
user_idle_timeout=max_user_idle_timeout,
)
context_aggregator = LLMContextAggregatorPair(
context, assistant_params=assistant_params, user_params=user_params
)
@ -547,25 +575,20 @@ async def _run_pipeline(
pipeline_metrics_aggregator = PipelineMetricsAggregator()
# Create STT mute filter using the selected strategies and the engine's callback
stt_mute_filter = STTMuteFilter(
config=STTMuteConfig(
strategies={
STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE,
STTMuteStrategy.CUSTOM,
},
should_mute_callback=engine.create_should_mute_callback(),
)
)
# Use engine's user idle callback with configured timeout
user_idle_disconnect = UserIdleProcessor(
callback=engine.create_user_idle_callback(), timeout=max_user_idle_timeout
)
user_context_aggregator = context_aggregator.user()
assistant_context_aggregator = context_aggregator.assistant()
# Register user idle event handlers
user_idle_handler = engine.create_user_idle_handler()
@user_context_aggregator.event_handler("on_user_turn_idle")
async def on_user_turn_idle(aggregator):
await user_idle_handler.handle_idle(aggregator)
@user_context_aggregator.event_handler("on_user_turn_started")
async def on_user_turn_started(aggregator, strategy):
user_idle_handler.reset()
# Create voicemail detector if enabled in the workflow's start node
voicemail_detector = None
start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
@ -592,16 +615,13 @@ async def _run_pipeline(
pipeline = build_pipeline(
transport,
stt,
transcript,
audio_buffer,
llm,
tts,
user_context_aggregator,
assistant_context_aggregator,
pipeline_engine_callback_processor,
stt_mute_filter,
pipeline_metrics_aggregator,
user_idle_disconnect,
voicemail_detector=voicemail_detector,
)
@ -614,18 +634,6 @@ async def _run_pipeline(
# Initialize the engine to set the initial context
await engine.initialize()
# Register event handlers
in_memory_audio_buffer, in_memory_transcript_buffer = (
register_transport_event_handlers(
task,
transport,
workflow_run_id,
engine=engine,
audio_buffer=audio_buffer,
audio_config=audio_config,
)
)
# Add real-time feedback observer if WebSocket sender is available
# Note: ws_sender was already fetched earlier for node_transition_callback
if ws_sender:
@ -635,21 +643,24 @@ async def _run_pipeline(
)
task.add_observer(feedback_observer)
register_task_event_handler(
workflow_run_id,
engine,
# Register event handlers
in_memory_audio_buffer, in_memory_transcript_buffer = register_event_handlers(
task,
transport,
audio_buffer,
in_memory_audio_buffer,
in_memory_transcript_buffer,
in_memory_logs_buffer,
pipeline_metrics_aggregator,
workflow_run_id,
engine=engine,
audio_buffer=audio_buffer,
in_memory_logs_buffer=in_memory_logs_buffer,
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
audio_config=audio_config,
)
register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)
register_transcript_handler(
transcript, workflow_run_id, in_memory_transcript_buffer
register_transcript_handlers(
user_context_aggregator,
assistant_context_aggregator,
workflow_run_id,
in_memory_transcript_buffer,
)
try:

View file

@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
from api.services.configuration.registry import ServiceProviders
from pipecat.services.azure.llm import AzureLLMService
from pipecat.services.cartesia.stt import CartesiaSTTService
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
from pipecat.services.deepgram.tts import DeepgramTTSService
from pipecat.services.dograh.llm import DograhLLMService
@ -34,6 +35,20 @@ def create_stt_service(user_config):
f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
)
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
# Check if using Flux model (English-only, no language selection)
if user_config.stt.model == "flux-general-en":
logger.debug("Using DeepGram Flux Model")
return DeepgramFluxSTTService(
api_key=user_config.stt.api_key,
model=user_config.stt.model,
params=DeepgramFluxSTTService.InputParams(
eot_timeout_ms=3000,
eot_threshold=0.7,
),
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
)
# All Deepgram models other than Flux
# Use language from user config, defaulting to "multi" for multilingual support
language = getattr(user_config.stt, "language", None) or "multi"
live_options = LiveOptions(
@ -44,7 +59,9 @@ def create_stt_service(user_config):
)
logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
return DeepgramSTTService(
live_options=live_options, api_key=user_config.stt.api_key
live_options=live_options,
api_key=user_config.stt.api_key,
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
)
elif user_config.stt.provider == ServiceProviders.OPENAI.value:
return OpenAISTTService(

View file

@ -2,10 +2,9 @@ import os
from fastapi import WebSocket
from api.constants import APP_ROOT_DIR, ENABLE_RNNOISE, ENABLE_SMART_TURN
from api.constants import APP_ROOT_DIR
from api.db import db_client
from api.enums import OrganizationConfigurationKey
from api.services.looptalk.internal_transport import InternalTransport
from api.services.pipecat.audio_config import AudioConfig
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer
@ -13,11 +12,8 @@ from api.services.telephony.stasis_rtp_transport import (
StasisRTPTransport,
StasisRTPTransportParams,
)
from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter
from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
from pipecat.serializers.twilio import TwilioFrameSerializer
from pipecat.serializers.vobiz import VobizFrameSerializer
@ -35,19 +31,6 @@ librnnoise_path = os.path.normpath(
)
def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig):
"""Create a turn analyzer backed by the local Smart Turn HTTP service.
Args:
workflow_run_id: ID of the workflow run for turn analyzer context
audio_config: Audio configuration containing pipeline sample rate
"""
if ENABLE_SMART_TURN:
return LocalSmartTurnAnalyzerV3(params=SmartTurnParams())
return None
async def create_twilio_transport(
websocket_client: WebSocket,
stream_sid: str,
@ -78,8 +61,6 @@ async def create_twilio_transport(
f"Incomplete Twilio configuration for organization {organization_id}"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = TwilioFrameSerializer(
stream_sid=stream_sid,
call_sid=call_sid,
@ -119,11 +100,7 @@ async def create_twilio_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -158,8 +135,6 @@ async def create_cloudonix_transport(
f"Required: bearer_token, domain_id"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
from pipecat.serializers.cloudonix import CloudonixFrameSerializer
serializer = CloudonixFrameSerializer(
@ -202,11 +177,7 @@ async def create_cloudonix_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -238,8 +209,6 @@ async def create_vonage_transport(
f"Incomplete Vonage configuration for organization {organization_id}"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = VonageFrameSerializer(
call_uuid=call_uuid,
application_id=application_id,
@ -283,11 +252,7 @@ async def create_vonage_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -337,8 +302,6 @@ async def create_vobiz_transport(
f"from_numbers={len(config.get('from_numbers', []))} numbers"
)
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
# Use VobizFrameSerializer for Vobiz WebSocket protocol
serializer = VobizFrameSerializer(
stream_id=stream_id,
@ -389,11 +352,7 @@ async def create_vobiz_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -411,7 +370,6 @@ def create_webrtc_transport(
ambient_noise_config: dict | None = None,
):
"""Create a transport for WebRTC connections"""
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
return SmallWebRTCTransport(
webrtc_connection=webrtc_connection,
@ -445,10 +403,6 @@ def create_webrtc_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -461,7 +415,6 @@ def create_stasis_transport(
ambient_noise_config: dict | None = None,
):
"""Create a transport for ARI connections"""
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
serializer = StasisRTPFrameSerializer(
StasisRTPFrameSerializer.InputParams(
@ -502,11 +455,7 @@ def create_stasis_transport(
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
serializer=serializer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
)
@ -528,46 +477,44 @@ def create_internal_transport(
Returns:
InternalTransport instance configured with turn analyzer
"""
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
pass
# Commented out because importing looptalk through the regular import flow
# was causing issues. Maybe move this to looptalk/orchestrator.py.
# Create and return the internal transport with latency
return InternalTransport(
params=TransportParams(
audio_out_enabled=True,
audio_out_sample_rate=audio_config.transport_out_sample_rate,
audio_out_channels=1,
audio_in_enabled=True,
audio_in_sample_rate=audio_config.transport_in_sample_rate,
audio_in_channels=1,
vad_analyzer=(
SileroVADAnalyzer(
params=VADParams(
confidence=vad_config.get("confidence", 0.7),
start_secs=vad_config.get("start_seconds", 0.4),
stop_secs=vad_config.get("stop_seconds", 0.8),
min_volume=vad_config.get("minimum_volume", 0.6),
)
)
if vad_config
else SileroVADAnalyzer()
),
audio_out_mixer=(
SoundfileMixer(
sound_files={
"office": APP_ROOT_DIR
/ "assets"
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
},
default_sound="office",
volume=ambient_noise_config.get("volume", 0.3),
)
if ambient_noise_config and ambient_noise_config.get("enabled", False)
else SilenceAudioMixer()
),
turn_analyzer=turn_analyzer,
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
if ENABLE_RNNOISE
else None,
),
latency_seconds=latency_seconds,
)
# return InternalTransport(
# params=TransportParams(
# audio_out_enabled=True,
# audio_out_sample_rate=audio_config.transport_out_sample_rate,
# audio_out_channels=1,
# audio_in_enabled=True,
# audio_in_sample_rate=audio_config.transport_in_sample_rate,
# audio_in_channels=1,
# vad_analyzer=(
# SileroVADAnalyzer(
# params=VADParams(
# confidence=vad_config.get("confidence", 0.7),
# start_secs=vad_config.get("start_seconds", 0.4),
# stop_secs=vad_config.get("stop_seconds", 0.8),
# min_volume=vad_config.get("minimum_volume", 0.6),
# )
# )
# if vad_config
# else SileroVADAnalyzer()
# ),
# audio_out_mixer=(
# SoundfileMixer(
# sound_files={
# "office": APP_ROOT_DIR
# / "assets"
# / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
# },
# default_sound="office",
# volume=ambient_noise_config.get("volume", 0.3),
# )
# if ambient_noise_config and ambient_noise_config.get("enabled", False)
# else SilenceAudioMixer()
# ),
# ),
# latency_seconds=latency_seconds,
# )

View file

@ -15,6 +15,8 @@ The serializer:
from typing import Optional
from loguru import logger
from pydantic import BaseModel
from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
from pipecat.frames.frames import (
AudioRawFrame,
@ -22,8 +24,7 @@ from pipecat.frames.frames import (
InputAudioRawFrame,
StartFrame,
)
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
from pydantic import BaseModel
from pipecat.serializers.base_serializer import FrameSerializer
class StasisRTPFrameSerializer(FrameSerializer):
@ -59,11 +60,6 @@ class StasisRTPFrameSerializer(FrameSerializer):
# Resampler shared between encode / decode paths
self._resampler = create_default_resampler()
@property
def type(self) -> FrameSerializerType:
"""Stasis uses raw bytes → BINARY."""
return FrameSerializerType.BINARY
async def setup(self, frame: StartFrame):
"""Remember pipeline configuration."""
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate

View file

@ -19,7 +19,6 @@ from pipecat.utils.enums import EndTaskReason
if TYPE_CHECKING:
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
from pipecat.services.anthropic.llm import AnthropicLLMService
from pipecat.services.google.llm import GoogleLLMService
from pipecat.services.openai.llm import OpenAILLMService
@ -64,7 +63,6 @@ class PipecatEngine:
transport: Optional[BaseTransport] = None,
workflow: WorkflowGraph,
call_context_vars: dict,
audio_buffer: Optional["AudioBuffer"] = None,
workflow_run_id: Optional[int] = None,
node_transition_callback: Optional[
Callable[[str, Optional[str]], Awaitable[None]]
@ -78,7 +76,6 @@ class PipecatEngine:
self.transport = transport
self.workflow = workflow
self._call_context_vars = call_context_vars
self._audio_buffer = audio_buffer
self._workflow_run_id = workflow_run_id
self._node_transition_callback = node_transition_callback
self._initialized = False
@ -204,6 +201,7 @@ class PipecatEngine:
logger.info(f"Arguments: {function_call_params.arguments}")
await self.set_node(transition_to_node)
try:
async def on_context_updated() -> None:
"""
pipecat framework will run this function after the function call result has been updated in the context.
@ -215,6 +213,12 @@ class PipecatEngine:
self._current_node
)
# Queue EndFrame if we just transitioned to EndNode
if self._current_node.is_end:
await self.send_end_task_frame(
EndTaskReason.USER_QUALIFIED.value
)
result = {"status": "done"}
properties = FunctionCallResultProperties(
@ -478,8 +482,6 @@ class PipecatEngine:
if node.extraction_enabled and node.extraction_variables:
await self._perform_variable_extraction_if_needed(node)
await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
async def _handle_agent_node(self, node: Node) -> None:
"""Handle agent node execution."""
if node.is_static:
@ -680,12 +682,12 @@ class PipecatEngine:
"""
return engine_callbacks.create_should_mute_callback(self)
def create_user_idle_callback(self):
def create_user_idle_handler(self):
"""
This callback is called when the user is idle for a certain duration.
We use this to either play the static text or end the call
Returns a UserIdleHandler that manages user-idle timeouts with state.
The handler tracks retry count and handles escalating prompts.
"""
return engine_callbacks.create_user_idle_callback(self)
return engine_callbacks.create_user_idle_handler(self)
def create_max_duration_callback(self):
"""
@ -721,14 +723,6 @@ class PipecatEngine:
"""
self.task = task
def set_audio_buffer(self, audio_buffer: "AudioBuffer") -> None:
"""Set the audio buffer.
This allows setting the audio buffer after the engine has been created,
which is useful when the audio buffer needs to be created after the engine.
"""
self._audio_buffer = audio_buffer
def set_stasis_connection(
self, connection: Optional["StasisRTPConnection"]
) -> None:

View file

@ -23,7 +23,6 @@ from pipecat.utils.enums import EndTaskReason
if TYPE_CHECKING:
from api.services.workflow.pipecat_engine import PipecatEngine
from pipecat.processors.user_idle_processor import UserIdleProcessor
# ---------------------------------------------------------------------------
@ -57,33 +56,43 @@ def create_should_mute_callback(
# ---------------------------------------------------------------------------
def create_user_idle_callback(engine: "PipecatEngine"):
"""Return a callback that handles user-idle timeouts."""
class UserIdleHandler:
"""Helper class to manage user idle retry logic with state."""
async def handle_user_idle(
user_idle: "UserIdleProcessor", retry_count: int
) -> bool:
logger.debug(f"Handling user_idle, attempt: {retry_count}")
def __init__(self, engine: "PipecatEngine"):
self._engine = engine
self._retry_count = 0
if retry_count == 1:
def reset(self):
"""Reset the retry count when user becomes active."""
self._retry_count = 0
async def handle_idle(self, aggregator):
"""Handle user idle event with escalating prompts."""
self._retry_count += 1
logger.debug(f"Handling user_idle, attempt: {self._retry_count}")
if self._retry_count == 1:
message = {
"role": "system",
"content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
}
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
return True
await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
return
message = {
"role": "system",
"content": "The user has been quiet. We will be disconnecting the call now. Wish them a good day in the language that the user has been speaking so far.",
}
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
await engine.send_end_task_frame(
await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
await self._engine.send_end_task_frame(
EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value
)
return False
return handle_user_idle
def create_user_idle_handler(engine: "PipecatEngine") -> UserIdleHandler:
"""Return a UserIdleHandler that manages user-idle timeouts with state."""
return UserIdleHandler(engine)
# ---------------------------------------------------------------------------

View file

@ -49,8 +49,7 @@ from api.tasks.campaign_tasks import (
from api.tasks.knowledge_base_processing import process_knowledge_base_document
from api.tasks.run_integrations import run_integrations_post_workflow_run
from api.tasks.s3_upload import (
upload_audio_to_s3,
upload_transcript_to_s3,
process_workflow_completion,
upload_voicemail_audio_to_s3,
)
@ -59,9 +58,8 @@ class WorkerSettings:
functions = [
calculate_workflow_run_cost,
run_integrations_post_workflow_run,
upload_audio_to_s3,
upload_transcript_to_s3,
upload_voicemail_audio_to_s3,
process_workflow_completion,
sync_campaign_source,
process_campaign_batch,
monitor_campaign_progress,

View file

@ -1,8 +1,7 @@
class FunctionNames:
CALCULATE_WORKFLOW_RUN_COST = "calculate_workflow_run_cost"
RUN_INTEGRATIONS_POST_WORKFLOW_RUN = "run_integrations_post_workflow_run"
UPLOAD_AUDIO_TO_S3 = "upload_audio_to_s3"
UPLOAD_TRANSCRIPT_TO_S3 = "upload_transcript_to_s3"
PROCESS_WORKFLOW_COMPLETION = "process_workflow_completion"
UPLOAD_VOICEMAIL_AUDIO_TO_S3 = "upload_voicemail_audio_to_s3"
SYNC_CAMPAIGN_SOURCE = "sync_campaign_source"
PROCESS_CAMPAIGN_BATCH = "process_campaign_batch"

View file

@ -1,10 +1,11 @@
"""Execute webhook integrations after workflow run completion."""
from typing import Any, Dict
from typing import Any, Dict, Optional
import httpx
from loguru import logger
from api.constants import BACKEND_API_ENDPOINT
from api.db import db_client
from api.db.models import WorkflowRunModel
from api.utils.credential_auth import build_auth_header
@ -54,10 +55,13 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
logger.info(f"Found {len(webhook_nodes)} webhook nodes to execute")
# Step 4: Build render context
render_context = _build_render_context(workflow_run)
# Step 4: Generate public access token (on-demand, only when webhooks exist)
public_token = await db_client.ensure_public_access_token(workflow_run_id)
# Step 5: Execute each webhook node
# Step 5: Build render context
render_context = _build_render_context(workflow_run, public_token)
# Step 6: Execute each webhook node
for node in webhook_nodes:
webhook_data = node.get("data", {})
try:
@ -77,9 +81,19 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
raise
def _build_render_context(workflow_run: WorkflowRunModel) -> Dict[str, Any]:
"""Build the context dict for template rendering."""
return {
def _build_render_context(
workflow_run: WorkflowRunModel, public_token: Optional[str] = None
) -> Dict[str, Any]:
"""Build the context dict for template rendering.
Args:
workflow_run: The workflow run model
public_token: Optional public access token for download URLs
Returns:
Dict containing all fields available for template rendering
"""
context = {
# Top-level fields
"workflow_run_id": workflow_run.id,
"workflow_run_name": workflow_run.name,
@ -89,10 +103,25 @@ def _build_render_context(workflow_run: WorkflowRunModel) -> Dict[str, Any]:
"initial_context": workflow_run.initial_context or {},
"gathered_context": workflow_run.gathered_context or {},
"cost_info": workflow_run.usage_info or {},
"recording_url": getattr(workflow_run, "recording_url", None),
"transcript_url": getattr(workflow_run, "transcript_url", None),
}
# Add public download URLs if token is available
if public_token:
base_url = (
f"{BACKEND_API_ENDPOINT}/api/v1/public/download/workflow/{public_token}"
)
context["recording_url"] = (
f"{base_url}/recording" if workflow_run.recording_url else None
)
context["transcript_url"] = (
f"{base_url}/transcript" if workflow_run.transcript_url else None
)
else:
context["recording_url"] = workflow_run.recording_url
context["transcript_url"] = workflow_run.transcript_url
return context
async def _execute_webhook_node(
webhook_data: Dict[str, Any],

View file

@ -1,129 +1,27 @@
import os
from typing import Optional
from loguru import logger
from pipecat.utils.context import set_current_run_id
from api.db import db_client
from api.services.storage import get_current_storage_backend, storage_fs
async def upload_audio_to_s3(ctx, workflow_run_id: int, temp_file_path: str):
"""Upload audio file from temp path to S3."""
run_id = str(workflow_run_id)
set_current_run_id(run_id)
logger.info(f"Starting audio upload to S3 from {temp_file_path}")
try:
# Verify temp file exists
if not os.path.exists(temp_file_path):
logger.error(f"Temp audio file not found: {temp_file_path}")
raise FileNotFoundError(f"Temp audio file not found: {temp_file_path}")
file_size = os.path.getsize(temp_file_path)
logger.debug(f"Audio file size: {file_size} bytes")
recording_url = f"recordings/{workflow_run_id}.wav"
storage_backend = get_current_storage_backend()
logger.info(
f"UPLOAD: Using {storage_backend.name} (value: {storage_backend.value}) for audio upload - workflow_run_id: {workflow_run_id}"
)
await storage_fs.aupload_file(temp_file_path, recording_url)
# Update DB with recording URL and storage backend
await db_client.update_workflow_run(
run_id=workflow_run_id,
recording_url=recording_url,
storage_backend=storage_backend.value,
)
logger.info(
f"Successfully uploaded audio to {storage_backend.name}: {recording_url} (stored backend: {storage_backend.name})"
)
except Exception as e:
logger.error(f"Error uploading audio to S3 for workflow {workflow_run_id}: {e}")
raise
finally:
# Clean up temp file
if os.path.exists(temp_file_path):
try:
os.remove(temp_file_path)
logger.debug(f"Cleaned up temp audio file: {temp_file_path}")
except Exception as e:
logger.warning(
f"Failed to clean up temp audio file {temp_file_path}: {e}"
)
async def upload_transcript_to_s3(ctx, workflow_run_id: int, temp_file_path: str):
"""Upload transcript file from temp path to S3."""
run_id = str(workflow_run_id)
set_current_run_id(run_id)
logger.info(f"Starting transcript upload to S3 from {temp_file_path}")
try:
# Verify temp file exists
if not os.path.exists(temp_file_path):
logger.error(f"Temp transcript file not found: {temp_file_path}")
raise FileNotFoundError(f"Temp transcript file not found: {temp_file_path}")
file_size = os.path.getsize(temp_file_path)
logger.debug(f"Transcript file size: {file_size} bytes")
transcript_url = f"transcripts/{workflow_run_id}.txt"
storage_backend = get_current_storage_backend()
logger.info(
f"UPLOAD: Using {storage_backend.name} (value: {storage_backend.value}) for transcript upload - workflow_run_id: {workflow_run_id}"
)
await storage_fs.aupload_file(temp_file_path, transcript_url)
# Update DB with transcript URL and storage backend
await db_client.update_workflow_run(
run_id=workflow_run_id,
transcript_url=transcript_url,
storage_backend=storage_backend.value,
)
logger.info(
f"Successfully uploaded transcript to {storage_backend.name}: {transcript_url} (stored backend: {storage_backend.name})"
)
except Exception as e:
logger.error(
f"Error uploading transcript to S3 for workflow {workflow_run_id}: {e}"
)
raise
finally:
# Clean up temp file
if os.path.exists(temp_file_path):
try:
os.remove(temp_file_path)
logger.debug(f"Cleaned up temp transcript file: {temp_file_path}")
except Exception as e:
logger.warning(
f"Failed to clean up temp transcript file {temp_file_path}: {e}"
)
from api.tasks.run_integrations import run_integrations_post_workflow_run
from pipecat.utils.context import set_current_run_id
async def upload_voicemail_audio_to_s3(
ctx,
_ctx,
workflow_run_id: int,
temp_file_path: str,
s3_key: str,
):
"""Upload voicemail detection audio from temp file to S3.
This function is similar to upload_audio_to_s3 but handles voicemail-specific
paths and doesn't update the workflow run's recording_url field.
Handles voicemail-specific paths and doesn't update the workflow run's
recording_url field.
Args:
ctx: ARQ context
_ctx: ARQ context (unused)
workflow_run_id: The workflow run ID
temp_file_path: Path to the temporary WAV file
s3_key: The S3 key where the file should be uploaded
@ -161,7 +59,7 @@ async def upload_voicemail_audio_to_s3(
)
raise
finally:
# Clean up temp file (same pattern as upload_audio_to_s3)
# Clean up temp file
if os.path.exists(temp_file_path):
try:
os.remove(temp_file_path)
@ -170,3 +68,104 @@ async def upload_voicemail_audio_to_s3(
logger.warning(
f"Failed to clean up temp voicemail audio file {temp_file_path}: {e}"
)
async def _upload_workflow_artifact(
    workflow_run_id: int,
    temp_path: str,
    remote_path: str,
    url_field: str,
    label: str,
    storage_backend,
) -> None:
    """Upload one temp artifact, record its URL on the run, then delete the temp file.

    Failures are logged and swallowed on purpose so that one failed upload does
    not prevent the remaining completion steps from running.

    Args:
        workflow_run_id: The workflow run ID.
        temp_path: Local path of the file to upload.
        remote_path: Destination key/path in storage.
        url_field: Name of the ``update_workflow_run`` keyword that receives
            ``remote_path`` (``"recording_url"`` or ``"transcript_url"``).
        label: Lower-case artifact name used in log messages (``"audio"`` /
            ``"transcript"``).
        storage_backend: Backend object exposing ``name`` and ``value``.
    """
    title = label.capitalize()
    try:
        if os.path.exists(temp_path):
            file_size = os.path.getsize(temp_path)
            logger.debug(f"{title} file size: {file_size} bytes")

            logger.info(
                f"Uploading {label} to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
            )
            await storage_fs.aupload_file(temp_path, remote_path)

            await db_client.update_workflow_run(
                run_id=workflow_run_id,
                storage_backend=storage_backend.value,
                **{url_field: remote_path},
            )
            logger.info(f"Successfully uploaded {label}: {remote_path}")
        else:
            logger.warning(f"{title} temp file not found: {temp_path}")
    except Exception as e:
        logger.error(f"Error uploading {label} for workflow {workflow_run_id}: {e}")
    finally:
        # Best-effort cleanup; a failed delete is only logged.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
                logger.debug(f"Cleaned up temp {label} file: {temp_path}")
            except Exception as e:
                logger.warning(f"Failed to clean up temp {label} file: {e}")


async def process_workflow_completion(
    _ctx,
    workflow_run_id: int,
    audio_temp_path: Optional[str] = None,
    transcript_temp_path: Optional[str] = None,
):
    """Process workflow completion: upload artifacts and run integrations.

    This task combines audio upload, transcript upload, and webhook integrations
    into a single sequential task to ensure integrations run after uploads
    complete. Each step logs its own errors and never aborts the following steps.

    Args:
        _ctx: Task context; only forwarded to
            ``run_integrations_post_workflow_run``.
        workflow_run_id: The workflow run ID
        audio_temp_path: Optional path to temp audio file
        transcript_temp_path: Optional path to temp transcript file
    """
    run_id = str(workflow_run_id)
    set_current_run_id(run_id)

    logger.info(f"Processing workflow completion for run {workflow_run_id}")

    storage_backend = get_current_storage_backend()

    # Step 1: Upload audio if provided
    if audio_temp_path:
        await _upload_workflow_artifact(
            workflow_run_id,
            audio_temp_path,
            f"recordings/{workflow_run_id}.wav",
            "recording_url",
            "audio",
            storage_backend,
        )

    # Step 2: Upload transcript if provided
    if transcript_temp_path:
        await _upload_workflow_artifact(
            workflow_run_id,
            transcript_temp_path,
            f"transcripts/{workflow_run_id}.txt",
            "transcript_url",
            "transcript",
            storage_backend,
        )

    # Step 3: Run webhook integrations (after uploads are complete)
    try:
        await run_integrations_post_workflow_run(_ctx, workflow_run_id)
    except Exception as e:
        logger.error(f"Error running integrations for workflow {workflow_run_id}: {e}")

    logger.info(f"Completed workflow completion processing for run {workflow_run_id}")

View file

@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Any, Dict
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
from unittest.mock import Mock
import pytest
@ -28,6 +28,87 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
START_CALL_SYSTEM_PROMPT = "start_call_system_prompt"
END_CALL_SYSTEM_PROMPT = "end_call_system_prompt"

# Default workflow definition for mocking database WorkflowModel:
# a start node ("1") connected to an end node ("2") by a single edge.
DEFAULT_WORKFLOW_DEFINITION = {
    "nodes": [
        {
            "id": "1",
            "type": "startCall",
            "position": {"x": 0, "y": 0},
            "data": {
                "name": "Start",
                "prompt": START_CALL_SYSTEM_PROMPT,
                "is_start": True,
                "allow_interrupt": False,
                "add_global_prompt": False,
            },
        },
        {
            "id": "2",
            "type": "endCall",
            "position": {"x": 0, "y": 200},
            "data": {
                "name": "End",
                "prompt": END_CALL_SYSTEM_PROMPT,
                "is_end": True,
                "allow_interrupt": False,
                "add_global_prompt": False,
            },
        },
    ],
    "edges": [
        {
            "id": "1-2",
            "source": "1",
            "target": "2",
            "data": {"label": "End", "condition": "End the call"},
        }
    ],
}


@dataclass
class MockWorkflowModel:
    """Mock database WorkflowModel for testing.

    This mimics the structure of the database WorkflowModel, not the parsed WorkflowGraph.
    Use this when mocking db_client.get_workflow() responses.

    When no ``workflow_definition_with_fallback`` is supplied (or it is empty),
    the instance receives its own deep copy of ``DEFAULT_WORKFLOW_DEFINITION``.
    """

    workflow_id: int = 1
    organization_id: int = 1
    workflow_configurations: Dict[str, Any] = field(default_factory=dict)
    # Filled with a private copy of DEFAULT_WORKFLOW_DEFINITION when left empty.
    workflow_definition_with_fallback: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        if not self.workflow_definition_with_fallback:
            # Imported here so the fix does not depend on the file's import block.
            from copy import deepcopy

            # A shallow copy would share the nested node/edge dicts with the
            # module constant, letting one test's mutation leak into others.
            self.workflow_definition_with_fallback = deepcopy(
                DEFAULT_WORKFLOW_DEFINITION
            )
@dataclass
class MockWorkflowRun:
    """Stand-in for a database WorkflowRun row in tests.

    Intended as the return value when mocking db_client.get_workflow_run().
    """

    # Completion flag of the run; a new mock starts as not completed.
    is_completed: bool = False
    # Both context dicts default to a fresh empty dict per instance.
    initial_context: Dict[str, Any] = field(default_factory=dict)
    gathered_context: Dict[str, Any] = field(default_factory=dict)
@dataclass
class MockUserConfig:
    """Stand-in for a user's service configuration in tests.

    Intended as the return value when mocking
    db_client.get_user_configurations().
    """

    # One optional slot per service; every slot defaults to None.
    stt: Optional[Any] = None
    tts: Optional[Any] = None
    llm: Optional[Any] = None
    embeddings: Optional[Any] = None
class MockTransportProcessor(FrameProcessor):
"""
@ -41,7 +122,7 @@ class MockTransportProcessor(FrameProcessor):
Args:
emit_bot_speaking: If True, also emits BotSpeakingFrame on TTSAudioRawFrame
which is needed for UserIdleProcessor to start conversation tracking. Default True.
which is needed for user idle tracking to start conversation tracking. Default True.
"""
def __init__(
@ -63,7 +144,7 @@ class MockTransportProcessor(FrameProcessor):
BotStartedSpeakingFrame(), direction=FrameDirection.UPSTREAM
)
elif isinstance(frame, TTSAudioRawFrame):
# Emit BotSpeakingFrame - this is what triggers the UserIdleProcessor
# Emit BotSpeakingFrame - this is what triggers user idle tracking
# to start conversation tracking
if self._emit_bot_speaking:
await self.push_frame(BotSpeakingFrame())
@ -101,6 +182,24 @@ def mock_engine():
return engine
@pytest.fixture
def mock_workflow_model():
    """Provide a MockWorkflowModel built with its default field values."""
    workflow_model = MockWorkflowModel()
    return workflow_model
@pytest.fixture
def mock_workflow_run():
    """Provide a MockWorkflowRun built with its default field values."""
    workflow_run = MockWorkflowRun()
    return workflow_run
@pytest.fixture
def mock_user_config():
    """Provide a MockUserConfig with every service slot left unset."""
    user_config = MockUserConfig()
    return user_config
@pytest.fixture
def sample_tools():
"""Create sample mock tools for testing."""

View file

@ -42,7 +42,6 @@ from pipecat.processors.aggregators.llm_response_universal import (
)
from pipecat.tests import MockLLMService, MockTTSService
# Define prompts for test nodes
START_NODE_PROMPT = "Start Node System Prompt"
AGENT_NODE_PROMPT = "Agent Node System Prompt"
@ -143,14 +142,20 @@ class ContextCapturingMockLLM(MockLLMService):
msg_copy = dict(msg)
# Copy content to avoid reference issues
if "content" in msg_copy:
msg_copy["content"] = str(msg_copy["content"]) if msg_copy["content"] else None
msg_copy["content"] = (
str(msg_copy["content"]) if msg_copy["content"] else None
)
messages_snapshot.append(msg_copy)
self.captured_contexts.append({
"step": self._current_step,
"messages": messages_snapshot,
"system_prompt": messages_snapshot[0]["content"] if messages_snapshot else None,
})
self.captured_contexts.append(
{
"step": self._current_step,
"messages": messages_snapshot,
"system_prompt": messages_snapshot[0]["content"]
if messages_snapshot
else None,
}
)
# Call parent implementation to stream the mock chunks
return await super()._stream_chat_completions_universal_context(context)
@ -306,14 +311,26 @@ class TestContextUpdateBeforeNextCompletion:
transition completes. The test verifies the context is still correctly updated.
"""
# Step 0 (Start node): call collect_info to transition to agent
step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
{"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
])
step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
[
{
"name": "collect_info",
"arguments": {},
"tool_call_id": "call_transition_1",
},
]
)
# Step 1 (Agent node): call end_call to transition to end
step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
{"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
])
step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
[
{
"name": "end_call",
"arguments": {},
"tool_call_id": "call_transition_2",
},
]
)
# Step 2 (End node): text response (end node has no outgoing edges)
step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
@ -327,7 +344,7 @@ class TestContextUpdateBeforeNextCompletion:
)
# Should have been called 3 times: start node, agent node, end node
assert llm.get_current_step() == 2, (
assert llm.get_current_step() == 3, (
f"Expected 3 LLM generations (start, agent, end), got {llm.get_current_step()}"
)
@ -376,14 +393,26 @@ class TestContextUpdateBeforeNextCompletion:
is handled correctly.
"""
# Step 0 (Start node): call collect_info to transition to agent
step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
{"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
])
step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
[
{
"name": "collect_info",
"arguments": {},
"tool_call_id": "call_transition_1",
},
]
)
# Step 1 (Agent node): call end_call to transition to end
step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
{"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
])
step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
[
{
"name": "end_call",
"arguments": {},
"tool_call_id": "call_transition_2",
},
]
)
# Step 2 (End node): text response
step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
@ -397,7 +426,7 @@ class TestContextUpdateBeforeNextCompletion:
)
# Verify all three nodes were executed
assert llm.get_current_step() == 2, (
assert llm.get_current_step() == 3, (
f"Expected 3 steps, got {llm.get_current_step()}"
)
@ -408,8 +437,7 @@ class TestContextUpdateBeforeNextCompletion:
assert AGENT_NODE_PROMPT in llm.get_system_prompt_at_step(1)
# Step 2: End node - should have end prompt
# FIXME - EndFrame is getting processed before LLMContextFrame
# assert END_NODE_PROMPT in llm.get_system_prompt_at_step(2)
assert END_NODE_PROMPT in llm.get_system_prompt_at_step(2)
# Verify each subsequent step has the previous tool results
step_1_ctx = llm.get_context_at_step(1)
@ -423,14 +451,14 @@ class TestContextUpdateBeforeNextCompletion:
assert step_1_has_tool, "Agent node should see collect_info tool result"
# Step 2 should have tool results from both transitions
# FIXME - EndFrame is getting processed before LLMContextFrame
# step_2_tool_messages = [
# msg for msg in step_2_ctx["messages"]
# if msg.get("role") == "tool" or msg.get("tool_call_id")
# ]
# assert len(step_2_tool_messages) >= 2, (
# f"End node should see at least 2 tool results, got {len(step_2_tool_messages)}"
# )
step_2_tool_messages = [
msg
for msg in step_2_ctx["messages"]
if msg.get("role") == "tool" or msg.get("tool_call_id")
]
assert len(step_2_tool_messages) >= 2, (
f"End node should see at least 2 tool results, got {len(step_2_tool_messages)}"
)
@pytest.mark.asyncio
async def test_context_messages_preserve_conversation_history(
@ -444,14 +472,26 @@ class TestContextUpdateBeforeNextCompletion:
- Tool call messages and results
"""
# Step 0 (Start node): call collect_info to transition to agent
step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
{"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
])
step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
[
{
"name": "collect_info",
"arguments": {},
"tool_call_id": "call_transition_1",
},
]
)
# Step 1 (Agent node): call end_call to transition to end
step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
{"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
])
step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
[
{
"name": "end_call",
"arguments": {},
"tool_call_id": "call_transition_2",
},
]
)
# Step 2 (End node): text response
step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
@ -472,18 +512,15 @@ class TestContextUpdateBeforeNextCompletion:
assert len(ctx_1["messages"]) > len(ctx_0["messages"]), (
"Context at step 1 should have more messages than step 0"
)
# FIXME
# assert len(ctx_2["messages"]) > len(ctx_1["messages"]), (
# "Context at step 2 should have more messages than step 1"
# )
assert len(ctx_2["messages"]) > len(ctx_1["messages"]), (
"Context at step 2 should have more messages than step 1"
)
# Verify assistant messages are accumulated
# FIXME
# assistant_messages_at_step_2 = [
# msg for msg in ctx_2["messages"]
# if msg.get("role") == "assistant"
# ]
# assert len(assistant_messages_at_step_2) >= 2, (
# "Should have at least 2 assistant messages by step 2"
# )
assistant_messages_at_step_2 = [
msg for msg in ctx_2["messages"] if msg.get("role") == "assistant"
]
assert len(assistant_messages_at_step_2) >= 2, (
"Should have at least 2 assistant messages by step 2"
)

View file

@ -0,0 +1,100 @@
import asyncio
import pytest
from loguru import logger
from pipecat.frames.frames import (
EndTaskFrame,
Frame,
InterruptionTaskFrame,
LLMRunFrame,
)
from pipecat.pipeline.base_task import PipelineTaskParams
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
class MockTransport(FrameProcessor):
    """Pass-through processor used in place of a transport in this test.

    Every frame it receives is forwarded unchanged in the direction it
    arrived from.
    """

    def __init__(self, **kwargs):
        # No state of its own; all keyword arguments go to FrameProcessor.
        super().__init__(**kwargs)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Run the base-class handling, then relay ``frame`` unchanged."""
        await super().process_frame(frame, direction)
        await self.push_frame(frame, direction)
class BusyWaitProcessor(FrameProcessor):
    """Processor that stalls on ``LLMRunFrame`` to simulate slow downstream work.

    Args:
        wait_time: Seconds to sleep when an ``LLMRunFrame`` arrives.
        **kwargs: Forwarded to ``FrameProcessor``.
    """

    def __init__(self, wait_time=5.0, **kwargs):
        super().__init__(**kwargs)
        self._wait_time = wait_time

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Sleep on ``LLMRunFrame`` (honouring ``wait_time``), then forward the frame."""
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMRunFrame):
            # Simulate a delay, which can happen sometimes due to slow LLM Inferencing or
            # other reasons
            try:
                logger.debug(f"{self} sleeping with frame: {frame}")
                # Use the configured duration; it was previously hard-coded to
                # 5 seconds, so the constructor argument had no effect.
                await asyncio.sleep(self._wait_time)
                logger.debug(f"{self} woke up with frame: {frame}")
            except asyncio.CancelledError:
                logger.debug(f"{self} was cancelled")
                raise

        await self.push_frame(frame, direction)
@pytest.mark.asyncio
async def test_interruption_with_blocked_end_frame():
    """The pipeline must finish within one second despite a blocked processor.

    An ``LLMRunFrame`` parks ``BusyWaitProcessor`` in a 5-second sleep; an
    ``EndTaskFrame`` and then an ``InterruptionTaskFrame`` are pushed upstream
    from the transport. The test fails if either the pipeline task or the
    frame-queueing task is still running after the one-second timeout.
    """
    busy_wait_processor = BusyWaitProcessor(wait_time=5)
    transport = MockTransport()
    task = PipelineTask(Pipeline([transport, busy_wait_processor]))

    async def run_pipeline():
        await task.run(params=PipelineTaskParams(loop=asyncio.get_running_loop()))

    async def queue_frame():
        await task.queue_frames([LLMRunFrame()])

        # Send EndTaskFrame to simulate EndFrame
        await asyncio.sleep(0.1)
        await transport.queue_frame(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

        # Simulate an interruption, as happens when the user starts to speak
        await asyncio.sleep(0.1)
        await transport.queue_frame(
            InterruptionTaskFrame(), direction=FrameDirection.UPSTREAM
        )

    # Explicit tasks so that stragglers can be cancelled after the timeout
    workers = [
        asyncio.create_task(run_pipeline()),
        asyncio.create_task(queue_frame()),
    ]
    _, unfinished = await asyncio.wait(
        workers,
        timeout=1.0,
        return_when=asyncio.ALL_COMPLETED,
    )

    if not unfinished:
        return

    # Anything still running means we hit the timeout: cancel it all
    for straggler in unfinished:
        straggler.cancel()

    # Bounded wait for the cancellations to settle, then fail regardless
    try:
        await asyncio.wait_for(
            asyncio.gather(*unfinished, return_exceptions=True),
            timeout=1.0,
        )
    except asyncio.TimeoutError:
        pass  # Cleanup took too long, continue anyway

    pytest.fail("Test timed out after 1 second")

View file

@ -1,10 +1,10 @@
"""
Simulates a user idle condition and tests the behaviour
of the user idle processor.
of the user idle handler.
This module tests the behavior when the user becomes idle during a conversation,
ensuring the UserIdleProcessor properly triggers the callback and the engine
handles it correctly.
ensuring the user_idle_timeout in LLMUserAggregatorParams properly triggers
the on_user_turn_idle event and the engine handles it correctly.
"""
import asyncio
@ -23,8 +23,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.processors.user_idle_processor import UserIdleProcessor
from pipecat.tests import MockLLMService, MockTTSService
@ -32,8 +32,8 @@ async def run_pipeline_with_user_idle(
workflow: WorkflowGraph,
user_idle_timeout: float = 0.2,
mock_steps: list | None = None,
) -> tuple[MockLLMService, LLMContext, UserIdleProcessor]:
"""Run a pipeline with UserIdleProcessor and simulate user idle condition.
) -> tuple[MockLLMService, LLMContext]:
"""Run a pipeline with user_idle_timeout and simulate user idle condition.
Args:
workflow: The workflow graph to use.
@ -42,7 +42,7 @@ async def run_pipeline_with_user_idle(
defaults to a simple greeting followed by text responses.
Returns:
Tuple of (MockLLMService, LLMContext, UserIdleProcessor) for assertions.
Tuple of (MockLLMService, LLMContext) for assertions.
"""
# Create mock responses - bot will speak first, then respond to idle prompts
# Step 1: Initial greeting
@ -64,10 +64,11 @@ async def run_pipeline_with_user_idle(
# Create LLM context
context = LLMContext()
# Create context aggregator with both user and assistant aggregators
# Create context aggregator with user_idle_timeout in user_params
assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
user_params = LLMUserAggregatorParams(user_idle_timeout=user_idle_timeout)
context_aggregator = LLMContextAggregatorPair(
context, assistant_params=assistant_params
context, assistant_params=assistant_params, user_params=user_params
)
user_context_aggregator = context_aggregator.user()
assistant_context_aggregator = context_aggregator.assistant()
@ -81,18 +82,20 @@ async def run_pipeline_with_user_idle(
workflow_run_id=1,
)
# Create UserIdleProcessor with engine's callback and a short timeout
user_idle_processor = UserIdleProcessor(
callback=engine.create_user_idle_callback(),
timeout=user_idle_timeout,
)
# Register user idle event handlers
user_idle_handler = engine.create_user_idle_handler()
# Build the pipeline:
# llm -> mock_transport -> user_idle_processor -> assistant_context_aggregator
# The user_context_aggregator would normally be at the start for user input
@user_context_aggregator.event_handler("on_user_turn_idle")
async def on_user_turn_idle(aggregator):
await user_idle_handler.handle_idle(aggregator)
@user_context_aggregator.event_handler("on_user_turn_started")
async def on_user_turn_started(aggregator, strategy):
user_idle_handler.reset()
# Build the pipeline
pipeline = Pipeline(
[
user_idle_processor,
user_context_aggregator,
llm,
tts,
@ -154,11 +157,11 @@ async def run_pipeline_with_user_idle(
return_exceptions=True,
)
return llm, context, user_idle_processor
return llm, context
class TestUserIdleHandler:
"""Test user idle handling through PipecatEngine and UserIdleProcessor."""
"""Test user idle handling through PipecatEngine and UserIdleHandler."""
@pytest.mark.asyncio
async def test_user_idle_triggers_callback(self, simple_workflow: WorkflowGraph):
@ -167,13 +170,13 @@ class TestUserIdleHandler:
This test verifies that when:
1. The bot starts speaking (triggers conversation tracking)
2. No user input is received for the timeout period
3. The UserIdleProcessor triggers the idle callback
3. The on_user_turn_idle event triggers the idle handler
The engine's user idle callback should:
The engine's user idle handler should:
- First retry: Send a message asking if user is still there
- Second retry: Send goodbye message and end the call
"""
llm, context, user_idle_processor = await run_pipeline_with_user_idle(
llm, context = await run_pipeline_with_user_idle(
workflow=simple_workflow,
user_idle_timeout=0.2, # Short timeout for faster test
)
@ -220,7 +223,7 @@ class TestUserIdleHandler:
MockLLMService.create_text_chunks("Response 3"),
]
llm, context, user_idle_processor = await run_pipeline_with_user_idle(
llm, context = await run_pipeline_with_user_idle(
workflow=three_node_workflow,
user_idle_timeout=0.2,
mock_steps=mock_steps,

View file

@ -1,6 +1,6 @@
services:
postgres:
image: postgres:17
image: pgvector/pgvector:pg17
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
@ -83,6 +83,10 @@ services:
ENVIRONMENT: "local"
LOG_LEVEL: "INFO"
# Replace this environment variable if you are using a custom
# domain to host the stack
BACKEND_API_ENDPOINT: "http://localhost:8000"
# Database configuration (using containerized postgres)
DATABASE_URL: "postgresql+asyncpg://postgres:postgres@postgres:5432/postgres"

View file

@ -162,6 +162,10 @@ server {
}
```
### Add environment variable
Set the `BACKEND_API_ENDPOINT` environment variable in `docker-compose.yaml` to your custom domain, including the scheme (for example, `https://your-domain.com`).
### Start Dograh Services
Start Dograh with the updated configuration:

135
evals/stt/README.md Normal file
View file

@ -0,0 +1,135 @@
# STT Evaluation Benchmark
Benchmark for comparing Speech-to-Text providers using **WebSocket streaming**, with a focus on:
- **Speaker diarization** - identifying who said what
- **Keyterm boosting** - improving recognition of specific terms (Deepgram)
## Providers
| Provider | Diarization | Keyterm Boost | Streaming |
|----------|-------------|---------------|-----------|
| Deepgram | Yes | Yes | WebSocket (v1/v2) |
| Speechmatics | Yes | Additional vocab | WebSocket RT |
## Setup
```bash
# Install dependencies
pip install websockets
# Set API keys
export DEEPGRAM_API_KEY="your-key"
export SPEECHMATICS_API_KEY="your-key"
```
**Note:** Requires `ffmpeg` installed for audio conversion to PCM16.
## Usage
Run from the project root directory:
```bash
# Test both providers with diarization
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
# Test only Deepgram
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
# Test with keyterm boosting (Deepgram)
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --keyterms "Dograh" "Pipecat"
# Use different sample rate (default: 8000 Hz)
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --sample-rate 16000
# Show word-level timings
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --show-words
# Save results to JSON
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --save
```
## CLI Options
| Option | Description |
|--------|-------------|
| `audio_file` | Path to audio file (relative to evals/stt/ or absolute) |
| `--providers` | Providers to test: `deepgram`, `speechmatics` (default: both) |
| `--diarize` | Enable speaker diarization |
| `--keyterms` | Keywords to boost (Deepgram) / additional vocab (Speechmatics) |
| `--language` | Language code (default: en) |
| `--sample-rate` | Audio sample rate for streaming (default: 8000) |
| `--show-words` | Show individual word timings |
| `--save` | Save results to JSON in `results/` |
## Directory Structure
```
evals/stt/
├── audio/ # Audio test files
│ └── multi_speaker.m4a
├── results/ # Saved benchmark results (JSON)
├── providers/ # STT provider implementations
│ ├── base.py # Base classes
│ ├── deepgram_provider.py # WebSocket streaming
│ └── speechmatics_provider.py # WebSocket streaming
├── audio_streamer.py # PCM16 audio file streamer
├── benchmark.py # Main runner script
└── README.md
```
## How It Works
1. **Audio Conversion**: The `AudioStreamer` converts any audio file to raw PCM16 using ffmpeg
2. **WebSocket Connection**: Providers connect to their respective WebSocket APIs
3. **Streaming**: Audio is sent in chunks (configurable sample rate, default 8kHz)
4. **Result Collection**: Transcripts and speaker info are collected from WebSocket responses
5. **Comparison**: Results are parsed into a common format for comparison
## Output Example
```
Audio file: /path/to/audio/multi_speaker.m4a
Providers: ['deepgram', 'speechmatics']
Diarization: True
Sample rate: 8000 Hz
============================================================
Provider: DEEPGRAM
============================================================
Duration: 45.32s
Speakers detected: 2 - ['0', '1']
Transcript:
Hello, welcome to the demo...
--- Speaker Segments ---
[0.0s] Speaker 0: Hello, welcome to the demo.
[2.5s] Speaker 1: Thanks for having me.
...
============================================================
COMPARISON SUMMARY
============================================================
Provider Duration Speakers Words
---------------------------------------------
deepgram 45.32 2 312
speechmatics 45.32 2 308
```
## Adding New Providers
1. Create a new file in `providers/` (e.g., `whisper_provider.py`)
2. Implement the `STTProvider` abstract class with WebSocket streaming
3. Use `AudioStreamer` for PCM16 conversion
4. Add to `providers/__init__.py`
5. Add to `benchmark.py` provider choices
## API Documentation
- Deepgram Streaming: https://developers.deepgram.com/docs/live-streaming-audio
- Deepgram Diarization: https://developers.deepgram.com/docs/diarization
- Deepgram Keyterms: https://developers.deepgram.com/docs/keyterm
- Speechmatics RT API: https://docs.speechmatics.com/rt-api-ref
- Speechmatics Diarization: https://docs.speechmatics.com/features/diarization

1
evals/stt/__init__.py Normal file
View file

@ -0,0 +1 @@
# STT Evaluation Benchmark

Binary file not shown.

BIN
evals/stt/audio/nope.m4a Normal file

Binary file not shown.

Binary file not shown.

BIN
evals/stt/audio/vad.m4a Normal file

Binary file not shown.

BIN
evals/stt/audio/yes.m4a Normal file

Binary file not shown.

140
evals/stt/audio_streamer.py Normal file
View file

@ -0,0 +1,140 @@
"""Audio file streamer - converts audio files to PCM16 streams."""
import asyncio
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import AsyncIterator
@dataclass
class AudioConfig:
    """Parameters describing the PCM stream and how it is cut into chunks."""

    sample_rate: int = 8000
    channels: int = 1
    sample_width: int = 2  # bytes per sample (16-bit audio)
    chunk_duration_ms: int = 80  # length of audio carried by each chunk

    @property
    def chunk_size(self) -> int:
        """Size in bytes of one chunk of ``chunk_duration_ms`` milliseconds."""
        frames_in_chunk = int(self.sample_rate * self.chunk_duration_ms / 1000)
        bytes_per_frame_group = frames_in_chunk * self.channels
        return bytes_per_frame_group * self.sample_width
class AudioStreamer:
    """Streams audio files as PCM16 chunks.

    Converts any audio format to raw PCM16 using ffmpeg and streams
    in real-time chunks to simulate live audio. The ``ffmpeg`` and ``ffprobe``
    executables are invoked by name, so they must be on ``PATH``.
    """

    def __init__(self, config: "AudioConfig | None" = None):
        self.config = config or AudioConfig()

    def convert_to_pcm16(self, audio_path: Path) -> bytes:
        """Convert audio file to raw PCM16 bytes using ffmpeg.

        This call blocks until ffmpeg exits.

        Args:
            audio_path: Path to input audio file

        Returns:
            Raw PCM16 audio bytes

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        """
        cmd = [
            "ffmpeg",
            "-i",
            str(audio_path),
            "-f",
            "s16le",  # signed 16-bit little-endian
            "-acodec",
            "pcm_s16le",
            "-ar",
            str(self.config.sample_rate),
            "-ac",
            str(self.config.channels),
            "-",  # output to stdout
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            check=True,
        )
        return result.stdout

    async def stream_file(
        self,
        audio_path: Path,
        realtime: bool = True,
        trailing_silence_seconds: float = 0.0,
    ) -> AsyncIterator[bytes]:
        """Stream audio file as PCM16 chunks.

        Args:
            audio_path: Path to audio file
            realtime: If True, add delays to simulate real-time streaming
            trailing_silence_seconds: Seconds of silence to append after audio ends.
                Useful for capturing pending end-of-turn events from STT providers.
                Rounded down to a whole number of chunks.

        Yields:
            PCM16 audio chunks

        Raises:
            ValueError: If the configured chunk size is not positive.
        """
        chunk_size = self.config.chunk_size
        if chunk_size <= 0:
            raise ValueError(
                f"chunk size must be positive, got {chunk_size}; "
                "check sample_rate and chunk_duration_ms"
            )

        # The conversion is a blocking subprocess call; run it in a worker
        # thread so the event loop stays responsive while ffmpeg runs.
        pcm_data = await asyncio.to_thread(self.convert_to_pcm16, audio_path)

        delay = self.config.chunk_duration_ms / 1000.0 if realtime else 0

        # Stream audio chunks
        for i in range(0, len(pcm_data), chunk_size):
            yield pcm_data[i : i + chunk_size]
            if delay > 0:
                await asyncio.sleep(delay)

        # Stream trailing silence if requested
        if trailing_silence_seconds > 0:
            silence_chunk = bytes(chunk_size)  # Zero-filled bytes = silence
            # The small epsilon keeps exact multiples from being truncated by
            # float rounding (e.g. 0.3 / 0.1 == 2.9999999999999996).
            num_silence_chunks = int(
                trailing_silence_seconds * 1000.0 / self.config.chunk_duration_ms
                + 1e-9
            )
            for _ in range(num_silence_chunks):
                yield silence_chunk
                if delay > 0:
                    await asyncio.sleep(delay)

    async def stream_file_fast(self, audio_path: Path) -> AsyncIterator[bytes]:
        """Stream audio file as fast as possible (no real-time delay).

        Args:
            audio_path: Path to audio file

        Yields:
            PCM16 audio chunks
        """
        async for chunk in self.stream_file(audio_path, realtime=False):
            yield chunk

    def get_duration(self, audio_path: Path) -> float:
        """Get audio file duration in seconds.

        Args:
            audio_path: Path to audio file

        Returns:
            Duration in seconds, as reported by ffprobe

        Raises:
            subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
            ValueError: If ffprobe's output is not a number.
        """
        cmd = [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(audio_path),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return float(result.stdout.strip())

247
evals/stt/benchmark.py Normal file
View file

@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""STT Benchmark Runner.
Compare speech-to-text transcription across providers with focus on:
- Speaker diarization accuracy
- Keyword/keyterm recognition
- Transcription quality
Usage:
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --keyterms "Dograh" "Pipecat"
"""
import argparse
import asyncio
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
from evals.stt.providers import (
DeepgramProvider,
DeepgramFluxProvider,
SpeechmaticsProvider,
LocalSmartTurnProvider,
STTProvider,
TranscriptionResult,
)
def get_provider(name: str) -> STTProvider:
    """Instantiate the STT provider registered under ``name``.

    Args:
        name: Provider key: ``deepgram``, ``deepgram-flux``, ``speechmatics``
            or ``local-smart-turn``.

    Returns:
        A new provider instance, constructed with no arguments.

    Raises:
        ValueError: If ``name`` is not a registered provider key.
    """
    registry = {
        "deepgram": DeepgramProvider,
        "deepgram-flux": DeepgramFluxProvider,
        "speechmatics": SpeechmaticsProvider,
        "local-smart-turn": LocalSmartTurnProvider,
    }

    provider_cls = registry.get(name)
    if provider_cls is None:
        raise ValueError(f"Unknown provider: {name}. Available: {list(registry.keys())}")

    return provider_cls()
async def run_transcription(
    provider: STTProvider,
    audio_path: Path,
    diarize: bool = False,
    keyterms: list[str] | None = None,
    **kwargs: Any,
) -> TranscriptionResult:
    """Print a banner for ``provider`` and transcribe ``audio_path`` with it.

    Args:
        provider: Provider to run
        audio_path: Path to the audio file
        diarize: Forwarded to ``provider.transcribe``
        keyterms: Forwarded to ``provider.transcribe``
        **kwargs: Extra keyword arguments forwarded to ``provider.transcribe``

    Returns:
        Whatever ``provider.transcribe`` returns.

    Raises:
        Exception: Any error from the provider is printed and then re-raised.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Provider: {provider.name.upper()}")
    print(banner)

    try:
        return await provider.transcribe(
            audio_path,
            diarize=diarize,
            keyterms=keyterms,
            **kwargs,
        )
    except Exception as exc:
        # Report which provider failed, then let the caller decide what to do.
        print(f"Error with {provider.name}: {exc}")
        raise
def print_result(result: TranscriptionResult, show_words: bool = False) -> None:
    """Write a human-readable summary of one transcription to stdout.

    Always prints duration, detected speakers and the transcript. Speaker
    segments are printed only when at least one speaker was detected.

    Args:
        result: Transcription to display
        show_words: When true, also list the first 50 words with their start
            time, speaker tag (if any) and confidence.
    """
    speakers = result.speakers
    print(f"\nDuration: {result.duration:.2f}s")
    print(f"Speakers detected: {len(speakers)} - {speakers}")
    print(f"\nTranscript:\n{result.transcript}")

    if speakers:
        print("\n--- Speaker Segments ---")
        for seg in result.get_speaker_segments():
            # Segments without a speaker label are shown as "?".
            label = seg["speaker"] or "?"
            seg_text = seg["text"]
            seg_start = seg["start"]
            print(f"[{seg_start:.1f}s] Speaker {label}: {seg_text}")

    if not show_words:
        return

    preview_limit = 50
    print("\n--- Words ---")
    for entry in result.words[:preview_limit]:
        tag = f" (S{entry.speaker})" if entry.speaker else ""
        print(f" {entry.start:.2f}s: {entry.word}{tag} [{entry.confidence:.2f}]")
    hidden = len(result.words) - preview_limit
    if hidden > 0:
        print(f" ... and {hidden} more words")
def save_results(
    results: list[TranscriptionResult],
    output_dir: Path,
    audio_name: str,
) -> Path:
    """Write all results to a timestamped JSON file and return its path.

    The file is named ``{audio_name}_{YYYYmmdd_HHMMSS}.json`` and is created
    inside ``output_dir`` (which is created if missing).

    Args:
        results: Transcriptions to serialize via their ``to_dict`` method
        output_dir: Directory to write into
        audio_name: Label used in the file name and the ``audio_file`` field

    Returns:
        Path to the written JSON file
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = output_dir / f"{audio_name}_{stamp}.json"

    payload = {
        "timestamp": stamp,
        "audio_file": audio_name,
        "results": [item.to_dict() for item in results],
    }
    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=2)

    print(f"\nResults saved to: {destination}")
    return destination
def compare_results(results: list[TranscriptionResult]) -> None:
    """Print a side-by-side table of the results, one row per provider.

    Does nothing when fewer than two results are given. After the table, a
    note is printed if the providers disagree on the number of speakers.

    Args:
        results: Transcriptions to compare
    """
    if len(results) < 2:
        return

    rule = "=" * 60
    print(f"\n{rule}")
    print("COMPARISON SUMMARY")
    print(rule)

    print(f"\n{'Provider':<15} {'Duration':<10} {'Speakers':<10} {'Words':<10}")
    print("-" * 45)

    speaker_counts = {}
    for entry in results:
        n_speakers = len(entry.speakers)
        n_words = len(entry.words)
        print(f"{entry.provider:<15} {entry.duration:<10.2f} {n_speakers:<10} {n_words:<10}")
        speaker_counts[entry.provider] = n_speakers

    # Flag disagreement on how many distinct speakers were found.
    distinct_counts = set(speaker_counts.values())
    if len(distinct_counts) > 1:
        print(f"\nNote: Providers detected different speaker counts: {speaker_counts}")
async def main() -> int:
    """Command-line entry point for the STT benchmark.

    Parses arguments, runs every requested provider against the audio file,
    prints each result, prints a comparison when more than one provider
    succeeded, and optionally saves the results as JSON.

    Returns:
        Process exit code: 0 when at least one provider produced a result,
        1 when the audio file is missing or every provider failed.
    """
    parser = argparse.ArgumentParser(
        description="STT Benchmark - Compare transcription providers",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
python -m evals.stt.benchmark audio/multi_speaker.m4a --keyterms "Dograh" "API"
""",
    )
    parser.add_argument(
        "audio_file",
        type=str,
        help="Path to audio file (relative to evals/stt/ or absolute)",
    )
    parser.add_argument(
        "--providers",
        nargs="+",
        default=["deepgram", "speechmatics"],
        choices=["deepgram", "deepgram-flux", "speechmatics", "local-smart-turn"],
        # The help text must describe the actual default, which is not "all".
        help="Providers to test (default: deepgram speechmatics)",
    )
    parser.add_argument(
        "--diarize",
        action="store_true",
        help="Enable speaker diarization",
    )
    parser.add_argument(
        "--keyterms",
        nargs="+",
        help="Keywords to boost (Deepgram only)",
    )
    parser.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)",
    )
    parser.add_argument(
        "--sample-rate",
        type=int,
        default=8000,
        help="Audio sample rate for streaming (default: 8000)",
    )
    parser.add_argument(
        "--show-words",
        action="store_true",
        help="Show individual word timings",
    )
    parser.add_argument(
        "--save",
        action="store_true",
        help="Save results to JSON file",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="results",
        help="Output directory for results (default: results)",
    )
    args = parser.parse_args()

    # Relative audio paths are resolved against this script's directory.
    script_dir = Path(__file__).parent
    audio_path = Path(args.audio_file)
    if not audio_path.is_absolute():
        audio_path = script_dir / audio_path
    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        return 1

    print(f"Audio file: {audio_path}")
    print(f"Providers: {args.providers}")
    print(f"Diarization: {args.diarize}")
    print(f"Sample rate: {args.sample_rate} Hz")
    if args.keyterms:
        print(f"Keyterms: {args.keyterms}")

    results: list[TranscriptionResult] = []
    for provider_name in args.providers:
        # One failing provider must not abort the whole benchmark run.
        try:
            provider = get_provider(provider_name)
            result = await run_transcription(
                provider,
                audio_path,
                diarize=args.diarize,
                keyterms=args.keyterms,
                language=args.language,
                sample_rate=args.sample_rate,
            )
            print_result(result, show_words=args.show_words)
            results.append(result)
        except Exception as e:
            print(f"\nFailed to run {provider_name}: {e}")

    if not results:
        # Every provider failed: report it through the exit code instead of
        # pretending the benchmark succeeded.
        print("\nError: no provider produced a result")
        return 1

    if len(results) > 1:
        compare_results(results)

    if args.save:
        output_dir = script_dir / args.output_dir
        save_results(results, output_dir, audio_path.stem)

    return 0


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))

251
evals/stt/event_capture.py Normal file
View file

@ -0,0 +1,251 @@
#!/usr/bin/env python3
"""STT Event Capture Runner.
Streams audio to STT providers and captures raw WebSocket events with timestamps
for visualization in the web UI.
Usage:
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider speechmatics
"""
import argparse
import asyncio
import json
import sys
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Callable
from evals.stt.audio_streamer import AudioStreamer
from evals.stt.providers import (
DeepgramFluxProvider,
DeepgramProvider,
SpeechmaticsProvider,
STTProvider,
)
@dataclass
class CapturedEvent:
    """One raw provider event together with the moment it was observed."""

    timestamp: float  # Seconds elapsed relative to the capture's start reference
    event_type: str  # Provider message type, e.g. "Results", "TurnInfo", "AddTranscript"
    data: dict[str, Any]  # Raw event payload, stored by reference (not copied)

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict with the three field names as keys."""
        return dict(
            timestamp=self.timestamp,
            event_type=self.event_type,
            data=self.data,
        )
@dataclass
class EventCaptureResult:
    """Everything recorded during one event-capture session."""

    audio_file: str  # File name of the audio that was streamed
    audio_path: str  # Relative path to audio from results dir
    provider: str  # Name of the provider that produced the events
    duration: float  # Audio duration
    created_at: str  # Creation time of this result, as a string
    events: list[CapturedEvent] = field(default_factory=list)  # Captured events, in arrival order
    transcript: str = ""  # Final transcript for reference

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-ready dict; each event is converted via its ``to_dict``."""
        serialized_events = [event.to_dict() for event in self.events]
        return dict(
            audio_file=self.audio_file,
            audio_path=self.audio_path,
            provider=self.provider,
            duration=self.duration,
            created_at=self.created_at,
            events=serialized_events,
            transcript=self.transcript,
        )
# Signature of an event callback: called with (event_type, data), returns nothing.
EventCallback = Callable[[str, dict[str, Any]], None]
def get_provider(name: str) -> STTProvider:
    """Instantiate the STT provider registered under ``name``.

    Args:
        name: One of "deepgram", "deepgram-flux", "speechmatics".

    Returns:
        A new provider instance, constructed with no arguments.

    Raises:
        ValueError: If ``name`` is not a registered provider.
    """
    registry = {
        "deepgram": DeepgramProvider,
        "deepgram-flux": DeepgramFluxProvider,
        "speechmatics": SpeechmaticsProvider,
    }
    provider_cls = registry.get(name)
    if provider_cls is None:
        raise ValueError(f"Unknown provider: {name}. Available: {list(registry.keys())}")
    return provider_cls()
async def capture_events(
    provider: STTProvider,
    audio_path: Path,
    sample_rate: int = 8000,
    **kwargs: Any,
) -> EventCaptureResult:
    """Transcribe ``audio_path`` while recording every event the provider emits.

    Each event is stamped with the event loop's clock. The clock origin is
    taken when the first event arrives, so timestamps are relative to the
    first event rather than to the moment streaming was requested.

    Args:
        provider: The STT provider to use
        audio_path: Path to the audio file
        sample_rate: Audio sample rate
        **kwargs: Additional provider parameters

    Returns:
        EventCaptureResult with all captured events
    """
    # Audio duration is measured up front, independently of the provider.
    duration = AudioStreamer().get_duration(audio_path)

    captured: list[CapturedEvent] = []
    origin: float | None = None

    def on_event(event_type: str, data: dict[str, Any]) -> None:
        """Record one event with its offset from the first event."""
        nonlocal origin
        if origin is None:
            origin = asyncio.get_event_loop().time()
        elapsed = asyncio.get_event_loop().time() - origin
        captured.append(
            CapturedEvent(timestamp=elapsed, event_type=event_type, data=data)
        )

    # The provider invokes on_event for each raw message it receives.
    transcription = await provider.transcribe(
        audio_path,
        sample_rate=sample_rate,
        on_event=on_event,
        **kwargs,
    )

    file_name = audio_path.name
    return EventCaptureResult(
        audio_file=file_name,
        audio_path=f"../audio/{file_name}",
        provider=provider.name,
        duration=duration,
        created_at=datetime.now().isoformat(),
        events=captured,
        transcript=transcription.transcript,
    )
def save_result(result: EventCaptureResult, output_dir: Path) -> Path:
    """Serialize a capture result to ``{audio_name}-{provider}.json``.

    ``output_dir`` is created if it does not exist. An existing file with the
    same name is overwritten.

    Args:
        result: The capture result to save
        output_dir: Directory to save results

    Returns:
        Path to the saved file
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # File name is the audio file's stem joined to the provider name.
    stem = Path(result.audio_file).stem
    destination = output_dir / f"{stem}-{result.provider}.json"

    payload = result.to_dict()
    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=2)

    return destination
async def main() -> int:
    """Command-line entry point for the event capture runner.

    Parses arguments, captures events from the selected provider, writes them
    to a JSON file and prints a short summary.

    Returns:
        Process exit code: 0 on success, 1 if the audio file is missing or the
        capture fails.
    """
    cli = argparse.ArgumentParser(
        description="STT Event Capture - Capture WebSocket events for visualization",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider speechmatics --diarize
""",
    )
    cli.add_argument(
        "audio_file",
        type=str,
        help="Path to audio file (relative to evals/stt/ or absolute)",
    )
    cli.add_argument(
        "--provider",
        required=True,
        choices=["deepgram", "deepgram-flux", "speechmatics"],
        help="STT provider to use",
    )
    cli.add_argument(
        "--sample-rate",
        type=int,
        default=8000,
        help="Audio sample rate for streaming (default: 8000)",
    )
    cli.add_argument(
        "--diarize",
        action="store_true",
        help="Enable speaker diarization",
    )
    cli.add_argument(
        "--output-dir",
        type=str,
        default="results",
        help="Output directory for results (default: results)",
    )
    opts = cli.parse_args()

    # Relative audio paths are resolved against this script's directory.
    base_dir = Path(__file__).parent
    requested = Path(opts.audio_file)
    audio_path = requested if requested.is_absolute() else base_dir / requested
    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        return 1

    print(f"Audio file: {audio_path}")
    print(f"Provider: {opts.provider}")
    print(f"Sample rate: {opts.sample_rate} Hz")
    print(f"Diarization: {opts.diarize}")

    try:
        provider = get_provider(opts.provider)
        print(f"\nCapturing events from {provider.name}...")
        capture = await capture_events(
            provider,
            audio_path,
            sample_rate=opts.sample_rate,
            diarize=opts.diarize,
        )

        saved_to = save_result(capture, base_dir / opts.output_dir)

        print("\nCapture complete!")
        print(f" Duration: {capture.duration:.2f}s")
        print(f" Events: {len(capture.events)}")
        print(f" Saved to: {saved_to}")

        # Short preview of what was captured.
        print("\nFirst 5 events:")
        for captured_event in capture.events[:5]:
            print(f" [{captured_event.timestamp:.2f}s] {captured_event.event_type}")
        return 0
    except Exception as exc:
        # Any failure is reported with a full traceback and a non-zero exit code.
        print(f"\nError: {exc}")
        import traceback

        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))

View file

@ -0,0 +1,16 @@
from .base import EventCallback, STTProvider, TranscriptionResult, Word
from .deepgram_provider import DeepgramProvider
from .deepgram_flux_provider import DeepgramFluxProvider
from .speechmatics_provider import SpeechmaticsProvider
from .local_smart_turn_provider import LocalSmartTurnProvider
# Public names re-exported by this package: the shared base types first,
# followed by the concrete provider classes imported above.
__all__ = [
"EventCallback",
"STTProvider",
"TranscriptionResult",
"Word",
"DeepgramProvider",
"DeepgramFluxProvider",
"SpeechmaticsProvider",
"LocalSmartTurnProvider",
]

128
evals/stt/providers/base.py Normal file
View file

@ -0,0 +1,128 @@
"""Base classes for STT providers."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable
# Event callback type: (event_type, data) -> None
EventCallback = Callable[[str, dict[str, Any]], None]
@dataclass
class Word:
"""Represents a transcribed word with metadata."""
word: str
start: float
end: float
confidence: float
speaker: str | None = None
speaker_confidence: float | None = None
def to_dict(self) -> dict[str, Any]:
return {
"word": self.word,
"start": self.start,
"end": self.end,
"confidence": self.confidence,
"speaker": self.speaker,
"speaker_confidence": self.speaker_confidence,
}
@dataclass
class TranscriptionResult:
    """Result from STT transcription."""

    provider: str  # Name of the provider that produced this result
    transcript: str  # Full transcript text
    words: list[Word]  # Recognized words, in order
    speakers: list[str]  # Distinct speaker labels found (empty when none)
    duration: float  # Audio duration reported for the transcription
    raw_response: dict[str, Any] = field(default_factory=dict)  # Unprocessed provider data
    params: dict[str, Any] = field(default_factory=dict)  # Request parameters that were used

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-ready dict of the result.

        ``raw_response`` is intentionally left out of the output.
        """
        return {
            "provider": self.provider,
            "transcript": self.transcript,
            "words": [w.to_dict() for w in self.words],
            "speakers": self.speakers,
            "duration": self.duration,
            "params": self.params,
        }

    def get_speaker_segments(self) -> list[dict[str, Any]]:
        """Get transcript segmented by speaker.

        Consecutive words with the same ``speaker`` value are merged into one
        segment. A segment starts at its first word's ``start`` and ends at
        its last word's ``end``.

        Returns:
            List of dicts with keys ``speaker``, ``text``, ``start`` and
            ``end``, in transcript order. Empty when there are no words.
        """
        segments: list[dict[str, Any]] = []
        current_speaker = None
        current_text: list[str] = []
        segment_start = 0.0
        segment_end = 0.0

        def flush() -> None:
            """Close the segment being built and append it to the output."""
            segments.append(
                {
                    "speaker": current_speaker,
                    "text": " ".join(current_text),
                    "start": segment_start,
                    "end": segment_end,
                }
            )

        for word in self.words:
            # A change of speaker closes the running segment.
            if current_text and word.speaker != current_speaker:
                flush()
                current_text = []
            if not current_text:
                # First word of a new segment fixes its speaker and start time.
                current_speaker = word.speaker
                segment_start = word.start
            current_text.append(word.word)
            # Track the end of the most recent word so that a segment always
            # ends where its own last word ends.
            segment_end = word.end

        if current_text:
            flush()

        return segments
class STTProvider(ABC):
    """Contract that every speech-to-text provider must implement.

    Subclasses must supply both the ``name`` property and the ``transcribe``
    coroutine; the class cannot be instantiated otherwise.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Provider name."""
        ...

    @abstractmethod
    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe an audio file.

        Args:
            audio_path: Path to the audio file
            diarize: Enable speaker diarization
            keyterms: List of keywords to boost (if supported)
            on_event: Optional callback for raw WebSocket events (event_type, data)
            **kwargs: Provider-specific parameters

        Returns:
            TranscriptionResult with transcript and metadata
        """
        ...

View file

@ -0,0 +1,235 @@
"""Deepgram Flux STT provider with WebSocket streaming.
Flux is Deepgram's conversational AI model with built-in turn detection.
It has a different API than Nova models - no language/punctuate/diarize params.
"""
import asyncio
import json
import os
from pathlib import Path
from typing import Any
from urllib.parse import urlencode
from loguru import logger
from ..audio_streamer import AudioConfig, AudioStreamer
from .base import EventCallback, STTProvider, TranscriptionResult, Word
try:
from websockets.asyncio.client import connect as websocket_connect
except ImportError:
raise ImportError("websockets required: pip install websockets")
class DeepgramFluxProvider(STTProvider):
    """Deepgram Flux Speech-to-Text provider with WebSocket streaming.

    Flux is optimized for conversational AI with built-in turn detection.

    Key differences from Nova:
    - Uses v2 API endpoint
    - Only supports English (flux-general-en)
    - No punctuate, diarize, or language params
    - Has turn detection events (StartOfTurn, EndOfTurn, EagerEndOfTurn)
    - Supports keyterm boosting

    API Docs: https://developers.deepgram.com/docs/
    """

    WS_URL = "wss://api.deepgram.com/v2/listen"

    def __init__(self, api_key: str | None = None):
        """Store the API key, falling back to the DEEPGRAM_API_KEY env var.

        Raises:
            ValueError: If no API key is passed and the env var is unset/empty.
        """
        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Deepgram API key required. Set DEEPGRAM_API_KEY env var or pass api_key."
            )

    @property
    def name(self) -> str:
        """Provider name."""
        return "deepgram-flux"

    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,  # Ignored - Flux doesn't support diarization
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        model: str = "flux-general-en",
        sample_rate: int = 16000,
        eot_threshold: float | None = 0.70,
        eot_timeout_ms: int | None = 3000,
        eager_eot_threshold: float | None = None,
        trailing_silence_seconds: float = 3.0,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe audio using Deepgram Flux WebSocket streaming.

        Audio is sent only after the server's "Connected" message arrives.
        Transcript text and words are collected from "TurnInfo" messages whose
        event is "EndOfTurn".

        Args:
            audio_path: Path to audio file
            diarize: IGNORED - Flux does not support diarization
            keyterms: List of keywords to boost recognition
            on_event: Optional callback for raw WebSocket events
            model: Flux model (default: flux-general-en)
            sample_rate: Audio sample rate (default: 16000 for Flux)
            eot_threshold: End-of-turn confidence threshold (0-1, default 0.7)
            eot_timeout_ms: Timeout in ms to force end of turn (default 3000)
            eager_eot_threshold: Threshold for eager end-of-turn events
            trailing_silence_seconds: Seconds of silence after audio to capture pending events
            **kwargs: Accepted for interface compatibility and ignored; they
                are not sent to Flux.

        Returns:
            TranscriptionResult with transcript (no speaker info - Flux doesn't support diarization)

        Raises:
            RuntimeError: If Flux sends an "Error" message.
            ConnectionError: If the receive side ends before all audio was sent.
        """
        if diarize:
            logger.warning("Flux does not support diarization - ignoring diarize=True")

        # Build query params - Flux only supports specific params
        params: dict[str, Any] = {
            "model": model,
            "encoding": "linear16",
            "sample_rate": sample_rate,
        }

        # Flux-specific turn detection params
        if eot_threshold is not None:
            params["eot_threshold"] = eot_threshold
        if eot_timeout_ms is not None:
            params["eot_timeout_ms"] = eot_timeout_ms
        if eager_eot_threshold is not None:
            params["eager_eot_threshold"] = eager_eot_threshold

        # URL-encode every pair; keyterms are sent as repeated "keyterm" params.
        query: list[tuple[str, Any]] = list(params.items())
        query.extend(("keyterm", term) for term in keyterms or [])
        ws_url = f"{self.WS_URL}?{urlencode(query)}"
        logger.debug(f"Flux WebSocket URL: {ws_url}")

        # Setup audio streamer
        audio_config = AudioConfig(sample_rate=sample_rate)
        streamer = AudioStreamer(audio_config)

        # Collect results
        all_transcripts: list[dict[str, Any]] = []
        final_transcript = ""
        duration = 0.0

        connected = asyncio.Event()

        async with websocket_connect(
            ws_url,
            additional_headers={"Authorization": f"Token {self.api_key}"},
        ) as ws:

            async def send_audio():
                """Send audio chunks to Deepgram Flux."""
                await connected.wait()
                chunk_no = 0
                async for chunk in streamer.stream_file(
                    audio_path, trailing_silence_seconds=trailing_silence_seconds
                ):
                    logger.trace(f"[deepgram-flux] Sent audio chunk {chunk_no}")
                    await ws.send(chunk)
                    chunk_no += 1

            async def receive_messages():
                """Receive and collect Flux messages."""
                nonlocal final_transcript, duration
                async for message in ws:
                    if not isinstance(message, str):
                        continue
                    data = json.loads(message)
                    msg_type = data.get("type")
                    logger.debug(f"[deepgram-flux] Received {msg_type}: {data}")

                    # Emit event via callback if provided
                    if on_event and msg_type:
                        on_event(msg_type, data)

                    if msg_type == "Connected":
                        logger.info("[deepgram-flux] Connected")
                        connected.set()
                    elif msg_type == "TurnInfo":
                        event = data.get("event")
                        transcript = data.get("transcript", "")
                        words = data.get("words", [])

                        if event == "EndOfTurn":
                            if transcript:
                                final_transcript += transcript + " "
                            if words:
                                all_transcripts.append(
                                    {
                                        "transcript": transcript,
                                        "words": words,
                                    }
                                )
                                # Get duration from last word
                                last_word = words[-1]
                                duration = max(duration, last_word.get("end", 0))
                        elif event == "TurnResumed":
                            logger.debug("TurnResumed")
                    elif msg_type == "Error":
                        raise RuntimeError(f"Deepgram Flux error: {data}")

            # Run send and receive concurrently
            send_task = asyncio.create_task(send_audio())
            receive_task = asyncio.create_task(receive_messages())
            try:
                # Wait for whichever side finishes first. Awaiting only the
                # sender would hang forever if the receiver failed before the
                # "Connected" message released the sender.
                await asyncio.wait(
                    {send_task, receive_task},
                    return_when=asyncio.FIRST_COMPLETED,
                )

                if not send_task.done():
                    # The receiver stopped (error or closed socket) while audio
                    # was still pending: stop sending and surface the cause.
                    send_task.cancel()
                    await asyncio.gather(send_task, return_exceptions=True)
                    receive_task.result()  # re-raises the receiver's error, if any
                    raise ConnectionError(
                        "[deepgram-flux] Connection closed before all audio was sent"
                    )

                send_task.result()  # propagate streaming errors
                logger.debug("[deepgram-flux] Send task done")

                # Give Flux a grace period to deliver trailing events. Nothing
                # asks the server to close, so hitting the timeout is expected.
                try:
                    await asyncio.wait_for(receive_task, timeout=10.0)
                except asyncio.TimeoutError:
                    pass
            finally:
                # Never leave a background task running past this call.
                for task in (send_task, receive_task):
                    if not task.done():
                        task.cancel()

        return self._parse_results(
            all_transcripts, final_transcript.strip(), duration, params, keyterms
        )

    def _parse_results(
        self,
        transcripts: list[dict[str, Any]],
        final_transcript: str,
        duration: float,
        params: dict[str, Any],
        keyterms: list[str] | None,
    ) -> TranscriptionResult:
        """Parse collected Flux results into TranscriptionResult.

        Args:
            transcripts: One dict per completed turn, each with "transcript"
                and "words" entries
            final_transcript: Concatenated transcript text
            duration: Duration derived from the last word's end time
            params: Query parameters that were sent
            keyterms: Keyterms that were sent, stored alongside ``params``

        Returns:
            TranscriptionResult with no speaker information.
        """
        words = [
            Word(
                word=w.get("word", ""),
                start=w.get("start", 0.0),
                end=w.get("end", 0.0),
                confidence=w.get("confidence", 0.0),
                speaker=None,  # Flux doesn't support diarization
                speaker_confidence=None,
            )
            for turn in transcripts
            for w in turn.get("words", [])
        ]

        stored_params = dict(params)
        if keyterms:
            stored_params["keyterms"] = keyterms

        return TranscriptionResult(
            provider=self.name,
            transcript=final_transcript,
            words=words,
            speakers=[],  # Flux doesn't support diarization
            duration=duration,
            raw_response={"transcripts": transcripts},
            params=stored_params,
        )

View file

@ -0,0 +1,236 @@
"""Deepgram STT provider with WebSocket streaming."""
import asyncio
import json
import os
from pathlib import Path
from typing import Any
from urllib.parse import urlencode
from ..audio_streamer import AudioConfig, AudioStreamer
from .base import EventCallback, STTProvider, TranscriptionResult, Word
from loguru import logger
try:
from websockets.asyncio.client import connect as websocket_connect
except ImportError:
raise ImportError("websockets required: pip install websockets")
class DeepgramProvider(STTProvider):
    """Deepgram Nova Speech-to-Text provider with WebSocket streaming.

    API Docs: https://developers.deepgram.com/docs/

    Supports:
    - Speaker diarization via `diarize=true`
    - Keyterm boosting via `keyterm` parameter
    - Real-time streaming via WebSocket
    - Multiple languages
    - Punctuation

    For Flux models, use DeepgramFluxProvider instead.
    """

    WS_URL = "wss://api.deepgram.com/v1/listen"

    def __init__(self, api_key: str | None = None):
        """Store the API key, falling back to the DEEPGRAM_API_KEY env var.

        Raises:
            ValueError: If no API key is passed and the env var is unset/empty.
        """
        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Deepgram API key required. Set DEEPGRAM_API_KEY env var or pass api_key."
            )

    @property
    def name(self) -> str:
        """Provider name."""
        return "deepgram"

    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        model: str = "nova-3-general",
        language: str = "en",
        sample_rate: int = 8000,
        punctuate: bool = True,
        trailing_silence_seconds: float = 3.0,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe audio using Deepgram Nova WebSocket streaming.

        Interim results are requested (and forwarded to ``on_event``), but
        only results flagged ``is_final`` contribute words and transcript
        text; interim hypotheses cover the same audio again and would
        otherwise duplicate words.

        Errors raised while streaming are logged and swallowed: the method
        then returns whatever was collected up to that point.

        Args:
            audio_path: Path to audio file
            diarize: Enable speaker diarization
            keyterms: List of keywords to boost recognition
            on_event: Optional callback for raw WebSocket events
            model: Deepgram Nova model (nova-3, nova-2, etc.)
            language: Language code
            sample_rate: Audio sample rate for streaming
            punctuate: Add punctuation
            trailing_silence_seconds: Seconds of silence after audio to capture pending events
            **kwargs: Additional Deepgram parameters, appended to the query string

        Returns:
            TranscriptionResult with transcript and speaker info
        """
        # Build query params
        params: dict[str, Any] = {
            "model": model,
            "language": language,
            "punctuate": str(punctuate).lower(),
            "encoding": "linear16",
            "sample_rate": sample_rate,
            "channels": 1,
            "interim_results": "true",
            "smart_format": "true",
            "profanity_filter": "true",
            "vad_events": "true",
            "utterance_end_ms": "1000",
        }

        if diarize:
            params["diarize"] = "true"

        # URL-encode every pair: base params, then repeated "keyterm" params,
        # then any extra caller-supplied parameters.
        query: list[tuple[str, Any]] = list(params.items())
        query.extend(("keyterm", term) for term in keyterms or [])
        query.extend(kwargs.items())
        ws_url = f"{self.WS_URL}?{urlencode(query)}"
        logger.debug(f"Deepgram WebSocket URL: {ws_url}")

        # Setup audio streamer
        audio_config = AudioConfig(sample_rate=sample_rate)
        streamer = AudioStreamer(audio_config)

        # Collect results
        all_words: list[dict[str, Any]] = []
        final_transcript = ""
        duration = 0.0

        try:
            async with websocket_connect(
                ws_url,
                additional_headers={"Authorization": f"Token {self.api_key}"},
            ) as ws:

                async def send_audio():
                    """Send audio chunks to Deepgram, then ask it to close the stream."""
                    chunk_no = 0
                    async for chunk in streamer.stream_file(
                        audio_path, trailing_silence_seconds=trailing_silence_seconds
                    ):
                        logger.trace(f"[deepgram] Sent audio chunk {chunk_no}")
                        await ws.send(chunk)
                        chunk_no += 1

                    # Send close message
                    logger.debug(f"[deepgram] Sending CloseStream after {chunk_no} chunks")
                    await ws.send(json.dumps({"type": "CloseStream"}))

                async def receive_transcripts():
                    """Receive and collect transcription results."""
                    nonlocal final_transcript, duration
                    async for message in ws:
                        if not isinstance(message, str):
                            continue
                        data = json.loads(message)
                        msg_type = data.get("type")
                        logger.debug(f"[deepgram] Received {msg_type}: {data}")

                        # Emit event via callback if provided
                        if on_event and msg_type:
                            on_event(msg_type, data)

                        if msg_type == "Results":
                            # Nova-style response
                            channel = data.get("channel", {})
                            alternatives = channel.get("alternatives", [])
                            # Only finalized results count; interim ones are
                            # superseded by the final result for that audio.
                            if alternatives and data.get("is_final"):
                                alt = alternatives[0]
                                all_words.extend(alt.get("words", []))
                                text = alt.get("transcript", "")
                                if text:
                                    # Skip empty finals (e.g. silence) so the
                                    # transcript has no runs of spaces.
                                    final_transcript += text + " "
                                duration = max(
                                    duration, data.get("duration", 0) + data.get("start", 0)
                                )
                        elif msg_type == "Metadata":
                            # Get duration from metadata
                            duration = data.get("duration", duration)
                        elif msg_type == "Error":
                            raise Exception(f"Deepgram error: {data}")

                # Run send and receive concurrently
                send_task = asyncio.create_task(send_audio())
                receive_task = asyncio.create_task(receive_transcripts())
                try:
                    # Wait for send to complete, then wait a bit for final results
                    await send_task
                    try:
                        await asyncio.wait_for(receive_task, timeout=5.0)
                    except asyncio.TimeoutError:
                        pass  # Normal - websocket closes after final results
                finally:
                    # Never leave a background task running past this call.
                    for task in (send_task, receive_task):
                        if not task.done():
                            task.cancel()
        except Exception as e:
            # Best effort: log the failure and return what was collected so far.
            logger.exception(e)

        return self._parse_results(
            all_words, final_transcript.strip(), duration, params, keyterms
        )

    def _parse_results(
        self,
        raw_words: list[dict[str, Any]],
        transcript: str,
        duration: float,
        params: dict[str, Any],
        keyterms: list[str] | None,
    ) -> TranscriptionResult:
        """Parse collected results into TranscriptionResult.

        Args:
            raw_words: Word dicts as received from Deepgram
            transcript: Concatenated final transcript text
            duration: Duration collected from the stream
            params: Query parameters that were sent
            keyterms: Keyterms that were sent, stored alongside ``params``

        Returns:
            TranscriptionResult whose ``speakers`` is the sorted set of
            speaker labels found on the words.
        """
        words = []
        speakers_set: set[str] = set()

        for w in raw_words:
            # Speaker labels are stringified; words without one get None.
            speaker = str(w.get("speaker", "")) if "speaker" in w else None
            if speaker:
                speakers_set.add(speaker)

            words.append(
                Word(
                    word=w.get("word", ""),
                    start=w.get("start", 0.0),
                    end=w.get("end", 0.0),
                    confidence=w.get("confidence", 0.0),
                    speaker=speaker,
                    speaker_confidence=w.get("speaker_confidence"),
                )
            )

        stored_params = dict(params)
        if keyterms:
            stored_params["keyterms"] = keyterms

        return TranscriptionResult(
            provider=self.name,
            transcript=transcript,
            words=words,
            speakers=sorted(speakers_set),
            duration=duration,
            raw_response={"words": raw_words},
            params=stored_params,
        )

View file

@ -0,0 +1,287 @@
"""Local Smart Turn provider for benchmarking end-of-turn detection.
Uses the pipecat smart-turn-v3 ONNX model for local ML-based turn detection.
This is NOT an STT provider - it only detects when a speaker has finished talking.
"""
import os
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import numpy as np
from loguru import logger
from ..audio_streamer import AudioConfig, AudioStreamer
from .base import EventCallback, STTProvider, TranscriptionResult, Word
try:
import onnxruntime as ort
from transformers import WhisperFeatureExtractor
except ImportError:
raise ImportError(
"onnxruntime and transformers required: pip install onnxruntime transformers"
)
@dataclass
class TurnEvent:
    """One turn-detection outcome produced by the model."""

    # Position in the audio at which the turn was detected
    timestamp: float
    # Model confidence
    probability: float
    # 1 = turn complete, 0 = turn incomplete
    prediction: int
    # Time the model took for this inference, in milliseconds
    inference_time_ms: float
class LocalSmartTurnProvider(STTProvider):
"""Local Smart Turn provider for end-of-turn detection benchmarking.
Uses the smart-turn-v3 ONNX model to detect when speakers finish talking.
This is useful for comparing turn detection accuracy against cloud services
like Deepgram Flux's built-in turn detection.
NOTE: This provider does NOT produce transcripts - only turn detection events.
"""
# Smart turn model requires 16kHz audio
REQUIRED_SAMPLE_RATE = 16000
# Model analyzes 8 seconds of audio
WINDOW_SECONDS = 8
def __init__(
self,
model_path: str | None = None,
cpu_count: int = 1,
):
"""Initialize the local smart turn provider.
Args:
model_path: Path to ONNX model file. If None, uses bundled model.
cpu_count: Number of CPUs for inference (default: 1)
"""
self.model_path = model_path
self.cpu_count = cpu_count
self._session = None
self._feature_extractor = None
def _load_model(self):
"""Lazy load the ONNX model."""
if self._session is not None:
return
model_path = self.model_path
if not model_path:
# Try to load bundled model from pipecat
model_name = "smart-turn-v3.1-cpu.onnx"
package_path = "pipecat.audio.turn.smart_turn.data"
try:
import importlib_resources as impresources
model_path = str(impresources.files(package_path).joinpath(model_name))
except Exception:
from importlib import resources as impresources
try:
with impresources.path(package_path, model_name) as f:
model_path = str(f)
except Exception:
model_path = str(impresources.files(package_path).joinpath(model_name))
logger.info(f"[local-smart-turn] Loading model from {model_path}")
# Configure ONNX runtime
so = ort.SessionOptions()
so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
so.inter_op_num_threads = 1
so.intra_op_num_threads = self.cpu_count
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
self._feature_extractor = WhisperFeatureExtractor(chunk_length=8)
self._session = ort.InferenceSession(model_path, sess_options=so)
logger.info("[local-smart-turn] Model loaded")
@property
def name(self) -> str:
    """Provider name."""
    provider_name = "local-smart-turn"
    return provider_name
def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
"""Predict end-of-turn using the ONNX model.
Args:
audio_array: Audio samples as float32 numpy array (16kHz)
Returns:
Dict with prediction (0/1) and probability
"""
# Truncate to last 8 seconds or pad to 8 seconds
max_samples = self.WINDOW_SECONDS * self.REQUIRED_SAMPLE_RATE
if len(audio_array) > max_samples:
audio_array = audio_array[-max_samples:]
elif len(audio_array) < max_samples:
padding = max_samples - len(audio_array)
audio_array = np.pad(audio_array, (padding, 0), mode="constant", constant_values=0)
# Process using Whisper's feature extractor
inputs = self._feature_extractor(
audio_array,
sampling_rate=self.REQUIRED_SAMPLE_RATE,
return_tensors="np",
padding="max_length",
max_length=self.WINDOW_SECONDS * self.REQUIRED_SAMPLE_RATE,
truncation=True,
do_normalize=True,
)
# Extract features for ONNX
input_features = inputs.input_features.squeeze(0).astype(np.float32)
input_features = np.expand_dims(input_features, axis=0)
# Run inference
start_time = time.perf_counter()
outputs = self._session.run(None, {"input_features": input_features})
inference_time = (time.perf_counter() - start_time) * 1000
# Extract probability (model returns sigmoid probabilities)
probability = outputs[0][0].item()
prediction = 1 if probability > 0.5 else 0
return {
"prediction": prediction,
"probability": probability,
"inference_time_ms": inference_time,
}
async def transcribe(
    self,
    audio_path: Path,
    diarize: bool = False,  # Ignored - not applicable
    keyterms: list[str] | None = None,  # Ignored - not applicable
    on_event: EventCallback | None = None,  # Ignored - not applicable
    sample_rate: int = 16000,  # Must be 16kHz for smart turn
    analysis_interval_ms: int = 500,  # How often to check for turn completion
    **kwargs: Any,
) -> TranscriptionResult:
    """Analyze audio for turn detection events.

    NOTE: This does NOT produce transcripts. It detects when speakers
    finish talking using ML-based turn detection.

    The whole file is decoded to 16-bit PCM, scaled to float32 in [-1, 1),
    and the model is run every ``analysis_interval_ms`` on the window of up
    to ``WINDOW_SECONDS`` of audio ending at that point.

    Args:
        audio_path: Path to audio file
        diarize: Ignored (not applicable for turn detection)
        keyterms: Ignored (not applicable for turn detection)
        on_event: Ignored (not applicable for turn detection)
        sample_rate: Must be 16000 Hz for smart turn model; any other value
            is overridden with a warning
        analysis_interval_ms: How often to run turn detection (ms); must be > 0
        **kwargs: Additional parameters (ignored)

    Returns:
        TranscriptionResult with turn detection events in raw_response. Each
        positive prediction is also exposed as a synthetic ``Word`` marker so
        consumers of the word list keep working.

    Raises:
        ValueError: If ``analysis_interval_ms`` is not positive.
    """
    # A zero interval makes range() fail with an opaque message and a negative
    # one silently analyzes nothing, so reject both up front.
    if analysis_interval_ms <= 0:
        raise ValueError(
            f"analysis_interval_ms must be a positive number of milliseconds, "
            f"got {analysis_interval_ms}"
        )

    if sample_rate != self.REQUIRED_SAMPLE_RATE:
        logger.warning(
            f"[local-smart-turn] Sample rate must be {self.REQUIRED_SAMPLE_RATE}Hz, "
            f"overriding {sample_rate}Hz"
        )
        sample_rate = self.REQUIRED_SAMPLE_RATE

    # Load model if not already loaded
    self._load_model()

    # Setup audio streamer at 16kHz
    audio_config = AudioConfig(sample_rate=sample_rate)
    streamer = AudioStreamer(audio_config)

    # Get audio duration
    duration = streamer.get_duration(audio_path)
    logger.info(f"[local-smart-turn] Processing {audio_path} ({duration:.2f}s)")

    # Collect all audio first (smart turn needs to analyze segments)
    pcm_data = streamer.convert_to_pcm16(audio_path)

    # Convert int16 PCM to float32 for the model
    audio_int16 = np.frombuffer(pcm_data, dtype=np.int16)
    audio_float32 = audio_int16.astype(np.float32) / 32768.0

    # Analyze at intervals
    turn_events: list[TurnEvent] = []
    samples_per_interval = int(sample_rate * analysis_interval_ms / 1000)
    window_samples = self.WINDOW_SECONDS * sample_rate

    analysis_points = range(samples_per_interval, len(audio_float32), samples_per_interval)
    for chunk_no, end_sample in enumerate(analysis_points):
        # Get window of audio ending at current position
        start_sample = max(0, end_sample - window_samples)
        audio_window = audio_float32[start_sample:end_sample]
        current_time = end_sample / sample_rate

        logger.debug(f"[local-smart-turn] Analyzing chunk {chunk_no} at {current_time:.2f}s")
        result = self._predict_endpoint(audio_window)

        turn_events.append(TurnEvent(
            timestamp=current_time,
            probability=result["probability"],
            prediction=result["prediction"],
            inference_time_ms=result["inference_time_ms"],
        ))

        if result["prediction"] == 1:
            # Single quotes inside the f-strings: reusing the outer quote
            # character is a SyntaxError before Python 3.12.
            logger.info(
                f"[local-smart-turn] Turn complete at {current_time:.2f}s "
                f"(prob={result['probability']:.3f})"
                f"(inf time ms={result['inference_time_ms']})"
            )

    # Convert positive turn events to word-like markers for compatibility
    words = [
        Word(
            word=f"[END_OF_TURN prob={event.probability:.2f}]",
            # Nominal 100 ms marker, clamped so it never starts before t=0.
            start=max(0.0, event.timestamp - 0.1),
            end=event.timestamp,
            confidence=event.probability,
            speaker=None,
            speaker_confidence=None,
        )
        for event in turn_events
        if event.prediction == 1
    ]

    # Count completed turns
    completed_turns = sum(1 for e in turn_events if e.prediction == 1)

    params = {
        "sample_rate": sample_rate,
        "analysis_interval_ms": analysis_interval_ms,
        "window_seconds": self.WINDOW_SECONDS,
    }

    return TranscriptionResult(
        provider=self.name,
        transcript=f"[Turn detection only - {completed_turns} turns detected]",
        words=words,
        speakers=[],  # Not applicable
        duration=duration,
        raw_response={
            "turn_events": [
                {
                    "timestamp": e.timestamp,
                    "probability": e.probability,
                    "prediction": e.prediction,
                    "inference_time_ms": e.inference_time_ms,
                }
                for e in turn_events
            ],
            "completed_turns": completed_turns,
            "total_analyses": len(turn_events),
            "avg_inference_time_ms": (
                sum(e.inference_time_ms for e in turn_events) / len(turn_events)
                if turn_events else 0
            ),
        },
        params=params,
    )

View file

@ -0,0 +1,258 @@
"""Speechmatics STT provider with WebSocket streaming."""
import asyncio
import json
import os
from pathlib import Path
from typing import Any
from loguru import logger
from ..audio_streamer import AudioConfig, AudioStreamer
from .base import EventCallback, STTProvider, TranscriptionResult, Word
try:
from websockets.asyncio.client import connect as websocket_connect
except ImportError:
raise ImportError("websockets required: pip install websockets")
class SpeechmaticsProvider(STTProvider):
    """Speechmatics Speech-to-Text provider with WebSocket streaming.

    API Docs: https://docs.speechmatics.com/

    Supports:
    - Speaker diarization via `diarization: "speaker"` config
    - Speaker sensitivity tuning
    - Real-time streaming via WebSocket
    """

    # How long to wait for EndOfTranscript once all audio has been sent.
    END_OF_TRANSCRIPT_TIMEOUT_SECONDS = 30.0

    def __init__(self, api_key: str | None = None, region: str = "eu2"):
        """Create a provider bound to one Speechmatics real-time region.

        Args:
            api_key: API key; falls back to the SPEECHMATICS_API_KEY env var.
            region: Region prefix used to build the WebSocket endpoint host.

        Raises:
            ValueError: If no API key is passed and the env var is unset/empty.
        """
        self.api_key = api_key or os.getenv("SPEECHMATICS_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Speechmatics API key required. Set SPEECHMATICS_API_KEY env var or pass api_key."
            )
        # Set region-specific endpoint
        self.ws_url = f"wss://{region}.rt.speechmatics.com/v2"

    @property
    def name(self) -> str:
        """Provider identifier used in ``TranscriptionResult.provider``."""
        return "speechmatics"

    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        language: str = "en",
        operating_point: str = "enhanced",
        sample_rate: int = 8000,
        speaker_sensitivity: float | None = None,
        max_speakers: int | None = None,
        trailing_silence_seconds: float = 3.0,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe audio using Speechmatics WebSocket streaming.

        Sends StartRecognition, streams the file as raw pcm_s16le once the
        server confirms RecognitionStarted, sends EndOfStream, then collects
        AddTranscript results until EndOfTranscript (or a timeout).

        Args:
            audio_path: Path to audio file
            diarize: Enable speaker diarization
            keyterms: Additional vocabulary (limited support)
            on_event: Optional callback for raw WebSocket events
            language: Language code
            operating_point: "standard" or "enhanced"
            sample_rate: Audio sample rate for streaming
            speaker_sensitivity: 0.0-1.0, higher = more speakers detected
            max_speakers: Maximum number of speakers to detect
            trailing_silence_seconds: Seconds of silence after audio to capture pending events
            **kwargs: Additional config parameters

        Returns:
            TranscriptionResult with transcript and speaker info

        Raises:
            RuntimeError: If Speechmatics sends an ``Error`` message, or the
                connection ends before all audio was sent and without an
                ``EndOfTranscript``.
        """
        # Build transcription config for StartRecognition message
        transcription_config: dict[str, Any] = {
            "language": language,
            "operating_point": operating_point,
            "enable_partials": False,
        }

        if diarize:
            transcription_config["diarization"] = "speaker"
            # NOTE(review): the tuning options are applied only when diarization
            # is enabled; nesting reconstructed from flattened source - confirm.
            if speaker_sensitivity is not None:
                transcription_config["speaker_diarization_config"] = {
                    "speaker_sensitivity": speaker_sensitivity
                }
            if max_speakers is not None:
                transcription_config.setdefault("speaker_diarization_config", {})[
                    "max_speakers"
                ] = max_speakers

        # Add additional vocabulary if provided
        if keyterms:
            transcription_config["additional_vocab"] = [{"content": term} for term in keyterms]

        # Audio format config
        audio_format = {
            "type": "raw",
            "encoding": "pcm_s16le",
            "sample_rate": sample_rate,
        }

        # Store params for result
        params = {
            "diarize": diarize,
            "language": language,
            "operating_point": operating_point,
            "sample_rate": sample_rate,
            "speaker_sensitivity": speaker_sensitivity,
            "max_speakers": max_speakers,
        }

        # Setup audio streamer
        audio_config = AudioConfig(sample_rate=sample_rate)
        streamer = AudioStreamer(audio_config)

        # Collect results
        all_results: list[dict[str, Any]] = []
        recognition_started = asyncio.Event()
        transcription_complete = asyncio.Event()

        async with websocket_connect(
            self.ws_url,
            additional_headers={"Authorization": f"Bearer {self.api_key}"},
        ) as ws:
            # Send StartRecognition message
            start_msg = {
                "message": "StartRecognition",
                "transcription_config": transcription_config,
                "audio_format": audio_format,
            }
            await ws.send(json.dumps(start_msg))

            async def send_audio() -> None:
                """Send audio chunks after recognition starts."""
                await recognition_started.wait()
                chunk_no = 0
                async for chunk in streamer.stream_file(
                    audio_path, trailing_silence_seconds=trailing_silence_seconds
                ):
                    await ws.send(chunk)
                    logger.debug(f"[speechmatics] Sent audio chunk {chunk_no}")
                    chunk_no += 1
                # Signal end of audio; last_seq_no is the number of chunks sent.
                logger.debug(f"[speechmatics] Sending EndOfStream after {chunk_no} chunks")
                await ws.send(json.dumps({"message": "EndOfStream", "last_seq_no": chunk_no}))

            async def receive_messages() -> None:
                """Receive and process messages until EndOfTranscript or close."""
                async for message in ws:
                    if not isinstance(message, str):
                        continue
                    data = json.loads(message)
                    msg_type = data.get("message")
                    logger.debug(f"[speechmatics] Received {msg_type}: {data}")

                    # Emit event via callback if provided
                    if on_event and msg_type:
                        on_event(msg_type, data)

                    if msg_type == "RecognitionStarted":
                        logger.info("[speechmatics] Connected")
                        recognition_started.set()
                    elif msg_type == "AddTranscript":
                        # Final transcript segment
                        all_results.extend(data.get("results", []))
                    elif msg_type == "EndOfTranscript":
                        transcription_complete.set()
                        return
                    elif msg_type == "Error":
                        raise RuntimeError(f"Speechmatics error: {data}")
                    elif msg_type == "Warning":
                        logger.warning(f"[speechmatics] Warning: {data.get('reason')}")

            # Run send and receive concurrently
            send_task = asyncio.create_task(send_audio())
            receive_task = asyncio.create_task(receive_messages())
            try:
                # Wake up when EITHER side finishes. Waiting on the sender alone
                # hangs forever if the server rejects StartRecognition, because
                # the sender is parked on recognition_started.
                await asyncio.wait(
                    {send_task, receive_task}, return_when=asyncio.FIRST_COMPLETED
                )

                if send_task.done():
                    send_task.result()  # re-raise a send failure, if any
                    # All audio is out; give the server time to flush transcripts.
                    _, pending = await asyncio.wait(
                        {receive_task}, timeout=self.END_OF_TRANSCRIPT_TIMEOUT_SECONDS
                    )
                    if pending:
                        logger.warning(
                            "[speechmatics] Timed out after "
                            f"{self.END_OF_TRANSCRIPT_TIMEOUT_SECONDS}s waiting for "
                            "EndOfTranscript; returning partial results"
                        )
                    else:
                        receive_task.result()  # re-raise a server error, if any
                        if not transcription_complete.is_set():
                            logger.warning(
                                "[speechmatics] Connection closed without "
                                "EndOfTranscript; returning partial results"
                            )
                else:
                    # Receiver ended first: server error or closed connection.
                    receive_task.result()  # re-raise a server error, if any
                    if not transcription_complete.is_set():
                        raise RuntimeError(
                            "Speechmatics connection closed before all audio was sent"
                        )
            finally:
                # cancel() is a no-op on finished tasks; gather() then reaps both
                # so nothing is left pending or with an unretrieved exception.
                for task in (send_task, receive_task):
                    task.cancel()
                await asyncio.gather(send_task, receive_task, return_exceptions=True)

        return self._parse_results(all_results, params)

    def _parse_results(
        self,
        results: list[dict[str, Any]],
        params: dict[str, Any],
    ) -> TranscriptionResult:
        """Parse Speechmatics results.

        Args:
            results: Items accumulated from ``AddTranscript`` messages.
            params: Request parameters to echo back in the result.

        Returns:
            TranscriptionResult whose transcript joins word contents with
            spaces, attaching punctuation to the preceding word. ``duration``
            is the largest ``end_time`` seen.
        """
        words = []
        speakers_set: set[str] = set()
        transcript_parts = []
        duration = 0.0

        for item in results:
            item_type = item.get("type")
            alternatives = item.get("alternatives", [])
            if not alternatives:
                continue

            # Only the first alternative of each item is used.
            alt = alternatives[0]
            content = alt.get("content", "")
            speaker = alt.get("speaker")
            if speaker:
                speakers_set.add(speaker)

            end_time = item.get("end_time", 0.0)
            duration = max(duration, end_time)

            if item_type == "word":
                words.append(
                    Word(
                        word=content,
                        start=item.get("start_time", 0.0),
                        end=end_time,
                        confidence=alt.get("confidence", 0.0),
                        speaker=speaker,
                        speaker_confidence=None,
                    )
                )
                transcript_parts.append(content)
            elif item_type == "punctuation":
                # Glue punctuation onto the previous word; punctuation that
                # arrives before any word is dropped.
                if transcript_parts:
                    transcript_parts[-1] += content

        transcript = " ".join(transcript_parts)

        return TranscriptionResult(
            provider=self.name,
            transcript=transcript,
            words=words,
            speakers=sorted(speakers_set),
            duration=duration,
            raw_response={"results": results},
            params=params,
        )

View file

@ -0,0 +1,867 @@
{
"audio_file": "multi_speaker.m4a",
"audio_path": "../audio/multi_speaker.m4a",
"provider": "deepgram-flux",
"duration": 7.987664,
"created_at": "2026-01-20T12:21:59.183902",
"events": [
{
"timestamp": 3.1916191801428795e-05,
"event_type": "Connected",
"data": {
"type": "Connected",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"sequence_id": 0
}
},
{
"timestamp": 0.6468284581787884,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2195,
"sequence_id": 1
}
},
{
"timestamp": 0.8891876661218703,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.167,
"sequence_id": 2
}
},
{
"timestamp": 1.0987569580320269,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1045,
"sequence_id": 3
}
},
{
"timestamp": 1.356455208035186,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.96,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.3054,
"sequence_id": 4
}
},
{
"timestamp": 1.6076077912002802,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.2,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2996,
"sequence_id": 5
}
},
{
"timestamp": 1.831926790997386,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.44,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1659,
"sequence_id": 6
}
},
{
"timestamp": 2.0988957500085235,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.6800001,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0922,
"sequence_id": 7
}
},
{
"timestamp": 2.320036916062236,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.9200001,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1154,
"sequence_id": 8
}
},
{
"timestamp": 2.5783222501631826,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 2.16,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0789,
"sequence_id": 9
}
},
{
"timestamp": 2.805098250042647,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 2.4,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.028,
"sequence_id": 10
}
},
{
"timestamp": 3.0677467910572886,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 2.6399999,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0544,
"sequence_id": 11
}
},
{
"timestamp": 3.3053550410550088,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 2.88,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0221,
"sequence_id": 12
}
},
{
"timestamp": 3.5730851250700653,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 3.12,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0896,
"sequence_id": 13
}
},
{
"timestamp": 3.7986690001562238,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 3.3600001,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0837,
"sequence_id": 14
}
},
{
"timestamp": 4.056284500053152,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 3.6,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0217,
"sequence_id": 15
}
},
{
"timestamp": 4.2824959580320865,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 3.84,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0277,
"sequence_id": 16
}
},
{
"timestamp": 4.541013500187546,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 4.08,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0636,
"sequence_id": 17
}
},
{
"timestamp": 4.7826515410561115,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 4.32,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.092,
"sequence_id": 18
}
},
{
"timestamp": 5.044063208159059,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 4.56,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1632,
"sequence_id": 19
}
},
{
"timestamp": 5.277323708171025,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 4.8,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1748,
"sequence_id": 20
}
},
{
"timestamp": 5.519584750058129,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 5.04,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1267,
"sequence_id": 21
}
},
{
"timestamp": 5.761642290977761,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 5.28,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.085,
"sequence_id": 22
}
},
{
"timestamp": 5.985961250029504,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 5.52,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0726,
"sequence_id": 23
}
},
{
"timestamp": 6.235282083041966,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 5.76,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1489,
"sequence_id": 24
}
},
{
"timestamp": 6.479744625044987,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 6.0,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1815,
"sequence_id": 25
}
},
{
"timestamp": 6.722758750198409,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 6.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1548,
"sequence_id": 26
}
},
{
"timestamp": 7.02101350016892,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 6.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1779,
"sequence_id": 27
}
},
{
"timestamp": 7.2554090830963105,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 6.7200003,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1924,
"sequence_id": 28
}
},
{
"timestamp": 7.495738583151251,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 6.96,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0734,
"sequence_id": 29
}
},
{
"timestamp": 7.695259500062093,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 7.2,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0621,
"sequence_id": 30
}
},
{
"timestamp": 7.9374284581281245,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 7.44,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0523,
"sequence_id": 31
}
},
{
"timestamp": 8.201127333100885,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 7.68,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0868,
"sequence_id": 32
}
},
{
"timestamp": 8.452570000197738,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 7.92,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1788,
"sequence_id": 33
}
},
{
"timestamp": 8.6957666662056,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 8.16,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.3462,
"sequence_id": 34
}
},
{
"timestamp": 8.937032666057348,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 8.4,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.3477,
"sequence_id": 35
}
},
{
"timestamp": 9.179693832993507,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 8.64,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2825,
"sequence_id": 36
}
},
{
"timestamp": 9.439219749998301,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 8.88,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1785,
"sequence_id": 37
}
},
{
"timestamp": 9.65257745818235,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 9.12,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.119,
"sequence_id": 38
}
},
{
"timestamp": 9.894739540992305,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 9.36,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0948,
"sequence_id": 39
}
},
{
"timestamp": 10.137037916108966,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 9.6,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0836,
"sequence_id": 40
}
},
{
"timestamp": 10.37885733298026,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 9.84,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0648,
"sequence_id": 41
}
},
{
"timestamp": 10.640081625198945,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 10.08,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0426,
"sequence_id": 42
}
},
{
"timestamp": 10.882513708202168,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 10.32,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0297,
"sequence_id": 43
}
},
{
"timestamp": 11.11375533300452,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 10.56,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0247,
"sequence_id": 44
}
},
{
"timestamp": 11.356210750062019,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 10.8,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0134,
"sequence_id": 45
}
},
{
"timestamp": 11.60117325000465,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 11.04,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0102,
"sequence_id": 46
}
},
{
"timestamp": 11.859979416010901,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 11.28,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0089,
"sequence_id": 47
}
},
{
"timestamp": 12.093679000157863,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 11.52,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0074,
"sequence_id": 48
}
},
{
"timestamp": 12.334945333190262,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 11.76,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.007,
"sequence_id": 49
}
},
{
"timestamp": 12.588809041073546,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 12.0,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0067,
"sequence_id": 50
}
},
{
"timestamp": 12.83585675014183,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 12.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0042,
"sequence_id": 51
}
},
{
"timestamp": 13.075434750178829,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 12.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0047,
"sequence_id": 52
}
},
{
"timestamp": 13.31491966615431,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 12.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0036,
"sequence_id": 53
}
}
],
"transcript": ""
}

View file

@ -0,0 +1,637 @@
{
"audio_file": "multi_speaker.m4a",
"audio_path": "../audio/multi_speaker.m4a",
"provider": "deepgram",
"duration": 7.987664,
"created_at": "2026-01-20T12:15:06.097292",
"events": [
{
"timestamp": 2.50060111284256e-07,
"event_type": "SpeechStarted",
"data": {
"type": "SpeechStarted",
"channel": [
0,
1
],
"timestamp": 0.13
}
},
{
"timestamp": 0.9085824999492615,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 1.0399375,
"start": 0.0,
"is_final": false,
"speech_final": false,
"channel": {
"alternatives": [
{
"transcript": "Biggest pleasure",
"confidence": 0.7919922,
"words": [
{
"word": "biggest",
"start": 0.0,
"end": 0.39999998,
"confidence": 0.7919922,
"punctuated_word": "Biggest"
},
{
"word": "pleasure",
"start": 0.39999998,
"end": 0.79999995,
"confidence": 0.77734375,
"punctuated_word": "pleasure"
}
]
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 1.9669485830236226,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 2.0799375,
"start": 0.0,
"is_final": false,
"speech_final": false,
"channel": {
"alternatives": [
{
"transcript": "",
"confidence": 0.0,
"words": []
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 3.0349432919174433,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 3.1199374,
"start": 0.0,
"is_final": false,
"speech_final": false,
"channel": {
"alternatives": [
{
"transcript": "Please give a text that I am just trying to",
"confidence": 0.4921875,
"words": [
{
"word": "please",
"start": 0.48,
"end": 0.79999995,
"confidence": 0.19970703,
"punctuated_word": "Please"
},
{
"word": "give",
"start": 0.79999995,
"end": 1.04,
"confidence": 0.2849121,
"punctuated_word": "give"
},
{
"word": "a",
"start": 0.96,
"end": 1.1999999,
"confidence": 0.4921875,
"punctuated_word": "a"
},
{
"word": "text",
"start": 1.1999999,
"end": 1.5999999,
"confidence": 0.4482422,
"punctuated_word": "text"
},
{
"word": "that",
"start": 1.5999999,
"end": 2.1599998,
"confidence": 0.5317383,
"punctuated_word": "that"
},
{
"word": "i",
"start": 2.1599998,
"end": 2.32,
"confidence": 0.984375,
"punctuated_word": "I"
},
{
"word": "am",
"start": 2.32,
"end": 2.48,
"confidence": 0.5024414,
"punctuated_word": "am"
},
{
"word": "just",
"start": 2.48,
"end": 2.6399999,
"confidence": 0.27416992,
"punctuated_word": "just"
},
{
"word": "trying",
"start": 2.6399999,
"end": 2.96,
"confidence": 0.19909668,
"punctuated_word": "trying"
},
{
"word": "to",
"start": 2.96,
"end": 3.04,
"confidence": 0.7060547,
"punctuated_word": "to"
}
]
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 4.100316457916051,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 4.1599374,
"start": 0.0,
"is_final": false,
"speech_final": false,
"channel": {
"alternatives": [
{
"transcript": "Is the test that I am just trying do so. Multiple",
"confidence": 0.7207031,
"words": [
{
"word": "is",
"start": 0.24,
"end": 0.79999995,
"confidence": 0.83251953,
"punctuated_word": "Is"
},
{
"word": "the",
"start": 0.88,
"end": 1.12,
"confidence": 0.14794922,
"punctuated_word": "the"
},
{
"word": "test",
"start": 1.12,
"end": 1.52,
"confidence": 0.7207031,
"punctuated_word": "test"
},
{
"word": "that",
"start": 1.52,
"end": 2.1599998,
"confidence": 0.40307617,
"punctuated_word": "that"
},
{
"word": "i",
"start": 2.1599998,
"end": 2.3999999,
"confidence": 0.99316406,
"punctuated_word": "I"
},
{
"word": "am",
"start": 2.3999999,
"end": 2.48,
"confidence": 0.52783203,
"punctuated_word": "am"
},
{
"word": "just",
"start": 2.48,
"end": 2.72,
"confidence": 0.27270508,
"punctuated_word": "just"
},
{
"word": "trying",
"start": 2.72,
"end": 3.12,
"confidence": 0.81591797,
"punctuated_word": "trying"
},
{
"word": "do",
"start": 3.12,
"end": 3.28,
"confidence": 0.9116211,
"punctuated_word": "do"
},
{
"word": "so",
"start": 3.28,
"end": 3.4399998,
"confidence": 0.37774658,
"punctuated_word": "so."
},
{
"word": "multiple",
"start": 3.6,
"end": 3.84,
"confidence": 0.74072266,
"punctuated_word": "Multiple"
}
]
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 4.506603500107303,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 4.53,
"start": 0.0,
"is_final": true,
"speech_final": true,
"channel": {
"alternatives": [
{
"transcript": "Is the test that I am testing multiple speaker",
"confidence": 0.65966797,
"words": [
{
"word": "is",
"start": 0.24,
"end": 0.39999998,
"confidence": 0.83984375,
"punctuated_word": "Is"
},
{
"word": "the",
"start": 0.39999998,
"end": 0.79999995,
"confidence": 0.15722656,
"punctuated_word": "the"
},
{
"word": "test",
"start": 1.12,
"end": 1.52,
"confidence": 0.8588867,
"punctuated_word": "test"
},
{
"word": "that",
"start": 1.52,
"end": 2.1599998,
"confidence": 0.35107422,
"punctuated_word": "that"
},
{
"word": "i",
"start": 2.1599998,
"end": 2.32,
"confidence": 0.99121094,
"punctuated_word": "I"
},
{
"word": "am",
"start": 2.32,
"end": 2.48,
"confidence": 0.6010742,
"punctuated_word": "am"
},
{
"word": "testing",
"start": 2.48,
"end": 3.12,
"confidence": 0.9526367,
"punctuated_word": "testing"
},
{
"word": "multiple",
"start": 3.4399998,
"end": 3.84,
"confidence": 0.65966797,
"punctuated_word": "multiple"
},
{
"word": "speaker",
"start": 3.84,
"end": 4.3199997,
"confidence": 0.20446777,
"punctuated_word": "speaker"
}
]
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 4.648572708014399,
"event_type": "SpeechStarted",
"data": {
"type": "SpeechStarted",
"channel": [
0,
1
],
"timestamp": 4.63
}
},
{
"timestamp": 5.556989792035893,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 1.0699372,
"start": 4.53,
"is_final": false,
"speech_final": false,
"channel": {
"alternatives": [
{
"transcript": "",
"confidence": 0.0,
"words": []
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 6.615257542114705,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 2.08,
"start": 4.53,
"is_final": true,
"speech_final": true,
"channel": {
"alternatives": [
{
"transcript": "",
"confidence": 0.0,
"words": []
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 6.769657667027786,
"event_type": "SpeechStarted",
"data": {
"type": "SpeechStarted",
"channel": [
0,
1
],
"timestamp": 6.72
}
},
{
"timestamp": 7.672739624977112,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 1.0099998,
"start": 6.61,
"is_final": true,
"speech_final": true,
"channel": {
"alternatives": [
{
"transcript": "",
"confidence": 0.0,
"words": []
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 8.081677624955773,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 0.3676877,
"start": 7.62,
"is_final": true,
"speech_final": true,
"channel": {
"alternatives": [
{
"transcript": "",
"confidence": 0.0,
"words": []
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 8.083154707914218,
"event_type": "Results",
"data": {
"type": "Results",
"channel_index": [
0,
1
],
"duration": 0.0,
"start": 7.9876876,
"is_final": true,
"speech_final": true,
"channel": {
"alternatives": [
{
"transcript": "",
"confidence": 0.0,
"words": []
}
]
},
"metadata": {
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"model_info": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
},
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
},
"from_finalize": false
}
},
{
"timestamp": 8.083194707985967,
"event_type": "Metadata",
"data": {
"type": "Metadata",
"transaction_key": "deprecated",
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
"sha256": "a6f954deb3fb3bf7a3c420061d5dd968251ba401d6304e6cd2fc9f396c12da77",
"created": "2026-01-20T06:44:57.522Z",
"duration": 7.9876876,
"channels": 1,
"models": [
"40bd3654-e622-47c4-a111-63a61b23bfe8"
],
"model_info": {
"40bd3654-e622-47c4-a111-63a61b23bfe8": {
"name": "general-nova-3",
"version": "2025-04-17.21547",
"arch": "nova-3"
}
}
}
}
],
"transcript": "Is the test that I am testing multiple speaker"
}

View file

@ -0,0 +1,445 @@
{
"audio_file": "nope.m4a",
"audio_path": "../audio/nope.m4a",
"provider": "deepgram-flux",
"duration": 3.390113,
"created_at": "2026-01-20T13:34:04.075559",
"events": [
{
"timestamp": 3.3294782042503357e-07,
"event_type": "Connected",
"data": {
"type": "Connected",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"sequence_id": 0
}
},
{
"timestamp": 0.6400237919297069,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1726,
"sequence_id": 1
}
},
{
"timestamp": 0.850623874925077,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0643,
"sequence_id": 2
}
},
{
"timestamp": 1.0877662498969585,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0343,
"sequence_id": 3
}
},
{
"timestamp": 1.3602930000051856,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.96,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.023,
"sequence_id": 4
}
},
{
"timestamp": 1.5734205420594662,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "StartOfTurn",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.2,
"transcript": "No.",
"words": [
{
"word": "No.",
"confidence": 0.9956
}
],
"end_of_turn_confidence": 0.1445,
"sequence_id": 5
}
},
{
"timestamp": 1.7732612078543752,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "EndOfTurn",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.36,
"transcript": "No.",
"words": [
{
"word": "No.",
"confidence": 1.0
}
],
"end_of_turn_confidence": 0.7266,
"sequence_id": 6
}
},
{
"timestamp": 2.0032672078814358,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 1.6,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2114,
"sequence_id": 7
}
},
{
"timestamp": 2.272528207860887,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 1.8399999,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2886,
"sequence_id": 8
}
},
{
"timestamp": 2.4770477078855038,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 2.08,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1366,
"sequence_id": 9
}
},
{
"timestamp": 2.7586996669415385,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 2.32,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0687,
"sequence_id": 10
}
},
{
"timestamp": 2.9688463748898357,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 2.56,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0571,
"sequence_id": 11
}
},
{
"timestamp": 3.2333728750236332,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 2.8,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0284,
"sequence_id": 12
}
},
{
"timestamp": 3.4381651668809354,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 3.04,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0352,
"sequence_id": 13
}
},
{
"timestamp": 3.7163160829804838,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 3.28,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0211,
"sequence_id": 14
}
},
{
"timestamp": 3.936306041898206,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 3.52,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0123,
"sequence_id": 15
}
},
{
"timestamp": 4.212840874912217,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 3.76,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0399,
"sequence_id": 16
}
},
{
"timestamp": 4.417071416974068,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 4.0,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0503,
"sequence_id": 17
}
},
{
"timestamp": 4.685962416930124,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 4.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0443,
"sequence_id": 18
}
},
{
"timestamp": 4.898042541928589,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 4.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0367,
"sequence_id": 19
}
},
{
"timestamp": 5.167347207898274,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 4.7200003,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0221,
"sequence_id": 20
}
},
{
"timestamp": 5.415992958005518,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 4.96,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1116,
"sequence_id": 21
}
},
{
"timestamp": 5.703707166947424,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 5.2,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0883,
"sequence_id": 22
}
},
{
"timestamp": 5.923421707935631,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 5.44,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0663,
"sequence_id": 23
}
},
{
"timestamp": 6.128664416959509,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 5.68,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0324,
"sequence_id": 24
}
},
{
"timestamp": 6.382756792008877,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 5.92,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0138,
"sequence_id": 25
}
},
{
"timestamp": 6.629080249927938,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.36,
"audio_window_end": 6.16,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0064,
"sequence_id": 26
}
}
],
"transcript": "No."
}

View file

@ -0,0 +1,678 @@
{
"audio_file": "not_so_sure.m4a",
"audio_path": "../audio/not_so_sure.m4a",
"provider": "deepgram-flux",
"duration": 3.784853,
"created_at": "2026-01-20T13:34:30.619814",
"events": [
{
"timestamp": 4.1606836020946503e-07,
"event_type": "Connected",
"data": {
"type": "Connected",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"sequence_id": 0
}
},
{
"timestamp": 0.6479636249132454,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2837,
"sequence_id": 1
}
},
{
"timestamp": 0.8711565409321338,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1409,
"sequence_id": 2
}
},
{
"timestamp": 1.0940386659931391,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.103,
"sequence_id": 3
}
},
{
"timestamp": 1.3378053328488022,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "StartOfTurn",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.96,
"transcript": "I don",
"words": [
{
"word": "I",
"confidence": 0.8521
},
{
"word": "don",
"confidence": 0.9858
}
],
"end_of_turn_confidence": 0.1526,
"sequence_id": 4
}
},
{
"timestamp": 1.575752625009045,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.2,
"transcript": "I don't know",
"words": [
{
"word": "I",
"confidence": 1.0
},
{
"word": "don't",
"confidence": 1.0
},
{
"word": "know",
"confidence": 0.9956
}
],
"end_of_turn_confidence": 0.0815,
"sequence_id": 5
}
},
{
"timestamp": 1.809568207943812,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.44,
"transcript": "I don't know. I",
"words": [
{
"word": "I",
"confidence": 1.0
},
{
"word": "don't",
"confidence": 1.0
},
{
"word": "know.",
"confidence": 1.0
},
{
"word": "I",
"confidence": 0.9995
}
],
"end_of_turn_confidence": 0.0533,
"sequence_id": 6
}
},
{
"timestamp": 2.0778977079316974,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.6800001,
"transcript": "I don't know. I'm not",
"words": [
{
"word": "I",
"confidence": 1.0
},
{
"word": "don't",
"confidence": 1.0
},
{
"word": "know.",
"confidence": 1.0
},
{
"word": "I'm",
"confidence": 1.0
},
{
"word": "not",
"confidence": 1.0
}
],
"end_of_turn_confidence": 0.0296,
"sequence_id": 7
}
},
{
"timestamp": 2.3323032909538597,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.9200001,
"transcript": "I don't know. I'm not sure she",
"words": [
{
"word": "I",
"confidence": 1.0
},
{
"word": "don't",
"confidence": 1.0
},
{
"word": "know.",
"confidence": 1.0
},
{
"word": "I'm",
"confidence": 1.0
},
{
"word": "not",
"confidence": 1.0
},
{
"word": "sure",
"confidence": 0.9692
},
{
"word": "she",
"confidence": 0.6968
}
],
"end_of_turn_confidence": 0.1591,
"sequence_id": 8
}
},
{
"timestamp": 2.563972583040595,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 2.16,
"transcript": "I don't know. I'm not so sure.",
"words": [
{
"word": "I",
"confidence": 1.0
},
{
"word": "don't",
"confidence": 1.0
},
{
"word": "know.",
"confidence": 1.0
},
{
"word": "I'm",
"confidence": 1.0
},
{
"word": "not",
"confidence": 1.0
},
{
"word": "so",
"confidence": 0.9971
},
{
"word": "sure.",
"confidence": 1.0
}
],
"end_of_turn_confidence": 0.5312,
"sequence_id": 9
}
},
{
"timestamp": 2.766235665883869,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "EndOfTurn",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 2.32,
"transcript": "I don't know. I'm not so sure.",
"words": [
{
"word": "I",
"confidence": 1.0
},
{
"word": "don't",
"confidence": 1.0
},
{
"word": "know.",
"confidence": 1.0
},
{
"word": "I'm",
"confidence": 1.0
},
{
"word": "not",
"confidence": 1.0
},
{
"word": "so",
"confidence": 0.9971
},
{
"word": "sure.",
"confidence": 1.0
}
],
"end_of_turn_confidence": 0.7129,
"sequence_id": 10
}
},
{
"timestamp": 2.980985000031069,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 2.56,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.6235,
"sequence_id": 11
}
},
{
"timestamp": 3.040183125063777,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 2.6399999,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.7163,
"sequence_id": 12
}
},
{
"timestamp": 3.134053166024387,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 2.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.7603,
"sequence_id": 13
}
},
{
"timestamp": 3.200523457955569,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 2.8,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.8013,
"sequence_id": 14
}
},
{
"timestamp": 3.3396010829601437,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 2.88,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.8052,
"sequence_id": 15
}
},
{
"timestamp": 3.462065916042775,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 3.04,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.6968,
"sequence_id": 16
}
},
{
"timestamp": 3.532107833074406,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 3.12,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.7026,
"sequence_id": 17
}
},
{
"timestamp": 3.6854247499722987,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 3.28,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.6123,
"sequence_id": 18
}
},
{
"timestamp": 3.9346718329470605,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 3.52,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.4551,
"sequence_id": 19
}
},
{
"timestamp": 4.174561291001737,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 3.76,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.293,
"sequence_id": 20
}
},
{
"timestamp": 4.423174874857068,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 4.0,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1186,
"sequence_id": 21
}
},
{
"timestamp": 4.661856249906123,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 4.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1186,
"sequence_id": 22
}
},
{
"timestamp": 4.934342915890738,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 4.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0629,
"sequence_id": 23
}
},
{
"timestamp": 5.1988217500038445,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 4.7200003,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0302,
"sequence_id": 24
}
},
{
"timestamp": 5.868438957957551,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 4.96,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0104,
"sequence_id": 25
}
},
{
"timestamp": 5.924830165924504,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 5.2,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0039,
"sequence_id": 26
}
},
{
"timestamp": 6.008775374852121,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 5.44,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.003,
"sequence_id": 27
}
},
{
"timestamp": 6.224981207866222,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 5.68,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0027,
"sequence_id": 28
}
},
{
"timestamp": 6.400387583067641,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 5.92,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0944,
"sequence_id": 29
}
},
{
"timestamp": 6.6102081660646945,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 6.16,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.083,
"sequence_id": 30
}
},
{
"timestamp": 6.853603166062385,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 6.4,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0674,
"sequence_id": 31
}
},
{
"timestamp": 7.1176844160072505,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
"event": "Update",
"turn_index": 1,
"audio_window_start": 2.32,
"audio_window_end": 6.64,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0348,
"sequence_id": 32
}
}
],
"transcript": "I don't know. I'm not so sure."
}

View file

@ -0,0 +1,936 @@
{
"audio_file": "not_so_sure.m4a",
"audio_path": "../audio/not_so_sure.m4a",
"provider": "speechmatics",
"duration": 3.784853,
"created_at": "2026-01-20T13:38:01.957263",
"events": [
{
"timestamp": 2.50060111284256e-07,
"event_type": "Info",
"data": {
"message": "Info",
"type": "concurrent_session_usage",
"reason": "1 concurrent sessions active out of quota 2",
"usage": 1,
"quota": 2,
"last_updated": "2026-01-20T08:07:53Z"
}
},
{
"timestamp": 0.17636274988763034,
"event_type": "RecognitionStarted",
"data": {
"message": "RecognitionStarted",
"orchestrator_version": "2026.01.09+e449221ca0+14.12.0",
"id": "ff50bcc6-03cc-4609-b52b-c61492be97b0",
"language_pack_info": {
"adapted": false,
"itn": true,
"language_description": "English",
"word_delimiter": " ",
"writing_direction": "left-to-right"
}
}
},
{
"timestamp": 0.1765422080643475,
"event_type": "Info",
"data": {
"message": "Info",
"type": "recognition_quality",
"reason": "Running recognition using a broadcast model quality.",
"quality": "broadcast"
}
},
{
"timestamp": 0.44156987499445677,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 1
}
},
{
"timestamp": 0.5090052080340683,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 2
}
},
{
"timestamp": 0.5927771248389035,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 3
}
},
{
"timestamp": 0.6792412919458002,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 4
}
},
{
"timestamp": 0.7540834578685462,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 5
}
},
{
"timestamp": 0.8363401249516755,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 6
}
},
{
"timestamp": 0.916276125004515,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 7
}
},
{
"timestamp": 1.0025545828975737,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 8
}
},
{
"timestamp": 1.0930295418947935,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 9
}
},
{
"timestamp": 1.1681176249403507,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 10
}
},
{
"timestamp": 1.2440201670397073,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 11
}
},
{
"timestamp": 1.3254928330425173,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 12
}
},
{
"timestamp": 1.411379124969244,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 13
}
},
{
"timestamp": 1.4989973329938948,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 14
}
},
{
"timestamp": 1.569762917002663,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 15
}
},
{
"timestamp": 1.6669557499699295,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 16
}
},
{
"timestamp": 1.7321407499257475,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 17
}
},
{
"timestamp": 1.8123597078956664,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 18
}
},
{
"timestamp": 1.89311487483792,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 19
}
},
{
"timestamp": 1.99575070803985,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 20
}
},
{
"timestamp": 2.0635348330251873,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 21
}
},
{
"timestamp": 2.136281125014648,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 22
}
},
{
"timestamp": 2.2212352079804987,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 23
}
},
{
"timestamp": 2.300102249952033,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 24
}
},
{
"timestamp": 2.3838018749374896,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 25
}
},
{
"timestamp": 2.4612751249223948,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 26
}
},
{
"timestamp": 2.5520844168495387,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 27
}
},
{
"timestamp": 2.6254100420046598,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 28
}
},
{
"timestamp": 2.7110170419327915,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 29
}
},
{
"timestamp": 2.793728666845709,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 30
}
},
{
"timestamp": 2.8698849170468748,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 31
}
},
{
"timestamp": 2.9517348748631775,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 32
}
},
{
"timestamp": 3.034996416885406,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 33
}
},
{
"timestamp": 3.1222795830108225,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 34
}
},
{
"timestamp": 3.2133053748402745,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 35
}
},
{
"timestamp": 3.2794892080128193,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 36
}
},
{
"timestamp": 3.360972832888365,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 37
}
},
{
"timestamp": 3.480351625010371,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 38
}
},
{
"timestamp": 3.527200457872823,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 39
}
},
{
"timestamp": 3.614834832958877,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 40
}
},
{
"timestamp": 3.7000621668994427,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 41
}
},
{
"timestamp": 3.7709098330233246,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 42
}
},
{
"timestamp": 3.870571249863133,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 43
}
},
{
"timestamp": 3.9319135828409344,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 44
}
},
{
"timestamp": 4.0240056668408215,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 45
}
},
{
"timestamp": 4.1135993748903275,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 46
}
},
{
"timestamp": 4.178906166926026,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 47
}
},
{
"timestamp": 4.262735291849822,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 48
}
},
{
"timestamp": 4.3524885000661016,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 49
}
},
{
"timestamp": 4.42170758289285,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 50
}
},
{
"timestamp": 4.503200083039701,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 51
}
},
{
"timestamp": 4.588893749983981,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 52
}
},
{
"timestamp": 4.6728779170662165,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 53
}
},
{
"timestamp": 4.749415792059153,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 54
}
},
{
"timestamp": 4.834314750041813,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 55
}
},
{
"timestamp": 4.934304124908522,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 56
}
},
{
"timestamp": 5.015187042066827,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 57
}
},
{
"timestamp": 5.083739625057206,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 58
}
},
{
"timestamp": 5.15739579196088,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 59
}
},
{
"timestamp": 5.254215708002448,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 60
}
},
{
"timestamp": 5.319055167026818,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 61
}
},
{
"timestamp": 5.422228208044544,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 62
}
},
{
"timestamp": 5.493815457914025,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 63
}
},
{
"timestamp": 5.562712874962017,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 64
}
},
{
"timestamp": 5.677756666904315,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 65
}
},
{
"timestamp": 5.728489124914631,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 66
}
},
{
"timestamp": 5.73234708304517,
"event_type": "AddTranscript",
"data": {
"message": "AddTranscript",
"format": "2.9",
"results": [
{
"alternatives": [
{
"confidence": 1.0,
"content": "I",
"language": "en"
}
],
"end_time": 0.8,
"start_time": 0.64,
"type": "word"
}
],
"metadata": {
"end_time": 0.8,
"start_time": 0.0,
"transcript": "I "
}
}
},
{
"timestamp": 5.831468666903675,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 67
}
},
{
"timestamp": 5.9311752079520375,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 68
}
},
{
"timestamp": 5.970860542031005,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 69
}
},
{
"timestamp": 6.0573643748648465,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 70
}
},
{
"timestamp": 6.071638958062977,
"event_type": "AddTranscript",
"data": {
"message": "AddTranscript",
"format": "2.9",
"results": [
{
"alternatives": [
{
"confidence": 1.0,
"content": "don't",
"language": "en"
}
],
"end_time": 1.08,
"start_time": 0.84,
"type": "word"
},
{
"alternatives": [
{
"confidence": 1.0,
"content": "know",
"language": "en"
}
],
"end_time": 1.2,
"start_time": 1.08,
"type": "word"
},
{
"alternatives": [
{
"confidence": 1.0,
"content": ".",
"language": "en"
}
],
"attaches_to": "previous",
"end_time": 1.2,
"is_eos": true,
"start_time": 1.2,
"type": "punctuation"
}
],
"metadata": {
"end_time": 1.2,
"start_time": 0.8,
"transcript": "don't know. "
}
}
},
{
"timestamp": 6.143923291936517,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 71
}
},
{
"timestamp": 6.229828458046541,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 72
}
},
{
"timestamp": 6.297467292053625,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 73
}
},
{
"timestamp": 6.388417499838397,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 74
}
},
{
"timestamp": 6.46747541683726,
"event_type": "AddTranscript",
"data": {
"message": "AddTranscript",
"format": "2.9",
"results": [
{
"alternatives": [
{
"confidence": 1.0,
"content": "I'm",
"language": "en"
}
],
"end_time": 1.4,
"start_time": 1.2,
"type": "word"
},
{
"alternatives": [
{
"confidence": 1.0,
"content": "not",
"language": "en"
}
],
"end_time": 1.56,
"start_time": 1.4,
"type": "word"
}
],
"metadata": {
"end_time": 1.56,
"start_time": 1.2,
"transcript": "I'm not "
}
}
},
{
"timestamp": 6.467542249942198,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 75
}
},
{
"timestamp": 6.571689167059958,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 76
}
},
{
"timestamp": 6.633496082853526,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 77
}
},
{
"timestamp": 6.705628624884412,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 78
}
},
{
"timestamp": 6.791943500051275,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 79
}
},
{
"timestamp": 6.8231504168361425,
"event_type": "AddTranscript",
"data": {
"message": "AddTranscript",
"format": "2.9",
"results": [
{
"alternatives": [
{
"confidence": 1.0,
"content": "so",
"language": "en"
}
],
"end_time": 1.72,
"start_time": 1.56,
"type": "word"
}
],
"metadata": {
"end_time": 1.72,
"start_time": 1.56,
"transcript": "so "
}
}
},
{
"timestamp": 6.889297208050266,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 80
}
},
{
"timestamp": 6.96820458304137,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 81
}
},
{
"timestamp": 7.030788874952123,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 82
}
},
{
"timestamp": 7.114988874876872,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 83
}
},
{
"timestamp": 7.1660370419267565,
"event_type": "AddTranscript",
"data": {
"message": "AddTranscript",
"format": "2.9",
"results": [
{
"alternatives": [
{
"confidence": 1.0,
"content": "sure",
"language": "en"
}
],
"end_time": 2.2,
"start_time": 1.76,
"type": "word"
},
{
"alternatives": [
{
"confidence": 1.0,
"content": ".",
"language": "en"
}
],
"attaches_to": "previous",
"end_time": 2.2,
"is_eos": true,
"start_time": 2.2,
"type": "punctuation"
}
],
"metadata": {
"end_time": 2.2,
"start_time": 1.72,
"transcript": "sure. "
}
}
},
{
"timestamp": 7.197767958045006,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 84
}
},
{
"timestamp": 7.281636083032936,
"event_type": "AudioAdded",
"data": {
"message": "AudioAdded",
"seq_no": 85
}
},
{
"timestamp": 7.966639708029106,
"event_type": "AddTranscript",
"data": {
"message": "AddTranscript",
"format": "2.9",
"results": [],
"metadata": {
"end_time": 6.72,
"start_time": 2.28,
"transcript": ""
}
}
},
{
"timestamp": 7.966674832860008,
"event_type": "EndOfTranscript",
"data": {
"message": "EndOfTranscript"
}
}
],
"transcript": "I don't know. I'm not so sure."
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,402 @@
{
"audio_file": "yes.m4a",
"audio_path": "../audio/yes.m4a",
"provider": "deepgram-flux",
"duration": 2.507755,
"created_at": "2026-01-20T13:33:37.737569",
"events": [
{
"timestamp": 2.0791776478290558e-07,
"event_type": "Connected",
"data": {
"type": "Connected",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"sequence_id": 0
}
},
{
"timestamp": 0.6149860408622772,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2494,
"sequence_id": 1
}
},
{
"timestamp": 0.8699209159240127,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1246,
"sequence_id": 2
}
},
{
"timestamp": 1.0665327080059797,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0557,
"sequence_id": 3
}
},
{
"timestamp": 1.319559457944706,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "StartOfTurn",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 0.96,
"transcript": "Yes.",
"words": [
{
"word": "Yes.",
"confidence": 0.9761
}
],
"end_of_turn_confidence": 0.0793,
"sequence_id": 4
}
},
{
"timestamp": 1.5604322908911854,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.2,
"transcript": "Yes.",
"words": [
{
"word": "Yes.",
"confidence": 1.0
}
],
"end_of_turn_confidence": 0.5703,
"sequence_id": 5
}
},
{
"timestamp": 1.6325784579385072,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "EndOfTurn",
"turn_index": 0,
"audio_window_start": 0.0,
"audio_window_end": 1.28,
"transcript": "Yes.",
"words": [
{
"word": "Yes.",
"confidence": 1.0
}
],
"end_of_turn_confidence": 0.7026,
"sequence_id": 6
}
},
{
"timestamp": 1.897370790829882,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 1.52,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.4883,
"sequence_id": 7
}
},
{
"timestamp": 2.117000916041434,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 1.76,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.3801,
"sequence_id": 8
}
},
{
"timestamp": 2.3733394159935415,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 2.0,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.2346,
"sequence_id": 9
}
},
{
"timestamp": 2.6072654998861253,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 2.24,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1049,
"sequence_id": 10
}
},
{
"timestamp": 2.85038537485525,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 2.48,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.075,
"sequence_id": 11
}
},
{
"timestamp": 3.091235165949911,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 2.72,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0218,
"sequence_id": 12
}
},
{
"timestamp": 3.3325049998238683,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 2.96,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.03,
"sequence_id": 13
}
},
{
"timestamp": 3.577521916013211,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 3.2,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0189,
"sequence_id": 14
}
},
{
"timestamp": 3.8645569998770952,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 3.44,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0118,
"sequence_id": 15
}
},
{
"timestamp": 4.106258499901742,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 3.68,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0089,
"sequence_id": 16
}
},
{
"timestamp": 4.346511875046417,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 3.92,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0073,
"sequence_id": 17
}
},
{
"timestamp": 4.589668208034709,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 4.16,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0053,
"sequence_id": 18
}
},
{
"timestamp": 4.826804416021332,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 4.4,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0034,
"sequence_id": 19
}
},
{
"timestamp": 5.060472874902189,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 4.64,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0024,
"sequence_id": 20
}
},
{
"timestamp": 5.304136332822964,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 4.88,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1091,
"sequence_id": 21
}
},
{
"timestamp": 5.544230999890715,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 5.12,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.1007,
"sequence_id": 22
}
},
{
"timestamp": 5.779906540876254,
"event_type": "TurnInfo",
"data": {
"type": "TurnInfo",
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
"event": "Update",
"turn_index": 1,
"audio_window_start": 1.28,
"audio_window_end": 5.36,
"transcript": "",
"words": [],
"end_of_turn_confidence": 0.0565,
"sequence_id": 23
}
}
],
"transcript": "Yes."
}

41
evals/visualizer/.gitignore vendored Normal file
View file

@ -0,0 +1,41 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# env files (can opt-in for committing if needed)
.env*
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts

View file

@ -0,0 +1,36 @@
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
## Getting Started
First, run the development server:
```bash
npm run dev
# or
yarn dev
# or
pnpm dev
# or
bun dev
```
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
## Learn More
To learn more about Next.js, take a look at the following resources:
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
## Deploy on Vercel
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.

View file

@ -0,0 +1,18 @@
import { defineConfig, globalIgnores } from "eslint/config";
import nextVitals from "eslint-config-next/core-web-vitals";
import nextTs from "eslint-config-next/typescript";
// Flat ESLint configuration: spreads the two eslint-config-next presets
// imported above (core-web-vitals and typescript), then registers the global
// ignore patterns listed below.
const eslintConfig = defineConfig([
...nextVitals,
...nextTs,
// Override default ignores of eslint-config-next.
globalIgnores([
// Default ignores of eslint-config-next:
".next/**",
"out/**",
"build/**",
"next-env.d.ts",
]),
]);
export default eslintConfig;

View file

@ -0,0 +1,7 @@
import type { NextConfig } from "next";
// Next.js configuration object. It is intentionally empty for now, so no
// options are overridden here.
const nextConfig: NextConfig = {
/* config options here */
};
export default nextConfig;

View file

@ -0,0 +1,26 @@
{
"name": "visualizer",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "eslint"
},
"dependencies": {
"next": "16.1.4",
"react": "19.2.3",
"react-dom": "19.2.3"
},
"devDependencies": {
"@tailwindcss/postcss": "^4",
"@types/node": "^20",
"@types/react": "^19",
"@types/react-dom": "^19",
"eslint": "^9",
"eslint-config-next": "16.1.4",
"tailwindcss": "^4",
"typescript": "^5"
}
}

4008
evals/visualizer/pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,5 @@
packages:
- .
ignoredBuiltDependencies:
- sharp
- unrs-resolver

View file

@ -0,0 +1,7 @@
// Plugin map exported as this module's default config (presumably consumed by
// PostCSS — confirm against the file name). It registers the
// "@tailwindcss/postcss" plugin with an empty options object.
const config = {
plugins: {
"@tailwindcss/postcss": {},
},
};
export default config;

View file

@ -0,0 +1 @@
<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>

After

Width:  |  Height:  |  Size: 391 B

View file

@ -0,0 +1 @@
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>

After

Width:  |  Height:  |  Size: 1 KiB

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

View file

@ -0,0 +1 @@
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>

After

Width:  |  Height:  |  Size: 128 B

View file

@ -0,0 +1 @@
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>

After

Width:  |  Height:  |  Size: 385 B

View file

@ -0,0 +1,42 @@
import { NextRequest, NextResponse } from "next/server";
import fs from "fs";
import path from "path";
// Directory the audio files are read from: "<cwd>/../stt/audio", i.e. resolved
// relative to the process working directory rather than to this file.
const AUDIO_DIR = path.join(process.cwd(), "..", "stt", "audio");
// Lower-case file extension (including the leading dot) -> Content-Type value
// sent with the response. Extensions not listed here are handled by the caller.
const MIME_TYPES: Record<string, string> = {
".mp3": "audio/mpeg",
".wav": "audio/wav",
".m4a": "audio/mp4",
".ogg": "audio/ogg",
".webm": "audio/webm",
};
/**
 * Serves a single audio file from `AUDIO_DIR`.
 *
 * The `filename` segment comes straight from the request URL, so it is
 * resolved against `AUDIO_DIR` and rejected when the resulting path falls
 * outside that directory (for example `../../etc/passwd`). Without this guard
 * any file readable by the server process could be downloaded.
 *
 * @param request - Incoming request (not used).
 * @param context - Route context; `params` resolves to the `filename` segment.
 * @returns The file bytes with `Content-Type` and `Content-Length` headers, or
 *   a JSON error body with status 400 (path escapes the audio directory),
 *   404 (missing, or not a regular file) or 500 (unexpected failure).
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ filename: string }> }
) {
  try {
    const { filename } = await params;

    // Confine the lookup to AUDIO_DIR: resolve both paths to absolute form and
    // require the target to sit strictly below the audio root.
    const audioRoot = path.resolve(AUDIO_DIR);
    const filePath = path.resolve(audioRoot, filename);
    if (!filePath.startsWith(audioRoot + path.sep)) {
      return NextResponse.json({ error: "Invalid filename" }, { status: 400 });
    }

    // A directory would pass existsSync but make readFileSync throw, so treat
    // anything that is not a regular file as "not found".
    if (!fs.existsSync(filePath) || !fs.statSync(filePath).isFile()) {
      return NextResponse.json({ error: "Audio file not found" }, { status: 404 });
    }

    const ext = path.extname(filename).toLowerCase();
    // Unknown extensions are served as a generic binary stream.
    const contentType = MIME_TYPES[ext] ?? "application/octet-stream";
    const fileBuffer = fs.readFileSync(filePath);

    return new NextResponse(fileBuffer, {
      headers: {
        "Content-Type": contentType,
        "Content-Length": fileBuffer.length.toString(),
      },
    });
  } catch (error) {
    console.error("Error serving audio:", error);
    return NextResponse.json({ error: "Failed to serve audio" }, { status: 500 });
  }
}

View file

@ -0,0 +1,27 @@
import { NextRequest, NextResponse } from "next/server";
import fs from "fs";
import path from "path";
// Directory the result JSON files are read from: "<cwd>/../stt/results",
// resolved relative to the process working directory.
const RESULTS_DIR = path.join(process.cwd(), "..", "stt", "results");
/**
 * Returns the parsed contents of `<RESULTS_DIR>/<id>.json`.
 *
 * The `id` segment comes straight from the request URL, so the target path is
 * resolved against `RESULTS_DIR` and rejected when it falls outside that
 * directory. Without this guard an id containing `../` could read any `.json`
 * file accessible to the server process.
 *
 * @param request - Incoming request (not used).
 * @param context - Route context; `params` resolves to the `id` segment.
 * @returns The parsed JSON document, or a JSON error body with status 400
 *   (path escapes the results directory), 404 (no such file) or 500 (read or
 *   parse failure).
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ id: string }> }
) {
  try {
    const { id } = await params;

    // Confine the lookup to RESULTS_DIR: the resolved target must sit strictly
    // below the resolved results root.
    const resultsRoot = path.resolve(RESULTS_DIR);
    const filePath = path.resolve(resultsRoot, `${id}.json`);
    if (!filePath.startsWith(resultsRoot + path.sep)) {
      return NextResponse.json({ error: "Invalid result id" }, { status: 400 });
    }

    if (!fs.existsSync(filePath)) {
      return NextResponse.json({ error: "Result not found" }, { status: 404 });
    }

    const content = fs.readFileSync(filePath, "utf-8");
    const data = JSON.parse(content);

    return NextResponse.json(data);
  } catch (error) {
    console.error("Error reading result:", error);
    return NextResponse.json({ error: "Failed to read result" }, { status: 500 });
  }
}

View file

@ -0,0 +1,47 @@
import { NextResponse } from "next/server";
import fs from "fs";
import path from "path";
import { ResultSummary, EventCaptureResult } from "@/types";
// Directory scanned for result JSON files: "<cwd>/../stt/results", resolved
// relative to the process working directory.
const RESULTS_DIR = path.join(process.cwd(), "..", "stt", "results");
/**
 * Lists a summary of every `*.json` result file in `RESULTS_DIR`.
 *
 * Each summary's `id` is the file name with its trailing `.json` extension
 * removed, which is the value the by-id route expects. Files that cannot be
 * read or parsed are logged and skipped so one bad file does not hide the
 * rest.
 *
 * @returns A JSON array of summaries sorted by `created_at`, newest first; an
 *   empty array when the results directory does not exist; or a JSON error
 *   body with status 500 on unexpected failure.
 */
export async function GET() {
  try {
    if (!fs.existsSync(RESULTS_DIR)) {
      return NextResponse.json([]);
    }

    const files = fs.readdirSync(RESULTS_DIR).filter((f) => f.endsWith(".json"));
    const results: ResultSummary[] = [];

    for (const file of files) {
      try {
        const filePath = path.join(RESULTS_DIR, file);
        const content = fs.readFileSync(filePath, "utf-8");
        const data: EventCaptureResult = JSON.parse(content);

        results.push({
          // Strip only the trailing extension. String.replace would remove the
          // first ".json" it finds, which is wrong for a name like
          // "a.json.b.json".
          id: path.basename(file, ".json"),
          audio_file: data.audio_file,
          provider: data.provider,
          duration: data.duration,
          created_at: data.created_at,
          event_count: data.events.length,
        });
      } catch (error) {
        // Best effort: report the bad file, including the cause, and keep going.
        console.error(`Failed to parse ${file}`, error);
      }
    }

    // Sort by created_at descending
    results.sort(
      (a, b) =>
        new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
    );

    return NextResponse.json(results);
  } catch (error) {
    console.error("Error reading results:", error);
    return NextResponse.json({ error: "Failed to read results" }, { status: 500 });
  }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View file

@ -0,0 +1,26 @@
@import "tailwindcss";
:root {
--background: #ffffff;
--foreground: #171717;
}
@theme inline {
--color-background: var(--background);
--color-foreground: var(--foreground);
--font-sans: var(--font-geist-sans);
--font-mono: var(--font-geist-mono);
}
@media (prefers-color-scheme: dark) {
:root {
--background: #0a0a0a;
--foreground: #ededed;
}
}
body {
background: var(--background);
color: var(--foreground);
font-family: Arial, Helvetica, sans-serif;
}

View file

@ -0,0 +1,34 @@
import type { Metadata } from "next";
import { Geist, Geist_Mono } from "next/font/google";
import "./globals.css";
// Geist sans-serif font loaded through next/font/google, configured with the
// CSS variable name "--font-geist-sans" and the "latin" subset.
const geistSans = Geist({
variable: "--font-geist-sans",
subsets: ["latin"],
});
// Geist Mono font, configured the same way under "--font-geist-mono".
const geistMono = Geist_Mono({
variable: "--font-geist-mono",
subsets: ["latin"],
});
// Page metadata (title and description) exported for Next.js.
export const metadata: Metadata = {
title: "STT Event Visualizer",
description: "Visualize WebSocket events from STT providers",
};
/**
 * Root layout: wraps every page in the `<html>`/`<body>` shell.
 *
 * The body carries both Geist font variable classes plus `antialiased`.
 *
 * @param props.children - Page content rendered inside `<body>`.
 */
export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  const bodyClassName = `${geistSans.variable} ${geistMono.variable} antialiased`;

  return (
    <html lang="en">
      <body className={bodyClassName}>{children}</body>
    </html>
  );
}

View file

@ -0,0 +1,129 @@
"use client";
import { useEffect, useState } from "react";
import Link from "next/link";
import { ResultSummary } from "@/types";
/**
 * Formats a duration in seconds as `M:SS`.
 *
 * Fractional seconds are truncated toward the floor, and minutes are not
 * capped at 59 (3600 seconds renders as `60:00`).
 *
 * @param seconds - Duration in seconds.
 * @returns The duration as minutes, a colon, and zero-padded seconds.
 */
function formatDuration(seconds: number): string {
  const wholeMinutes = Math.floor(seconds / 60);
  const leftoverSeconds = Math.floor(seconds % 60);
  const paddedSeconds = String(leftoverSeconds).padStart(2, "0");
  return [wholeMinutes, paddedSeconds].join(":");
}
/**
 * Formats a date string for display in the `en-US` locale.
 *
 * The output has a short month, numeric day and year, and a two-digit hour
 * and minute. The string is parsed with the `Date` constructor; an
 * unparseable value yields whatever `toLocaleDateString` returns for an
 * invalid date.
 *
 * @param isoString - Date string accepted by `new Date(...)`.
 * @returns The formatted date and time.
 */
function formatDate(isoString: string): string {
  const displayOptions: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  };
  return new Date(isoString).toLocaleDateString("en-US", displayOptions);
}
// Provider name -> CSS utility classes for that provider's badge. Providers
// missing from this map fall back to a neutral style where the map is read.
const PROVIDER_COLORS: Record<string, string> = {
deepgram: "bg-blue-500/20 text-blue-300",
"deepgram-flux": "bg-green-500/20 text-green-300",
speechmatics: "bg-purple-500/20 text-purple-300",
};
/**
 * Home page: fetches the result summaries from `/api/results` once on mount
 * and renders them as a list of links to `/view/<id>`.
 *
 * Renders one of four states: a spinner while loading, an error box when the
 * request fails, an empty-state hint when there are no results, or the list.
 */
export default function Home() {
// Fetched summaries, plus the loading and error flags that drive the view.
const [results, setResults] = useState<ResultSummary[]>([]);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
// Empty dependency array: the fetch runs once, after the first render.
useEffect(() => {
async function fetchResults() {
try {
const response = await fetch("/api/results");
if (!response.ok) {
throw new Error("Failed to fetch results");
}
const data = await response.json();
setResults(data);
} catch (err) {
setError(err instanceof Error ? err.message : "Unknown error");
} finally {
// Clear the spinner whether the request succeeded or failed.
setLoading(false);
}
}
fetchResults();
}, []);
return (
<div className="min-h-screen bg-zinc-950 text-white">
<div className="max-w-4xl mx-auto px-6 py-12">
<header className="mb-12">
<h1 className="text-3xl font-bold">STT Event Visualizer</h1>
<p className="text-zinc-400 mt-2">
Visualize captured WebSocket events from STT providers
</p>
</header>
{loading && (
<div className="flex items-center justify-center py-12">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-white"></div>
</div>
)}
{error && (
<div className="bg-red-500/20 text-red-300 p-4 rounded-lg">
{error}
</div>
)}
{!loading && !error && results.length === 0 && (
<div className="text-center py-12 text-zinc-500">
<p className="text-lg mb-4">No results found</p>
<p className="text-sm">
Run the event capture script to generate results:
</p>
<code className="block mt-2 bg-zinc-800 p-3 rounded text-zinc-300 text-sm">
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
</code>
</div>
)}
{!loading && !error && results.length > 0 && (
<div className="space-y-3">
{results.map((result) => (
<Link
key={result.id}
href={`/view/${result.id}`}
className="block bg-zinc-900 hover:bg-zinc-800 rounded-lg p-4 transition-colors"
>
<div className="flex items-center justify-between">
<div className="space-y-1">
<div className="flex items-center gap-3">
<span className="font-medium">{result.audio_file}</span>
<span
className={`text-xs px-2 py-0.5 rounded ${
PROVIDER_COLORS[result.provider] ||
"bg-zinc-700 text-zinc-300"
}`}
>
{result.provider}
</span>
</div>
<div className="text-sm text-zinc-500">
{formatDate(result.created_at)}
</div>
</div>
<div className="text-right space-y-1">
<div className="text-sm text-zinc-400">
{formatDuration(result.duration)}
</div>
<div className="text-xs text-zinc-500">
{result.event_count} events
</div>
</div>
</div>
</Link>
))}
</div>
)}
</div>
</div>
);
}

View file

@ -0,0 +1,158 @@
"use client";
import { useEffect, useState, useCallback } from "react";
import { useParams } from "next/navigation";
import Link from "next/link";
import { EventCaptureResult } from "@/types";
import AudioPlayer from "@/components/AudioPlayer";
import EventTimeline from "@/components/EventTimeline";
import EventList from "@/components/EventList";
// Provider name -> CSS utility classes for that provider's badge.
const PROVIDER_COLORS: Record<string, string> = {
deepgram: "bg-blue-500/20 text-blue-300",
"deepgram-flux": "bg-green-500/20 text-green-300",
speechmatics: "bg-purple-500/20 text-purple-300",
};
/**
 * Detail page for one captured STT run.
 *
 * Loads the result identified by the `id` route param from
 * `/api/results/{id}` and renders the audio player, the event timeline, the
 * final transcript and the provider-specific event list. The playback
 * position is kept here so that the player, timeline and list stay in sync.
 */
export default function ViewPage() {
  const params = useParams();
  const id = params.id as string;
  const [result, setResult] = useState<EventCaptureResult | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [currentTime, setCurrentTime] = useState(0);
  // The playing flag is not read anywhere in this component; only the setter
  // is kept so AudioPlayer's required onPlayingChange callback has a target.
  const [, setIsPlaying] = useState(false);

  useEffect(() => {
    if (!id) {
      return;
    }

    // Aborting on cleanup prevents a slow response for a previous id (or for
    // an unmounted page) from overwriting the state of the current one.
    const controller = new AbortController();

    async function fetchResult() {
      // Reset per-request state so an error from an earlier id does not stick.
      setLoading(true);
      setError(null);
      try {
        const response = await fetch(`/api/results/${id}`, {
          signal: controller.signal,
        });
        if (!response.ok) {
          if (response.status === 404) {
            throw new Error("Result not found");
          }
          throw new Error("Failed to fetch result");
        }
        const data: EventCaptureResult = await response.json();
        if (!controller.signal.aborted) {
          setResult(data);
        }
      } catch (err) {
        // An aborted request is expected during cleanup, not a user-facing error.
        if (!controller.signal.aborted) {
          setError(err instanceof Error ? err.message : "Unknown error");
        }
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false);
        }
      }
    }

    void fetchResult();

    return () => {
      controller.abort();
    };
  }, [id]);

  // Position reported by the audio element while it plays.
  const handleTimeUpdate = useCallback((time: number) => {
    setCurrentTime(time);
  }, []);

  const handlePlayingChange = useCallback((playing: boolean) => {
    setIsPlaying(playing);
  }, []);

  // Position requested by the timeline or the event list; AudioPlayer picks
  // the new currentTime up through its prop and seeks the element.
  const handleSeek = useCallback((time: number) => {
    setCurrentTime(time);
  }, []);

  if (loading) {
    return (
      <div className="min-h-screen bg-zinc-950 text-white flex items-center justify-center">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-white"></div>
      </div>
    );
  }

  if (error) {
    return (
      <div className="min-h-screen bg-zinc-950 text-white p-6">
        <div className="max-w-4xl mx-auto">
          <Link href="/" className="text-zinc-400 hover:text-white mb-4 inline-block">
            &larr; Back to results
          </Link>
          <div className="bg-red-500/20 text-red-300 p-4 rounded-lg">{error}</div>
        </div>
      </div>
    );
  }

  if (!result) {
    return null;
  }

  const audioUrl = `/api/audio/${result.audio_file}`;

  return (
    <div className="min-h-screen bg-zinc-950 text-white">
      <div className="max-w-7xl mx-auto px-6 py-6">
        {/* Header */}
        <header className="mb-6">
          <Link href="/" className="text-zinc-400 hover:text-white mb-2 inline-block text-sm">
            &larr; Back to results
          </Link>
          <div className="flex items-center gap-3">
            <h1 className="text-2xl font-bold">{result.audio_file}</h1>
            <span
              className={`text-sm px-2 py-0.5 rounded ${
                PROVIDER_COLORS[result.provider] || "bg-zinc-700 text-zinc-300"
              }`}
            >
              {result.provider}
            </span>
          </div>
          {result.transcript && (
            <p className="text-zinc-400 mt-2 text-sm line-clamp-2">
              {result.transcript}
            </p>
          )}
        </header>
        {/* Main content */}
        <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
          {/* Left column: Audio player and timeline */}
          <div className="lg:col-span-2 space-y-4">
            <AudioPlayer
              audioUrl={audioUrl}
              duration={result.duration}
              currentTime={currentTime}
              onTimeUpdate={handleTimeUpdate}
              onPlayingChange={handlePlayingChange}
            />
            <EventTimeline
              events={result.events}
              duration={result.duration}
              currentTime={currentTime}
              onSeek={handleSeek}
            />
            {/* Transcript section */}
            {result.transcript && (
              <div className="bg-zinc-800 rounded-lg p-4">
                <div className="text-sm text-zinc-400 font-medium mb-2">
                  Final Transcript
                </div>
                <p className="text-zinc-300">{result.transcript}</p>
              </div>
            )}
          </div>
          {/* Right column: Event list */}
          <div className="lg:col-span-1 h-[calc(100vh-12rem)]">
            <EventList
              events={result.events}
              currentTime={currentTime}
              onSeek={handleSeek}
              provider={result.provider}
            />
          </div>
        </div>
      </div>
    </div>
  );
}

View file

@ -0,0 +1,145 @@
"use client";
import { useRef, useEffect, useState, useCallback } from "react";
/** Props accepted by the AudioPlayer component. */
interface AudioPlayerProps {
// URL handed to the underlying <audio> element as its src.
audioUrl: string;
// Upper bound of the seek slider; also rendered as the total-time label.
duration: number;
// Externally controlled position; the player seeks the element to it when the
// two differ by more than 0.5.
currentTime: number;
// Called with the element's position on every "timeupdate" event and after a
// slider seek.
onTimeUpdate: (time: number) => void;
// Called with true on "play" and with false on "pause" / "ended".
onPlayingChange: (playing: boolean) => void;
}
/**
 * Renders a position in seconds as `m:ss`.
 * Fractions of a second are truncated and minutes are not capped at 59.
 */
function formatTime(seconds: number): string {
  const wholeMinutes = Math.floor(seconds / 60);
  const paddedSeconds = String(Math.floor(seconds % 60)).padStart(2, "0");
  return [wholeMinutes, paddedSeconds].join(":");
}
/**
 * Play/pause button plus seek slider around a hidden <audio> element.
 *
 * The element is the source of truth for position and play state: its
 * "timeupdate", "play", "pause" and "ended" events drive both the local UI
 * state and the `onTimeUpdate` / `onPlayingChange` callbacks. A `currentTime`
 * prop that differs from the element's position by more than 0.5 is treated
 * as an external seek request.
 */
export default function AudioPlayer({
  audioUrl,
  duration,
  currentTime,
  onTimeUpdate,
  onPlayingChange,
}: AudioPlayerProps) {
  const audioRef = useRef<HTMLAudioElement>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [internalTime, setInternalTime] = useState(0);

  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;

    const handleTimeUpdate = () => {
      setInternalTime(audio.currentTime);
      onTimeUpdate(audio.currentTime);
    };
    const handlePlay = () => {
      setIsPlaying(true);
      onPlayingChange(true);
    };
    // "pause" and "ended" both mean playback has stopped.
    const handleStop = () => {
      setIsPlaying(false);
      onPlayingChange(false);
    };

    audio.addEventListener("timeupdate", handleTimeUpdate);
    audio.addEventListener("play", handlePlay);
    audio.addEventListener("pause", handleStop);
    audio.addEventListener("ended", handleStop);
    return () => {
      audio.removeEventListener("timeupdate", handleTimeUpdate);
      audio.removeEventListener("play", handlePlay);
      audio.removeEventListener("pause", handleStop);
      audio.removeEventListener("ended", handleStop);
    };
  }, [onTimeUpdate, onPlayingChange]);

  // Seek to currentTime when it changes externally
  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;
    // Only seek if the difference is significant (user clicked timeline);
    // small differences are just the echo of our own timeupdate reports.
    if (Math.abs(audio.currentTime - currentTime) > 0.5) {
      audio.currentTime = currentTime;
    }
  }, [currentTime]);

  const togglePlay = useCallback(() => {
    const audio = audioRef.current;
    if (!audio) return;

    if (isPlaying) {
      audio.pause();
      return;
    }

    // play() returns a promise that rejects when playback cannot start
    // (blocked by the browser, unsupported or missing source, ...). Handle it
    // so the failure is reported instead of becoming an unhandled rejection.
    audio.play().catch((err: unknown) => {
      // A pause() issued while play() was still pending rejects with
      // AbortError; that is a normal interruption, not a failure.
      if (err instanceof DOMException && err.name === "AbortError") {
        return;
      }
      console.error("Audio playback failed", err);
      // Re-sync the UI with what the element is actually doing.
      const playing = !audio.paused;
      setIsPlaying(playing);
      onPlayingChange(playing);
    });
  }, [isPlaying, onPlayingChange]);

  const handleSeek = useCallback(
    (e: React.ChangeEvent<HTMLInputElement>) => {
      const audio = audioRef.current;
      if (!audio) return;
      const newTime = parseFloat(e.target.value);
      audio.currentTime = newTime;
      // Update immediately so the slider does not lag until the next timeupdate.
      setInternalTime(newTime);
      onTimeUpdate(newTime);
    },
    [onTimeUpdate]
  );

  return (
    <div className="bg-zinc-900 rounded-lg p-4 space-y-3">
      <audio ref={audioRef} src={audioUrl} preload="metadata" />
      <div className="flex items-center gap-4">
        <button
          type="button"
          onClick={togglePlay}
          aria-label={isPlaying ? "Pause" : "Play"}
          className="w-12 h-12 rounded-full bg-white text-black flex items-center justify-center hover:bg-zinc-200 transition-colors"
        >
          {isPlaying ? (
            <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 20 20">
              <path
                fillRule="evenodd"
                d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zM7 8a1 1 0 012 0v4a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v4a1 1 0 102 0V8a1 1 0 00-1-1z"
                clipRule="evenodd"
              />
            </svg>
          ) : (
            <svg className="w-5 h-5 ml-1" fill="currentColor" viewBox="0 0 20 20">
              <path
                fillRule="evenodd"
                d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z"
                clipRule="evenodd"
              />
            </svg>
          )}
        </button>
        <div className="flex-1 space-y-1">
          <input
            type="range"
            aria-label="Seek"
            min={0}
            max={duration}
            step={0.1}
            value={internalTime}
            onChange={handleSeek}
            className="w-full h-2 bg-zinc-700 rounded-lg appearance-none cursor-pointer accent-white"
          />
          <div className="flex justify-between text-xs text-zinc-400">
            <span>{formatTime(internalTime)}</span>
            <span>{formatTime(duration)}</span>
          </div>
        </div>
      </div>
    </div>
  );
}

View file

@ -0,0 +1,141 @@
"use client";
import { useEffect, useRef, useMemo, useState } from "react";
import { CapturedEvent } from "@/types";
import { DeepgramEventItem, FluxEventItem, SpeechmaticsEventItem } from "./events";
/** Props accepted by the EventList component. */
interface EventListProps {
// Events rendered as rows, in the order given.
events: CapturedEvent[];
// Playback position; the last event whose timestamp is <= this value is
// highlighted as the current one.
currentTime: number;
// Called with an event's timestamp when its row is clicked.
onSeek: (time: number) => void;
// Picks the row renderer: "deepgram-flux" and "speechmatics" have dedicated
// ones, anything else uses the Deepgram renderer.
provider: string;
}
/**
 * Renders a position in seconds as `m:ss.cc` (minutes, seconds, hundredths).
 *
 * The value is truncated, not rounded, to hundredths. The split is done on an
 * integer count of hundredths because `(seconds % 1) * 100` suffers from
 * binary floating-point error (0.57 would otherwise render as ".56").
 * Negative or non-finite input is rendered as `0:00.00`.
 */
function formatTime(seconds: number): string {
  const safeSeconds = Number.isFinite(seconds) && seconds > 0 ? seconds : 0;
  // The tiny epsilon absorbs representation error such as
  // 0.57 * 100 === 56.99999999999999 without affecting real truncation.
  const totalHundredths = Math.floor(safeSeconds * 100 + 1e-6);
  const mins = Math.floor(totalHundredths / 6000);
  const secs = Math.floor((totalHundredths % 6000) / 100);
  const hundredths = totalHundredths % 100;
  return `${mins}:${secs.toString().padStart(2, "0")}.${hundredths.toString().padStart(2, "0")}`;
}
/**
 * Maps a provider id to the component used to render one event row.
 * Providers without a dedicated renderer get the Deepgram (Nova) one.
 */
function getEventItemComponent(provider: string) {
  switch (provider) {
    case "deepgram-flux":
      return FluxEventItem;
    case "speechmatics":
      return SpeechmaticsEventItem;
    default:
      // Default to Deepgram Nova
      return DeepgramEventItem;
  }
}
/**
 * Scrollable list of captured events with a provider-specific row renderer.
 *
 * The row for the last event at or before `currentTime` is highlighted and,
 * while "Auto-scroll" is ticked, kept centred in the list. Clicking a row
 * calls `onSeek` with that event's timestamp. Each row can be expanded
 * independently; expansion is tracked by row index.
 */
export default function EventList({
  events,
  currentTime,
  onSeek,
  provider,
}: EventListProps) {
  const containerRef = useRef<HTMLDivElement>(null);
  const [expandedEvents, setExpandedEvents] = useState<Set<number>>(new Set());
  const [autoScroll, setAutoScroll] = useState(true);
  const EventItemComponent = getEventItemComponent(provider);

  // Find the current event index based on time: scan from the end for the
  // last event whose timestamp is not after the playback position.
  const currentEventIndex = useMemo(() => {
    for (let i = events.length - 1; i >= 0; i--) {
      if (events[i].timestamp <= currentTime) {
        return i;
      }
    }
    return -1;
  }, [events, currentTime]);

  // Auto-scroll to current event
  useEffect(() => {
    if (!autoScroll || currentEventIndex < 0) return;
    const container = containerRef.current;
    if (!container) return;
    const eventElement = container.querySelector<HTMLElement>(
      `[data-index="${currentEventIndex}"]`
    );
    if (!eventElement) return;

    // Scroll only the list container. Element.scrollIntoView() also scrolls
    // every scrollable ancestor, including the page itself, which would drag
    // the viewport away from the player during playback.
    const containerRect = container.getBoundingClientRect();
    const rowRect = eventElement.getBoundingClientRect();
    const offsetToCentre =
      rowRect.top - containerRect.top - (container.clientHeight - rowRect.height) / 2;
    container.scrollTo({
      top: container.scrollTop + offsetToCentre,
      behavior: "smooth",
    });
  }, [currentEventIndex, autoScroll]);

  const toggleExpand = (index: number) => {
    setExpandedEvents((prev) => {
      // Copy so React sees a new Set instance and re-renders.
      const next = new Set(prev);
      if (next.has(index)) {
        next.delete(index);
      } else {
        next.add(index);
      }
      return next;
    });
  };

  return (
    <div className="bg-zinc-800 rounded-lg flex flex-col h-full">
      <div className="flex justify-between items-center px-4 py-2 border-b border-zinc-700">
        <div className="text-sm text-zinc-400 font-medium">
          Events ({events.length})
        </div>
        <label className="flex items-center gap-2 text-xs text-zinc-500 cursor-pointer">
          <input
            type="checkbox"
            checked={autoScroll}
            onChange={(e) => setAutoScroll(e.target.checked)}
            className="rounded"
          />
          Auto-scroll
        </label>
      </div>
      <div
        ref={containerRef}
        className="flex-1 overflow-y-auto divide-y divide-zinc-700/50"
      >
        {events.map((event, index) => {
          const isCurrent = index === currentEventIndex;
          const isExpanded = expandedEvents.has(index);
          return (
            <div
              key={index}
              data-index={index}
              className={`p-3 cursor-pointer transition-colors ${
                isCurrent ? "bg-zinc-700/50" : "hover:bg-zinc-700/30"
              }`}
              onClick={() => onSeek(event.timestamp)}
            >
              <div className="flex items-start gap-2">
                {/* Current indicator */}
                <div className="pt-1">
                  {isCurrent ? (
                    <div className="w-2 h-2 rounded-full bg-white" />
                  ) : (
                    <div className="w-2 h-2 rounded-full bg-zinc-600" />
                  )}
                </div>
                {/* Timestamp */}
                <span className="text-xs text-zinc-500 font-mono pt-0.5">
                  {formatTime(event.timestamp)}
                </span>
                {/* Provider-specific event item */}
                <EventItemComponent
                  event={event}
                  isExpanded={isExpanded}
                  onToggleExpand={() => toggleExpand(index)}
                />
              </div>
            </div>
          );
        })}
      </div>
    </div>
  );
}

View file

@ -0,0 +1,119 @@
"use client";
import { useMemo } from "react";
import { CapturedEvent } from "@/types";
/** Props accepted by the EventTimeline component. */
interface EventTimelineProps {
// Events drawn as markers, positioned by timestamp / duration.
events: CapturedEvent[];
// Length of the timeline; marker positions, the progress bar and click
// seeking are all scaled against it.
duration: number;
// Playback position shown by the progress bar and the cursor line.
currentTime: number;
// Called with the time that corresponds to the clicked horizontal position.
onSeek: (time: number) => void;
}
// Tailwind background class per event type for the timeline markers.
// `default` is the fallback for unlisted event types and is excluded from the
// legend; the legend shows only the first six of the remaining entries, in
// the order declared here.
const EVENT_COLORS: Record<string, string> = {
Results: "bg-blue-500",
TurnInfo: "bg-green-500",
AddTranscript: "bg-purple-500",
Connected: "bg-yellow-500",
RecognitionStarted: "bg-yellow-500",
EndOfTranscript: "bg-red-500",
Metadata: "bg-gray-500",
Error: "bg-red-600",
default: "bg-zinc-400",
};
/**
 * Formats seconds as `m:ss`, dropping any fractional part.
 * Minutes keep growing past 59 (there is no hour component).
 */
function formatTime(seconds: number): string {
  const minutePart = Math.floor(seconds / 60);
  const secondPart = Math.floor(seconds % 60).toString();
  return minutePart + ":" + secondPart.padStart(2, "0");
}
/**
 * Horizontal timeline with one marker per event, a progress overlay for the
 * playback position, time labels and a colour legend. Clicking the track
 * calls `onSeek` with the time at the clicked position.
 *
 * A zero, negative or non-finite `duration` is treated as an empty timeline:
 * everything is positioned at 0% and only a single "0:00" label is shown.
 */
export default function EventTimeline({
  events,
  duration,
  currentTime,
  onSeek,
}: EventTimelineProps) {
  // Without this guard a duration of 0 yields a label step of 0, which makes
  // the marker loop below spin forever, and every percentage becomes NaN.
  const safeDuration = Number.isFinite(duration) && duration > 0 ? duration : 0;

  const timeMarkers = useMemo(() => {
    if (safeDuration === 0) {
      return [0];
    }
    const markers: number[] = [];
    // Whole-second step giving at most seven labels; always >= 1 here.
    const interval = Math.ceil(safeDuration / 6);
    for (let i = 0; i <= safeDuration; i += interval) {
      markers.push(i);
    }
    // Make sure the final label is the (whole-second) end of the audio.
    const lastWholeSecond = Math.floor(safeDuration);
    if (markers[markers.length - 1] !== lastWholeSecond) {
      markers.push(lastWholeSecond);
    }
    return markers;
  }, [safeDuration]);

  // Position of a time on the track in percent, capped at the right edge.
  const toPercent = (time: number): number =>
    safeDuration === 0 ? 0 : Math.min((time / safeDuration) * 100, 100);

  const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
    const rect = e.currentTarget.getBoundingClientRect();
    if (safeDuration === 0 || rect.width === 0) {
      // Nothing to scale against; the only meaningful position is the start.
      onSeek(0);
      return;
    }
    const percent = (e.clientX - rect.left) / rect.width;
    const time = percent * safeDuration;
    onSeek(Math.max(0, Math.min(time, safeDuration)));
  };

  const progressPercent = toPercent(currentTime);

  return (
    <div className="bg-zinc-800 rounded-lg p-4 space-y-2">
      <div className="text-sm text-zinc-400 font-medium">Event Timeline</div>
      <div
        className="relative h-16 bg-zinc-900 rounded cursor-pointer overflow-hidden"
        onClick={handleClick}
      >
        {/* Progress indicator */}
        <div
          className="absolute top-0 bottom-0 bg-zinc-700/50 pointer-events-none"
          style={{ width: `${progressPercent}%` }}
        />
        {/* Current time indicator */}
        <div
          className="absolute top-0 bottom-0 w-0.5 bg-white z-10 pointer-events-none"
          style={{ left: `${progressPercent}%` }}
        />
        {/* Event markers */}
        <div className="absolute inset-0 flex items-center">
          {events.map((event, index) => {
            const leftPercent = toPercent(event.timestamp);
            const colorClass =
              EVENT_COLORS[event.event_type] || EVENT_COLORS.default;
            return (
              <div
                key={index}
                className={`absolute w-2 h-8 rounded-sm ${colorClass} opacity-80 hover:opacity-100 transition-opacity`}
                style={{ left: `${leftPercent}%`, transform: "translateX(-50%)" }}
                title={`${formatTime(event.timestamp)} - ${event.event_type}`}
              />
            );
          })}
        </div>
      </div>
      {/* Time markers */}
      <div className="flex justify-between text-xs text-zinc-500">
        {timeMarkers.map((time, index) => (
          <span key={index}>{formatTime(time)}</span>
        ))}
      </div>
      {/* Legend */}
      <div className="flex flex-wrap gap-3 pt-2">
        {Object.entries(EVENT_COLORS)
          .filter(([key]) => key !== "default")
          .slice(0, 6)
          .map(([eventType, colorClass]) => (
            <div key={eventType} className="flex items-center gap-1 text-xs text-zinc-400">
              <div className={`w-2 h-2 rounded-sm ${colorClass}`} />
              <span>{eventType}</span>
            </div>
          ))}
      </div>
    </div>
  );
}

View file

@ -0,0 +1,98 @@
"use client";
import { CapturedEvent } from "@/types";
/** Props accepted by the DeepgramEventItem component. */
interface DeepgramEventItemProps {
// The captured event to render.
event: CapturedEvent;
// When true, event.data is shown below the row as pretty-printed JSON.
isExpanded: boolean;
// Called when the "Show details" / "Hide details" button is clicked.
onToggleExpand: () => void;
}
// Tailwind text/background classes for the event-type badge, keyed by event
// type; `default` is used for any type not listed.
const EVENT_COLORS: Record<string, string> = {
Results: "text-blue-400 bg-blue-500/10",
SpeechStarted: "text-yellow-400 bg-yellow-500/10",
Metadata: "text-gray-400 bg-gray-500/10",
UtteranceEnd: "text-red-500 bg-red-600/10",
default: "text-zinc-400 bg-zinc-500/10",
};
/**
 * Pulls the transcript text out of an event payload.
 *
 * Reads `data.channel.alternatives[0].transcript` and returns it when it is
 * present and non-empty; every other case yields the empty string.
 */
function getTranscript(event: CapturedEvent): string {
  const channel = event.data.channel as Record<string, unknown> | undefined;
  if (!channel) {
    return "";
  }
  const alternatives = channel.alternatives as Array<{ transcript?: string }> | undefined;
  const firstTranscript = alternatives?.[0]?.transcript;
  return firstTranscript ? firstTranscript : "";
}
/**
 * Row body for one Deepgram event: an event-type badge, Final/Partial and
 * "Speech Final" badges when the payload carries `is_final` / `speech_final`,
 * the transcript (or a bracketed event-type placeholder when there is none),
 * and an expandable raw-JSON view of `event.data`.
 */
export default function DeepgramEventItem({
  event,
  isExpanded,
  onToggleExpand,
}: DeepgramEventItemProps) {
  const colorClass = EVENT_COLORS[event.event_type] || EVENT_COLORS.default;
  const data = event.data;
  const transcript = getTranscript(event);
  const isFinal = data.is_final as boolean | undefined;
  const speechFinal = data.speech_final as boolean | undefined;

  return (
    <div className="flex-1 min-w-0 space-y-1">
      <div className="flex items-center gap-2 flex-wrap">
        <span className={`text-xs px-2 py-0.5 rounded ${colorClass}`}>
          {event.event_type}
        </span>
        {/* Final/Partial indicator for Results */}
        {isFinal !== undefined && (
          <span
            className={`text-xs px-2 py-0.5 rounded ${
              isFinal
                ? "text-emerald-400 bg-emerald-500/10"
                : "text-amber-400 bg-amber-500/10"
            }`}
          >
            {isFinal ? "Final" : "Partial"}
          </span>
        )}
        {/* Speech Final indicator */}
        {speechFinal && (
          <span className="text-xs px-2 py-0.5 rounded text-cyan-400 bg-cyan-500/10">
            Speech Final
          </span>
        )}
      </div>
      {/* Transcript or status message. Events without a transcript get a
          bracketed placeholder so the row is never blank, matching the Flux
          and Speechmatics renderers. */}
      <div className="text-sm text-zinc-300 truncate">
        {transcript || `[${event.event_type}]`}
      </div>
      {/* Expand/collapse button */}
      <button
        onClick={(e) => {
          // The surrounding row seeks on click; keep this click local.
          e.stopPropagation();
          onToggleExpand();
        }}
        className="text-xs text-zinc-500 hover:text-zinc-300"
      >
        {isExpanded ? "Hide details" : "Show details"}
      </button>
      {/* Expanded JSON view */}
      {isExpanded && (
        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
          {JSON.stringify(event.data, null, 2)}
        </pre>
      )}
    </div>
  );
}

View file

@ -0,0 +1,115 @@
"use client";
import { CapturedEvent } from "@/types";
/** Props accepted by the FluxEventItem component. */
interface FluxEventItemProps {
// The captured event to render.
event: CapturedEvent;
// When true, event.data is shown below the row as pretty-printed JSON.
isExpanded: boolean;
// Called when the "Show details" / "Hide details" button is clicked.
onToggleExpand: () => void;
}
// Tailwind text/background classes for the outer event-type badge, keyed by
// event type; `default` is used for any type not listed.
const EVENT_COLORS: Record<string, string> = {
TurnInfo: "text-green-400 bg-green-500/10",
Connected: "text-yellow-400 bg-yellow-500/10",
Error: "text-red-500 bg-red-600/10",
default: "text-zinc-400 bg-zinc-500/10",
};
// Tailwind text/background classes for the sub-event badge, keyed by the
// payload's `event` field; `default` is used for any value not listed.
const FLUX_EVENT_COLORS: Record<string, string> = {
Update: "text-amber-300 bg-amber-500/20",
EndOfTurn: "text-emerald-300 bg-emerald-500/20",
EagerEndOfTurn: "text-cyan-300 bg-cyan-500/20",
StartOfTurn: "text-blue-300 bg-blue-500/20",
TurnResumed: "text-purple-300 bg-purple-500/20",
default: "text-zinc-300 bg-zinc-500/20",
};
/**
 * Row body for one Deepgram Flux event.
 *
 * Shows the event-type badge and, when the payload has an `event` field, a
 * sub-event badge plus a Final/Partial badge ("Final" only for `EndOfTurn`).
 * `turn_index` and `end_of_turn_confidence` are shown when present. The text
 * line is the transcript, or a bracketed placeholder when there is none. The
 * raw payload can be expanded as JSON.
 */
export default function FluxEventItem({
  event,
  isExpanded,
  onToggleExpand,
}: FluxEventItemProps) {
  const { event_type: eventType, data: payload } = event;
  const typeBadgeClass = EVENT_COLORS[eventType] || EVENT_COLORS.default;

  // Flux TurnInfo fields
  const subEvent = payload.event as string | undefined;
  const spokenText = payload.transcript as string | undefined;
  const eotConfidence = payload.end_of_turn_confidence as number | undefined;
  const turnNumber = payload.turn_index as number | undefined;

  const turnEnded = subEvent === "EndOfTurn";
  const subEventBadgeClass = subEvent
    ? FLUX_EVENT_COLORS[subEvent] || FLUX_EVENT_COLORS.default
    : "";
  const finalityBadgeClass = turnEnded
    ? "text-emerald-400 bg-emerald-500/10"
    : "text-amber-400 bg-amber-500/10";

  // Placeholder used when the payload carries no transcript text.
  const placeholder =
    eventType === "Connected" ? "[Connected]" : `[${subEvent || eventType}]`;

  const handleToggleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
    // Do not let the click reach the row, which seeks on click.
    e.stopPropagation();
    onToggleExpand();
  };

  return (
    <div className="flex-1 min-w-0 space-y-1">
      <div className="flex items-center gap-2 flex-wrap">
        <span className={`text-xs px-2 py-0.5 rounded ${typeBadgeClass}`}>
          {eventType}
        </span>
        {/* Flux sub-event type, followed by its Final/Partial indicator */}
        {subEvent && (
          <>
            <span className={`text-xs px-2 py-0.5 rounded ${subEventBadgeClass}`}>
              {subEvent}
            </span>
            <span className={`text-xs px-2 py-0.5 rounded ${finalityBadgeClass}`}>
              {turnEnded ? "Final" : "Partial"}
            </span>
          </>
        )}
        {/* Turn index */}
        {turnNumber !== undefined && (
          <span className="text-xs text-zinc-500">
            Turn {turnNumber}
          </span>
        )}
        {/* EOT confidence */}
        {eotConfidence !== undefined && (
          <span className="text-xs text-zinc-500 font-mono">
            EOT: {(eotConfidence * 100).toFixed(1)}%
          </span>
        )}
      </div>
      {/* Transcript or status message */}
      <div className="text-sm text-zinc-300 truncate">
        {spokenText || placeholder}
      </div>
      {/* Expand/collapse button */}
      <button
        onClick={handleToggleClick}
        className="text-xs text-zinc-500 hover:text-zinc-300"
      >
        {isExpanded ? "Hide details" : "Show details"}
      </button>
      {/* Expanded JSON view */}
      {isExpanded && (
        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
          {JSON.stringify(payload, null, 2)}
        </pre>
      )}
    </div>
  );
}

View file

@ -0,0 +1,101 @@
"use client";
import { CapturedEvent } from "@/types";
/** Props accepted by the SpeechmaticsEventItem component. */
interface SpeechmaticsEventItemProps {
// The captured event to render.
event: CapturedEvent;
// When true, event.data is shown below the row as pretty-printed JSON.
isExpanded: boolean;
// Called when the "Show details" / "Hide details" button is clicked.
onToggleExpand: () => void;
}
// Tailwind text/background classes for the event-type badge, keyed by event
// type; `default` is used for any type not listed.
const EVENT_COLORS: Record<string, string> = {
AddTranscript: "text-purple-400 bg-purple-500/10",
RecognitionStarted: "text-yellow-400 bg-yellow-500/10",
EndOfTranscript: "text-red-400 bg-red-500/10",
Warning: "text-orange-400 bg-orange-500/10",
Error: "text-red-500 bg-red-600/10",
default: "text-zinc-400 bg-zinc-500/10",
};
/**
 * Builds the transcript text for an event from `data.results`.
 *
 * Only entries with `type === "word"` and a non-empty first alternative
 * contribute; their `content` values are joined with single spaces. Returns
 * the empty string when the payload has no `results`.
 */
function getTranscript(event: CapturedEvent): string {
  const entries = event.data.results as Array<{
    type?: string;
    alternatives?: Array<{ content?: string }>;
  }> | undefined;

  if (!entries) {
    return "";
  }

  const words: string[] = [];
  for (const entry of entries) {
    if (entry.type !== "word") {
      continue;
    }
    const content = entry.alternatives?.[0]?.content;
    if (content) {
      words.push(content);
    }
  }
  return words.join(" ");
}
/**
 * Row body for one Speechmatics event.
 *
 * Shows the event-type badge (plus a "Final" badge for `AddTranscript`), then
 * the joined word transcript or, when that is empty, a bracketed status label
 * derived from the event type. The raw payload can be expanded as JSON.
 */
export default function SpeechmaticsEventItem({
  event,
  isExpanded,
  onToggleExpand,
}: SpeechmaticsEventItemProps) {
  const { event_type: eventType, data: payload } = event;
  const badgeClass = EVENT_COLORS[eventType] || EVENT_COLORS.default;
  const spokenText = getTranscript(event);

  // Label shown in place of the transcript for status-style events.
  const describeStatus = (): string => {
    if (eventType === "RecognitionStarted") {
      return "[Recognition Started]";
    }
    if (eventType === "EndOfTranscript") {
      return "[End of Transcript]";
    }
    if (eventType === "Warning") {
      // Warning reason
      const reason = payload.reason as string | undefined;
      return `[Warning: ${reason || "unknown"}]`;
    }
    return `[${eventType}]`;
  };

  const handleToggleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
    // Do not let the click reach the row, which seeks on click.
    e.stopPropagation();
    onToggleExpand();
  };

  return (
    <div className="flex-1 min-w-0 space-y-1">
      <div className="flex items-center gap-2 flex-wrap">
        <span className={`text-xs px-2 py-0.5 rounded ${badgeClass}`}>
          {eventType}
        </span>
        {/* AddTranscript is always final in Speechmatics */}
        {eventType === "AddTranscript" && (
          <span className="text-xs px-2 py-0.5 rounded text-emerald-400 bg-emerald-500/10">
            Final
          </span>
        )}
      </div>
      {/* Transcript or status message */}
      <div className="text-sm text-zinc-300 truncate">
        {spokenText || describeStatus()}
      </div>
      {/* Expand/collapse button */}
      <button
        onClick={handleToggleClick}
        className="text-xs text-zinc-500 hover:text-zinc-300"
      >
        {isExpanded ? "Hide details" : "Show details"}
      </button>
      {/* Expanded JSON view */}
      {isExpanded && (
        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
          {JSON.stringify(payload, null, 2)}
        </pre>
      )}
    </div>
  );
}

View file

@ -0,0 +1,3 @@
export { default as DeepgramEventItem } from "./DeepgramEventItem";
export { default as FluxEventItem } from "./FluxEventItem";
export { default as SpeechmaticsEventItem } from "./SpeechmaticsEventItem";

View file

@ -0,0 +1,24 @@
/** One event captured from an STT provider during a run. */
export interface CapturedEvent {
// Position of the event on the audio timeline; the viewer compares it with
// the playback position and formats it as minutes:seconds.
timestamp: number;
// Provider event name; displayed as a badge and used as the key for colour lookups.
event_type: string;
// Raw event payload. The per-provider renderers read individual fields from
// it and show the whole object as JSON when a row is expanded.
data: Record<string, unknown>;
}
/** Full result of one capture run, as loaded by the detail page. */
export interface EventCaptureResult {
// Audio file name; shown as the page title and appended to /api/audio/ to
// build the player's source URL.
audio_file: string;
// NOTE(review): not read by the viewer components; presumably the path of the
// audio file on the machine that ran the capture — confirm against the producer.
audio_path: string;
// Provider id; selects the badge colour and the event row renderer.
provider: string;
// Audio length, used as the range of the seek slider and of the timeline.
duration: number;
// Creation time as a string. NOTE(review): the exact format is not visible
// from the viewer code — confirm against the producer.
created_at: string;
// All captured events for the run.
events: CapturedEvent[];
// Final transcript text; the transcript sections are hidden when it is empty.
transcript: string;
}
/** Lightweight entry for the results list (no events, only their count). */
export interface ResultSummary {
// Identifier used as the React key and to build the /view/{id} link.
id: string;
// Audio file name shown as the entry title.
audio_file: string;
// Provider id shown as a coloured badge.
provider: string;
// Audio length shown on the entry.
duration: number;
// Creation time as a string, formatted for display by the list page.
created_at: string;
// Number of captured events, shown as "<n> events".
event_count: number;
}

View file

@ -0,0 +1,34 @@
{
"compilerOptions": {
"target": "ES2017",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "react-jsx",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./src/*"]
}
},
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx",
".next/types/**/*.ts",
".next/dev/types/**/*.ts",
"**/*.mts"
],
"exclude": ["node_modules"]
}

@ -1 +1 @@
Subproject commit a1d3062446240b6b27ebc787d28578e4561e7441
Subproject commit f11fad8f3e90e06b1625b9dc49c13e26f3c9e716

View file

@ -16,6 +16,11 @@
"type": "json",
"path": "ui/package.json",
"jsonpath": "$.version"
},
{
"type": "toml",
"path": "api/pyproject.toml",
"key": "project.version"
}
]
}

View file

@ -1,6 +1,6 @@
import { redirect } from "next/navigation";
import { getWorkflowsApiV1WorkflowFetchGet } from "@/client/sdk.gen";
import { getWorkflowCountApiV1WorkflowCountGet } from "@/client/sdk.gen";
import { getServerAccessToken,getServerAuthProvider, getServerUser } from "@/lib/auth/server";
import logger from '@/lib/logger';
import { getRedirectUrl } from "@/lib/utils";
@ -34,21 +34,18 @@ export default async function AfterSignInPage() {
try {
const accessToken = await getServerAccessToken();
if (accessToken) {
const workflowsResponse = await getWorkflowsApiV1WorkflowFetchGet({
const countResponse = await getWorkflowCountApiV1WorkflowCountGet({
headers: {
Authorization: `Bearer ${accessToken}`,
},
});
const workflows = workflowsResponse.data ? (Array.isArray(workflowsResponse.data) ? workflowsResponse.data : [workflowsResponse.data]) : [];
const activeWorkflows = workflows.filter(w => w.status === 'active');
logger.debug('[AfterSignInPage] Found workflows:', {
total: workflows.length,
active: activeWorkflows.length
total: countResponse.data?.total,
active: countResponse.data?.active
});
if (activeWorkflows.length > 0) {
if (countResponse.data && countResponse.data.active > 0) {
logger.debug('[AfterSignInPage] Redirecting to /workflow - user has workflows');
redirect('/workflow');
} else {

View file

@ -0,0 +1,33 @@
import { NextResponse } from "next/server";
import { healthApiV1HealthGet } from "@/client/sdk.gen";
import type { HealthResponse } from "@/client/types.gen";
// Import version from package.json at build time
import packageJson from "../../../../../package.json";
/**
 * Reports the UI and backend versions as JSON.
 *
 * `ui` comes from package.json (falling back to "dev"). `api` and
 * `backendApiEndpoint` come from the backend health endpoint: `api` stays
 * "unknown" when the call returns no data and becomes "unavailable" when the
 * call throws; `backendApiEndpoint` stays null in both cases.
 */
export async function GET() {
  const versionInfo: {
    ui: string;
    api: string;
    backendApiEndpoint: string | null;
  } = {
    ui: packageJson.version || "dev",
    api: "unknown",
    backendApiEndpoint: null,
  };

  // Fetch backend version and config from health endpoint
  try {
    const { data: payload } = await healthApiV1HealthGet();
    if (payload) {
      const health = payload as HealthResponse;
      versionInfo.api = health.version;
      versionInfo.backendApiEndpoint = health.backend_api_endpoint;
    }
  } catch {
    // Backend might not be reachable during build or in some deployments
    versionInfo.api = "unavailable";
  }

  return NextResponse.json(versionInfo);
}

View file

@ -9,6 +9,7 @@ import AppLayout from "@/components/layout/AppLayout";
import PostHogIdentify from "@/components/PostHogIdentify";
import SpinLoader from "@/components/SpinLoader";
import { Toaster } from "@/components/ui/sonner";
import { AppConfigProvider } from "@/context/AppConfigContext";
import { OnboardingProvider } from "@/context/OnboardingContext";
import { UserConfigProvider } from "@/context/UserConfigContext";
import { AuthProvider } from "@/lib/auth";
@ -59,18 +60,20 @@ export default function RootLayout({
<body
className={`${geistSans.variable} ${geistMono.variable} antialiased`}>
<AuthProvider>
<Suspense fallback={<SpinLoader />}>
<UserConfigProvider>
<OnboardingProvider>
<PostHogIdentify />
<AppLayout>
{children}
</AppLayout>
<Toaster />
<ChatwootWidget />
</OnboardingProvider>
</UserConfigProvider>
</Suspense>
<AppConfigProvider>
<Suspense fallback={<SpinLoader />}>
<UserConfigProvider>
<OnboardingProvider>
<PostHogIdentify />
<AppLayout>
{children}
</AppLayout>
<Toaster />
<ChatwootWidget />
</OnboardingProvider>
</UserConfigProvider>
</Suspense>
</AppConfigProvider>
</AuthProvider>
</body>
</html>

View file

@ -1,7 +1,7 @@
import { isNextRouterError } from "next/dist/client/components/is-next-router-error";
import { redirect } from "next/navigation";
import { getWorkflowsApiV1WorkflowFetchGet } from "@/client/sdk.gen";
import { getWorkflowCountApiV1WorkflowCountGet } from "@/client/sdk.gen";
import SignInClient from "@/components/SignInClient";
import { getServerAccessToken,getServerAuthProvider,getServerUser } from "@/lib/auth/server";
import logger from '@/lib/logger';
@ -21,21 +21,18 @@ export default async function Home() {
try {
const accessToken = await getServerAccessToken();
if (accessToken) {
const workflowsResponse = await getWorkflowsApiV1WorkflowFetchGet({
const countResponse = await getWorkflowCountApiV1WorkflowCountGet({
headers: {
Authorization: `Bearer ${accessToken}`,
},
});
const workflows = workflowsResponse.data ? (Array.isArray(workflowsResponse.data) ? workflowsResponse.data : [workflowsResponse.data]) : [];
const activeWorkflows = workflows.filter(w => w.status === 'active');
logger.debug('[HomePage] Found workflows for local provider:', {
total: workflows.length,
active: activeWorkflows.length
total: countResponse.data?.total,
active: countResponse.data?.active
});
if (activeWorkflows.length > 0) {
if (countResponse.data && countResponse.data.active > 0) {
logger.debug('[HomePage] Redirecting to /workflow - user has workflows');
redirect('/workflow');
} else {

View file

@ -326,14 +326,64 @@ export default function UsagePage() {
isDisabled={savingTimezone || userConfigLoading}
placeholder={userConfigLoading ? "Loading..." : "Select timezone"}
styles={{
control: (base) => ({
control: (base, state) => ({
...base,
minHeight: '36px',
fontSize: '14px',
backgroundColor: 'var(--background)',
borderColor: state.isFocused ? 'var(--ring)' : 'var(--border)',
boxShadow: state.isFocused ? '0 0 0 2px color-mix(in srgb, var(--ring) 20%, transparent)' : 'none',
'&:hover': {
borderColor: 'var(--border)',
},
}),
menu: (base) => ({
...base,
zIndex: 9999,
backgroundColor: 'var(--popover)',
border: '1px solid var(--border)',
boxShadow: '0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
}),
menuList: (base) => ({
...base,
backgroundColor: 'var(--popover)',
padding: 0,
}),
option: (base, state) => ({
...base,
backgroundColor: state.isSelected
? 'var(--accent)'
: state.isFocused
? 'var(--accent)'
: 'var(--popover)',
color: 'var(--foreground)',
cursor: 'pointer',
'&:active': {
backgroundColor: 'var(--accent)',
},
}),
singleValue: (base) => ({
...base,
color: 'var(--foreground)',
}),
input: (base) => ({
...base,
color: 'var(--foreground)',
}),
placeholder: (base) => ({
...base,
color: 'var(--muted-foreground)',
}),
indicatorSeparator: (base) => ({
...base,
backgroundColor: 'var(--border)',
}),
dropdownIndicator: (base) => ({
...base,
color: 'var(--muted-foreground)',
'&:hover': {
color: 'var(--foreground)',
},
}),
}}
/>

File diff suppressed because one or more lines are too long

View file

@ -524,6 +524,12 @@ export type HttpValidationError = {
detail?: Array<ValidationError>;
};
export type HealthResponse = {
status: string;
version: string;
backend_api_endpoint: string;
};
/**
* Configuration for HTTP API tools.
*/
@ -1042,6 +1048,15 @@ export type VonageConfigurationResponse = {
*/
export type WebhookCredentialType = 'none' | 'api_key' | 'bearer_token' | 'basic_auth' | 'custom_header';
/**
* Response for workflow count endpoint.
*/
export type WorkflowCountResponse = {
total: number;
active: number;
archived: number;
};
export type WorkflowError = {
kind: ItemKind;
id: string | null;
@ -1049,6 +1064,17 @@ export type WorkflowError = {
message: string;
};
/**
* Lightweight response for workflow listings (excludes large fields).
*/
export type WorkflowListResponse = {
id: number;
name: string;
status: string;
created_at: string;
total_runs: number;
};
export type WorkflowOption = {
id: number;
name: string;
@ -1391,6 +1417,7 @@ export type HandleInboundTelephonyApiV1TelephonyInboundWorkflowIdPostData = {
'x-twilio-signature'?: string | null;
'x-vobiz-signature'?: string | null;
'x-vobiz-timestamp'?: string | null;
'x-cx-apikey'?: string | null;
};
path: {
workflow_id: number;
@ -1655,6 +1682,39 @@ export type CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses =
export type CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponse = CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses[keyof CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses];
export type GetWorkflowCountApiV1WorkflowCountGetData = {
body?: never;
headers?: {
authorization?: string | null;
'X-API-Key'?: string | null;
};
path?: never;
query?: never;
url: '/api/v1/workflow/count';
};
export type GetWorkflowCountApiV1WorkflowCountGetErrors = {
/**
* Not found
*/
404: unknown;
/**
* Validation Error
*/
422: HttpValidationError;
};
export type GetWorkflowCountApiV1WorkflowCountGetError = GetWorkflowCountApiV1WorkflowCountGetErrors[keyof GetWorkflowCountApiV1WorkflowCountGetErrors];
export type GetWorkflowCountApiV1WorkflowCountGetResponses = {
/**
* Successful Response
*/
200: WorkflowCountResponse;
};
export type GetWorkflowCountApiV1WorkflowCountGetResponse = GetWorkflowCountApiV1WorkflowCountGetResponses[keyof GetWorkflowCountApiV1WorkflowCountGetResponses];
export type GetWorkflowsApiV1WorkflowFetchGetData = {
body?: never;
headers?: {
@ -1688,7 +1748,7 @@ export type GetWorkflowsApiV1WorkflowFetchGetResponses = {
/**
* Successful Response
*/
200: Array<WorkflowResponse>;
200: Array<WorkflowListResponse>;
};
export type GetWorkflowsApiV1WorkflowFetchGetResponse = GetWorkflowsApiV1WorkflowFetchGetResponses[keyof GetWorkflowsApiV1WorkflowFetchGetResponses];
@ -4168,6 +4228,41 @@ export type InitiateCallApiV1PublicAgentUuidPostResponses = {
export type InitiateCallApiV1PublicAgentUuidPostResponse = InitiateCallApiV1PublicAgentUuidPostResponses[keyof InitiateCallApiV1PublicAgentUuidPostResponses];
/**
 * Request options for the
 * `/api/v1/public/download/workflow/{token}/{artifact_type}` endpoint.
 *
 * Both path parameters are required; the endpoint takes no body.
 */
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetData = {
    url: '/api/v1/public/download/workflow/{token}/{artifact_type}';
    path: {
        /** Fills the `{token}` segment of the URL. */
        token: string;
        /** Fills the `{artifact_type}` segment of the URL. */
        artifact_type: 'recording' | 'transcript';
    };
    query?: {
        /** Display inline in browser instead of download */
        inline?: boolean;
    };
    body?: never;
};
/**
 * Error payload types for the
 * `/api/v1/public/download/workflow/{token}/{artifact_type}` endpoint, keyed
 * by numeric status code.
 */
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors = {
    /** Not found */
    404: unknown;
    /** Validation Error */
    422: HttpValidationError;
};
/** Union of every value type declared in `DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors`. */
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetError =
    DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors[keyof DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors];
/**
 * Response payload types for the
 * `/api/v1/public/download/workflow/{token}/{artifact_type}` endpoint, keyed
 * by numeric status code. The 200 payload is typed as `unknown`, so callers
 * must narrow it before use.
 */
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetResponses = {
    /** Successful Response */
    200: unknown;
};
export type DeactivateEmbedTokenApiV1WorkflowWorkflowIdEmbedTokenDeleteData = {
body?: never;
headers?: {
@ -4500,9 +4595,11 @@ export type HealthApiV1HealthGetResponses = {
/**
* Successful Response
*/
200: unknown;
200: HealthResponse;
};
/** Union of every value type declared in `HealthApiV1HealthGetResponses`. */
export type HealthApiV1HealthGetResponse =
    HealthApiV1HealthGetResponses[keyof HealthApiV1HealthGetResponses];
/**
 * Options describing the client's target server.
 */
export type ClientOptions = {
    /**
     * Either the literal `'http://127.0.0.1:8000'` or any other string.
     * The `string & {}` arm keeps the literal from being absorbed into plain
     * `string`, so the union still lists the literal as a distinct member.
     */
    baseUrl: (string & {}) | 'http://127.0.0.1:8000';
};

View file

@ -22,6 +22,7 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
const [isOpen, setIsOpen] = useState(false);
const [mediaType, setMediaType] = useState<'audio' | 'transcript' | null>(null);
const [mediaSignedUrl, setMediaSignedUrl] = useState<string | null>(null);
const [transcriptContent, setTranscriptContent] = useState<string | null>(null);
const [selectedRunId, setSelectedRunId] = useState<number | null>(null);
const [mediaDownloadKey, setMediaDownloadKey] = useState<string | null>(null);
const [mediaLoading, setMediaLoading] = useState(false);
@ -47,6 +48,7 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
async (fileKey: string | null, runId: number) => {
if (!fileKey || !accessToken) return;
setMediaLoading(true);
setTranscriptContent(null);
const signed = await getSignedUrl(fileKey, accessToken, true);
if (signed) {
setMediaType('transcript');
@ -54,6 +56,14 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
setMediaDownloadKey(fileKey);
setSelectedRunId(runId);
setIsOpen(true);
// Fetch transcript content with proper UTF-8 encoding
try {
const response = await fetch(signed);
const text = await response.text();
setTranscriptContent(text);
} catch (error) {
console.error('Error fetching transcript:', error);
}
}
setMediaLoading(false);
},
@ -84,12 +94,10 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
<audio src={mediaSignedUrl} controls autoPlay className="w-full mt-4" />
)}
{!mediaLoading && mediaType === 'transcript' && mediaSignedUrl && (
<iframe
src={mediaSignedUrl}
title="Transcript"
className="w-full h-[60vh] border rounded-md mt-4"
/>
{!mediaLoading && mediaType === 'transcript' && transcriptContent && (
<pre className="w-full h-[60vh] overflow-auto border rounded-md mt-4 p-4 bg-muted text-sm whitespace-pre-wrap font-mono">
{transcriptContent}
</pre>
)}
<DialogFooter className="pt-4">

View file

@ -321,9 +321,20 @@ export default function ServiceConfiguration() {
if (!providerSchema) return [];
// Find all config fields (not provider, not api_key)
return Object.keys(providerSchema.properties).filter(
const fields = Object.keys(providerSchema.properties).filter(
field => field !== "provider" && field !== "api_key"
);
// For Deepgram STT, hide language field when flux-general-en model is selected
// Flux model is English-only and doesn't support language selection
if (service === "stt" && currentProvider === "deepgram") {
const currentModel = watch("stt_model") as string;
if (currentModel === "flux-general-en") {
return fields.filter(field => field !== "language");
}
}
return fields;
};
const renderServiceFields = (service: ServiceSegment) => {

View file

@ -35,7 +35,7 @@ interface EndCallNodeProps extends NodeProps {
}
export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
const { open, setOpen, handleSaveNodeData } = useNodeHandlers({
const { open, setOpen, handleSaveNodeData, handleDeleteNode } = useNodeHandlers({
id,
additionalData: { is_end: true }
});
@ -122,9 +122,14 @@ export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
</NodeContent>
<NodeToolbar isVisible={selected} position={Position.Right}>
<Button onClick={() => setOpen(true)} variant="outline" size="icon">
<Edit />
</Button>
<div className="flex flex-col gap-1">
<Button onClick={() => setOpen(true)} variant="outline" size="icon">
<Edit />
</Button>
<Button onClick={handleDeleteNode} variant="outline" size="icon">
<Trash2Icon />
</Button>
</div>
</NodeToolbar>
<NodeEditDialog

Some files were not shown because too many files have changed in this diff Show more