fix: changes to update pipecat version to 0.0.100 (#122)

* feat: add stt evals * add smart turn as provider * chore: remove deprecations * chore: format files * fix: remove deprecated UserIdleProcessor * fix: remove deprecated TranscriptProcessor * chore: update pipecat submodule * feat: add evals visualisation * fix: trigger llm generation on client connected and pipeline started * chore: update pipecat * chore: update pipecat submodule * Add tests * fix: slow loading of workflow page * chore: update pipecat submodule * Show version after release * Fixes #99 * fix: provider check for websocket connection * Fixes #107 * Fix #96 * chore: fix documentation * fix: cloudonix campaign call error --------- Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
2026-07-25 12:01:04 +02:00 · 2026-01-23 18:53:59 +05:30 · 2026-01-23 18:53:59 +05:30 · 911c5ed416
commit 911c5ed416
parent a4367bd83b
104 changed files with 16919 additions and 597 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1 +1,2 @@
-api/.env
+api/.env
+evals/
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@ -4,7 +4,7 @@ on:
  release:
    types: [published]

-# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push 
+# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
@ -13,11 +13,11 @@ jobs:
  build:
    runs-on: ubuntu-latest
    env:
-      COMMIT_SHA: ${{ github.sha }}  # Used to tag images with short commit SHA
+      COMMIT_SHA: ${{ github.sha }}

    strategy:
      matrix:
-        service: 
+        service:
          - "dograh-api|api/Dockerfile|."
          - "dograh-ui|ui/Dockerfile|."

@ -25,14 +25,12 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
-          submodules: true  # Only for version check, not used in build
+          submodules: true

-      # Pipecat version check removed - now using local submodule
-
-      - name: Set up QEMU  # Enables cross-platform builds (e.g., arm64)
+      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

-      - name: Set up Docker Buildx  # Enables multi-arch and advanced Docker builds
+      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to DockerHub
@ -51,48 +49,50 @@ jobs:
      - name: Set build variables
        id: build-vars
        run: |
-          # Parse matrix entry and set variables early (before build)
          SERVICE="${{ matrix.service }}"
          IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
          SHORT_SHA=${COMMIT_SHA::8}
-          
-          # Export for use in subsequent steps
+
+          # Derive the version from the release tag, stripping an optional
+          # 'dograh-' prefix and then an optional 'v' prefix. The tag is read
+          # from the runner-provided GITHUB_REF_NAME variable (the tag name on
+          # release events) instead of being interpolated into the script text
+          # through an Actions expression, so its content is never parsed as
+          # shell code.
+          VERSION="${GITHUB_REF_NAME}"
+          VERSION="${VERSION#dograh-}"
+          VERSION="${VERSION#v}"
+
          echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
          echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
-          echo "service=${SERVICE}" >> $GITHUB_OUTPUT
-      
+          echo "version=${VERSION}" >> $GITHUB_OUTPUT
+
      - name: Build and Push ${{ matrix.service }}
        id: docker-build
        run: |
-          # Parse matrix entry into individual variables
          SERVICE="${{ matrix.service }}"
          IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
          DOCKERFILE=$(echo "$SERVICE" | cut -d '|' -f2)
          CONTEXT=$(echo "$SERVICE" | cut -d '|' -f3)
          SHORT_SHA=${COMMIT_SHA::8}
+          VERSION="${{ steps.build-vars.outputs.version }}"

          echo "Building and pushing image: $IMAGE_NAME"
          echo "Dockerfile: $DOCKERFILE"
          echo "Context: $CONTEXT"
-          echo "Commit SHA: $SHORT_SHA"
-          
-          # Export tags for Slack notification
+          echo "Version: $VERSION"
+
          echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
          echo "dockerhub_tag=${{ secrets.DOCKERHUB_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
          echo "ghcr_tag=ghcr.io/${{ secrets.GHCR_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
          echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT

-          # Build and push multi-arch Docker image to DockerHub and GHCR
          docker buildx build \
            -f "$DOCKERFILE" \
            --platform linux/amd64,linux/arm64 \
+            --tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$VERSION \
            --tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
            --tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:latest \
+            --tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$VERSION \
            --tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
            --tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:latest \
            --push "$CONTEXT"
-      
-      # Success notification
+
      - name: Send Slack notification - Success
        if: success()
        uses: slackapi/slack-github-action@v1.26.0
@ -101,10 +101,9 @@ jobs:
        with:
          payload: |
            {
-              "text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.short_sha }}) on ${{ github.ref_name }} by ${{ github.actor }}"
+              "text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }}"
            }
-      
-      # Failure notification
+
      - name: Send Slack notification - Failure
        if: failure()
        uses: slackapi/slack-github-action@v1.26.0
@ -113,5 +112,5 @@ jobs:
        with:
          payload: |
            {
-              "text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.short_sha }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
+              "text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
            }
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 __pycache__
 .DS_Store
 .env
+.env.prod
 .env.test

 # logs and run directory on production
--- a/api/alembic/versions/181475b2a1a1_add_public_access_token.py
+++ b/api/alembic/versions/181475b2a1a1_add_public_access_token.py
@ -0,0 +1,72 @@
+"""add public_access_token
+
+Revision ID: 181475b2a1a1
+Revises: dc33eef8dabe
+Create Date: 2026-01-23 17:37:54.449308
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "181475b2a1a1"
+down_revision: Union[str, None] = "dc33eef8dabe"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Apply revision 181475b2a1a1.
+
+    Adds the nullable ``workflow_runs.public_access_token`` column together
+    with a partial unique index on it, recreates ``ix_api_keys_key_hash`` as a
+    non-unique index, and creates two indexes on ``knowledge_base_chunks``.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    # NOTE(review): the api_keys and knowledge_base_chunks index changes are not
+    # mentioned in the revision title - confirm they are intended rather than
+    # autogenerate drift. key_hash is recreated here with unique=False, while
+    # downgrade() recreates it with unique=True.
+    op.drop_index(op.f("ix_api_keys_key_hash"), table_name="api_keys")
+    op.create_index("ix_api_keys_key_hash", "api_keys", ["key_hash"], unique=False)
+    # IVFFlat index on the embedding column (lists=100, vector_cosine_ops).
+    op.create_index(
+        "ix_kb_chunks_embedding_ivfflat",
+        "knowledge_base_chunks",
+        ["embedding"],
+        unique=False,
+        postgresql_using="ivfflat",
+        postgresql_with={"lists": 100},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+    # Plain, non-unique index on the embedding_model column.
+    op.create_index(
+        "ix_kb_chunks_embedding_model",
+        "knowledge_base_chunks",
+        ["embedding_model"],
+        unique=False,
+    )
+    # New nullable string column (max length 36) for the public access token.
+    op.add_column(
+        "workflow_runs",
+        sa.Column("public_access_token", sa.String(length=36), nullable=True),
+    )
+    # Partial unique index: uniqueness applies only to rows with a non-NULL token.
+    op.create_index(
+        "idx_workflow_runs_public_access_token",
+        "workflow_runs",
+        ["public_access_token"],
+        unique=True,
+        postgresql_where=sa.text("public_access_token IS NOT NULL"),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Revert revision 181475b2a1a1.
+
+    Drops the ``public_access_token`` index and column from ``workflow_runs``,
+    drops the two ``knowledge_base_chunks`` indexes, and recreates
+    ``ix_api_keys_key_hash`` as a unique index.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    # The index is dropped before the column it covers.
+    op.drop_index(
+        "idx_workflow_runs_public_access_token",
+        table_name="workflow_runs",
+        postgresql_where=sa.text("public_access_token IS NOT NULL"),
+    )
+    op.drop_column("workflow_runs", "public_access_token")
+    op.drop_index("ix_kb_chunks_embedding_model", table_name="knowledge_base_chunks")
+    op.drop_index(
+        "ix_kb_chunks_embedding_ivfflat",
+        table_name="knowledge_base_chunks",
+        postgresql_using="ivfflat",
+        postgresql_with={"lists": 100},
+        postgresql_ops={"embedding": "vector_cosine_ops"},
+    )
+    # NOTE(review): key_hash is recreated as unique=True; presumably this fails
+    # if duplicate key_hash values were inserted while the index was non-unique
+    # - confirm before relying on this downgrade.
+    op.drop_index("ix_api_keys_key_hash", table_name="api_keys")
+    op.create_index(op.f("ix_api_keys_key_hash"), "api_keys", ["key_hash"], unique=True)
+    # ### end Alembic commands ###
--- a/api/constants.py
+++ b/api/constants.py
@ -14,7 +14,6 @@ FILLER_SOUND_PROBABILITY = 0.0
 VOICEMAIL_RECORDING_DURATION = 5.0

 # Configuration constants
-ENABLE_SMART_TURN = os.getenv("ENABLE_SMART_TURN", "false").lower() == "true"
 ENABLE_TRACING = os.getenv("ENABLE_TRACING", "false").lower() == "true"
 ENABLE_RNNOISE = os.getenv("ENABLE_RNNOISE", "false").lower() == "true"

@ -52,6 +51,23 @@ ENABLE_ARI_STASIS = os.getenv("ENABLE_ARI_STASIS", "false").lower() == "true"
 SERIALIZE_LOG_OUTPUT = os.getenv("SERIALIZE_LOG_OUTPUT", "false").lower() == "true"
 ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true"

+
+def _get_version() -> str:
+    """Return the ``[project].version`` value from pyproject.toml.
+
+    The file is looked up at ``APP_ROOT_DIR / "pyproject.toml"``. The string
+    ``"dev"`` is returned when the version key is missing or when any
+    exception is raised while importing tomllib, opening, or parsing the file.
+    """
+    fallback = "dev"
+    try:
+        import tomllib
+
+        with open(APP_ROOT_DIR / "pyproject.toml", "rb") as toml_file:
+            parsed = tomllib.load(toml_file)
+        project_table = parsed.get("project", {})
+        return project_table.get("version", fallback)
+    except Exception:
+        # Best-effort lookup: never let version discovery break module import.
+        return fallback
+
+
+# Application version (read from pyproject.toml)
+APP_VERSION = _get_version()
+
 # Country code mapping: ISO country code -> international dialing prefix
 COUNTRY_CODES = {
    "US": "1",  # United States
--- a/api/db/models.py
+++ b/api/db/models.py
@ -360,6 +360,17 @@ class WorkflowRunModel(Base):
    campaign = relationship("CampaignModel")
    queued_run_id = Column(Integer, ForeignKey("queued_runs.id"), nullable=True)
    queued_run = relationship("QueuedRunModel", foreign_keys=[queued_run_id])
+    public_access_token = Column(String(36), nullable=True)
+
+    # Indexes
+    __table_args__ = (
+        Index(
+            "idx_workflow_runs_public_access_token",
+            "public_access_token",
+            unique=True,
+            postgresql_where=text("public_access_token IS NOT NULL"),
+        ),
+    )


 # LoopTalk Testing Models
--- a/api/db/workflow_client.py
+++ b/api/db/workflow_client.py
@ -4,7 +4,7 @@ from typing import Optional

 from sqlalchemy import func
 from sqlalchemy.future import select
-from sqlalchemy.orm import selectinload
+from sqlalchemy.orm import load_only, selectinload

 from api.db.base_client import BaseDBClient
 from api.db.models import WorkflowDefinitionModel, WorkflowModel, WorkflowRunModel
@ -111,6 +111,70 @@ class WorkflowClient(BaseDBClient):
            result = await session.execute(query)
            return result.scalars().all()

+    async def get_all_workflows_for_listing(
+        self, organization_id: int = None, status: str = None
+    ) -> list[WorkflowModel]:
+        """Fetch workflows for list views, loading only a few columns.
+
+        Only ``id``, ``name``, ``status`` and ``created_at`` are requested via
+        ``load_only``; the remaining WorkflowModel columns are not part of the
+        initial SELECT.
+
+        Args:
+            organization_id: When truthy, restrict results to this organization
+            status: When truthy, restrict results to this status value
+
+        Returns:
+            The matching WorkflowModel rows
+        """
+        listing_columns = load_only(
+            WorkflowModel.id,
+            WorkflowModel.name,
+            WorkflowModel.status,
+            WorkflowModel.created_at,
+        )
+        stmt = select(WorkflowModel).options(listing_columns)
+
+        # Falsy filter values (None, 0, "") mean "no filter".
+        if organization_id:
+            stmt = stmt.where(WorkflowModel.organization_id == organization_id)
+        if status:
+            stmt = stmt.where(WorkflowModel.status == status)
+
+        async with self.async_session() as session:
+            fetched = await session.execute(stmt)
+            return fetched.scalars().all()
+
+    async def get_workflow_counts(self, organization_id: int = None) -> dict[str, int]:
+        """Count workflows grouped by status.
+
+        Args:
+            organization_id: When truthy, only count workflows of this organization
+
+        Returns:
+            Dict that always has 'total', 'active' and 'archived' keys, plus one
+            key for every other status value returned by the query
+        """
+        stmt = select(
+            WorkflowModel.status,
+            func.count(WorkflowModel.id).label("count"),
+        )
+        if organization_id:
+            stmt = stmt.where(WorkflowModel.organization_id == organization_id)
+        stmt = stmt.group_by(WorkflowModel.status)
+
+        async with self.async_session() as session:
+            grouped = (await session.execute(stmt)).all()
+
+            # Seed the expected keys so they are present even with zero rows.
+            tally = {"total": 0, "active": 0, "archived": 0}
+            for row_status, row_count in grouped:
+                tally[row_status] = row_count
+                tally["total"] += row_count
+
+            return tally
+
    async def get_workflow(
        self, workflow_id: int, user_id: int = None, organization_id: int = None
    ) -> WorkflowModel | None:
@ -310,3 +374,33 @@ class WorkflowClient(BaseDBClient):
                )
            )
            return result.scalar() or 0
+
+    async def get_workflow_run_counts(self, workflow_ids: list[int]) -> dict[int, int]:
+        """Count runs for several workflows with one grouped query.
+
+        Args:
+            workflow_ids: Workflow IDs to count runs for
+
+        Returns:
+            Dict mapping every requested workflow_id to its run count; IDs with
+            no runs map to 0. An empty input returns an empty dict without
+            opening a session.
+        """
+        if not workflow_ids:
+            return {}
+
+        stmt = (
+            select(
+                WorkflowRunModel.workflow_id,
+                func.count(WorkflowRunModel.id).label("run_count"),
+            )
+            .where(WorkflowRunModel.workflow_id.in_(workflow_ids))
+            .group_by(WorkflowRunModel.workflow_id)
+        )
+
+        async with self.async_session() as session:
+            counted = (await session.execute(stmt)).all()
+
+            # Start every requested ID at 0, then overlay the counts found.
+            per_workflow = dict.fromkeys(workflow_ids, 0)
+            per_workflow.update((wf_id, total) for wf_id, total in counted)
+
+            return per_workflow
--- a/api/db/workflow_run_client.py
+++ b/api/db/workflow_run_client.py
@ -1,3 +1,4 @@
+import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple

@ -414,3 +415,56 @@ class WorkflowRunClient(BaseDBClient):

            organization_id = workflow_run.workflow.user.selected_organization_id
            return workflow_run, organization_id
+
+    async def ensure_public_access_token(self, workflow_run_id: int) -> Optional[str]:
+        """Return the run's public access token, creating one if it has none.
+
+        The row is selected with a row-level lock (``FOR UPDATE`` on backends
+        that support it) so that concurrent callers for the same run are
+        serialized. Without the lock, two callers could both observe a missing
+        token and the later commit would overwrite the token already returned
+        to the first caller.
+
+        Args:
+            workflow_run_id: The ID of the workflow run
+
+        Returns:
+            The public access token string, or None if workflow run not found
+
+        Raises:
+            Exception: Whatever the commit raises, re-raised after rollback
+        """
+        async with self.async_session() as session:
+            # ``of=`` limits the lock to the workflow run row itself.
+            result = await session.execute(
+                select(WorkflowRunModel)
+                .where(WorkflowRunModel.id == workflow_run_id)
+                .with_for_update(of=WorkflowRunModel)
+            )
+            run = result.scalars().first()
+            if not run:
+                return None
+
+            # Return existing token if present (lock is released on session exit)
+            if run.public_access_token:
+                return run.public_access_token
+
+            # Generate and persist new token
+            token = str(uuid.uuid4())
+            run.public_access_token = token
+
+            try:
+                await session.commit()
+            except Exception:
+                await session.rollback()
+                raise
+
+            # The committed value is exactly the one generated above.
+            return token
+
+    async def get_workflow_run_by_public_token(
+        self, token: str
+    ) -> Optional[WorkflowRunModel]:
+        """Find the workflow run whose public access token equals ``token``.
+
+        Args:
+            token: The public access token to match
+
+        Returns:
+            The first matching WorkflowRunModel, or None when nothing matches
+        """
+        stmt = select(WorkflowRunModel).where(
+            WorkflowRunModel.public_access_token == token
+        )
+        async with self.async_session() as session:
+            matches = await session.execute(stmt)
+            return matches.scalars().first()
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -0,0 +1,5 @@
+[project]
+name = "dograh-api"
+version = "1.10.0"
+description = "Backend API for Dograh voice AI platform"
+requires-python = ">=3.12"
--- a/api/routes/main.py
+++ b/api/routes/main.py
@ -1,5 +1,6 @@
 from fastapi import APIRouter
 from loguru import logger
+from pydantic import BaseModel

 from api.routes.campaign import router as campaign_router
 from api.routes.credentials import router as credentials_router
@ -9,6 +10,7 @@ from api.routes.looptalk import router as looptalk_router
 from api.routes.organization import router as organization_router
 from api.routes.organization_usage import router as organization_usage_router
 from api.routes.public_agent import router as public_agent_router
+from api.routes.public_download import router as public_download_router
 from api.routes.public_embed import router as public_embed_router
 from api.routes.reports import router as reports_router
 from api.routes.s3_signed_url import router as s3_router
@ -43,11 +45,24 @@ router.include_router(reports_router)
 router.include_router(webrtc_signaling_router)
 router.include_router(public_embed_router)
 router.include_router(public_agent_router)
+router.include_router(public_download_router)
 router.include_router(workflow_embed_router)
 router.include_router(knowledge_base_router)


-@router.get("/health")
-async def health():
+class HealthResponse(BaseModel):
+    status: str
+    version: str
+    backend_api_endpoint: str
+
+
+@router.get("/health", response_model=HealthResponse)
+async def health() -> HealthResponse:
+    from api.constants import APP_VERSION, BACKEND_API_ENDPOINT
+
    logger.debug("Health endpoint called")
-    return {"message": "OK"}
+    return HealthResponse(
+        status="ok",
+        version=APP_VERSION,
+        backend_api_endpoint=BACKEND_API_ENDPOINT,
+    )
--- a/api/routes/public_download.py
+++ b/api/routes/public_download.py
@ -0,0 +1,95 @@
+"""Public download endpoints for workflow recordings and transcripts.
+
+These endpoints provide secure, token-based public access to workflow artifacts
+without requiring authentication. Tokens are generated on-demand when webhooks
+are executed and included in the webhook payload.
+"""
+
+from typing import Literal
+
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import RedirectResponse
+from loguru import logger
+
+from api.db import db_client
+from api.services.storage import get_storage_for_backend
+
+router = APIRouter(prefix="/public/download")
+
+
+@router.get("/workflow/{token}/{artifact_type}")
+async def download_workflow_artifact(
+    token: str,
+    artifact_type: Literal["recording", "transcript"],
+    inline: bool = Query(
+        default=False, description="Display inline in browser instead of download"
+    ),
+):
+    """Download a workflow recording or transcript via public access token.
+
+    This endpoint:
+    1. Validates the public access token
+    2. Looks up the corresponding workflow run
+    3. Generates a signed URL for the requested artifact
+    4. Redirects to the signed URL
+
+    Args:
+        token: The public access token (UUID format)
+        artifact_type: Type of artifact - "recording" or "transcript"
+        inline: If true, sets Content-Disposition to inline for browser preview
+
+    Returns:
+        RedirectResponse to the signed URL (302 redirect)
+
+    Raises:
+        HTTPException 404: If token is invalid or artifact not found
+        HTTPException 500: If the storage backend is invalid or no signed URL
+            could be generated
+    """
+    # 1. Lookup workflow run by token (only a token prefix is ever logged)
+    workflow_run = await db_client.get_workflow_run_by_public_token(token)
+    if not workflow_run:
+        logger.warning(f"Invalid public access token: {token[:8]}...")
+        raise HTTPException(status_code=404, detail="Invalid or expired token")
+
+    # 2. Get file path based on artifact type
+    if artifact_type == "recording":
+        file_path = workflow_run.recording_url
+    else:  # transcript
+        file_path = workflow_run.transcript_url
+
+    if not file_path:
+        logger.warning(
+            f"Artifact not found: type={artifact_type}, workflow_run_id={workflow_run.id}"
+        )
+        raise HTTPException(
+            status_code=404,
+            detail=f"No {artifact_type} available for this workflow run",
+        )
+
+    # 3. Get storage backend for this workflow run
+    try:
+        storage = get_storage_for_backend(workflow_run.storage_backend)
+    except ValueError as e:
+        logger.error(f"Invalid storage backend: {workflow_run.storage_backend}")
+        # Chain the cause so the original error stays visible in tracebacks.
+        raise HTTPException(
+            status_code=500, detail="Storage configuration error"
+        ) from e
+
+    # 4. Generate signed URL (1 hour expiration)
+    try:
+        signed_url = await storage.aget_signed_url(
+            file_path=file_path,
+            expiration=3600,  # 1 hour
+            force_inline=inline,
+        )
+    except Exception as e:
+        logger.error(f"Failed to generate signed URL: {e}")
+        raise HTTPException(
+            status_code=500, detail="Failed to generate download URL"
+        ) from e
+
+    if not signed_url:
+        logger.error(f"Storage returned None for signed URL: {file_path}")
+        raise HTTPException(status_code=500, detail="Failed to generate download URL")
+
+    logger.info(
+        f"Generated signed URL for {artifact_type}: workflow_run_id={workflow_run.id}, token={token[:8]}..."
+    )
+
+    # 5. Redirect to signed URL
+    return RedirectResponse(url=signed_url, status_code=302)
--- a/api/routes/workflow.py
+++ b/api/routes/workflow.py
@ -97,6 +97,24 @@ class WorkflowResponse(BaseModel):
    workflow_configurations: dict | None = None


+class WorkflowListResponse(BaseModel):
+    """Lightweight response for workflow listings (excludes large fields)."""
+
+    # Identity and display fields copied from the workflow row.
+    id: int
+    name: str
+    # Status filter documentation in this file names "active" and "archived".
+    status: str
+    created_at: datetime
+    # Number of workflow runs counted for this workflow; 0 when none are found.
+    total_runs: int
+
+
+class WorkflowCountResponse(BaseModel):
+    """Response for workflow count endpoint."""
+
+    # Filled from the 'total', 'active' and 'archived' entries of the counts
+    # dict returned by db_client.get_workflow_counts.
+    total: int
+    active: int
+    archived: int
+
+
 class WorkflowTemplateResponse(BaseModel):
    id: int
    template_name: str
@ -359,6 +377,26 @@ class WorkflowSummaryResponse(BaseModel):
    name: str


+@router.get("/count")
+async def get_workflow_count(
+    user: UserModel = Depends(get_user),
+) -> WorkflowCountResponse:
+    """Get workflow counts for the authenticated user's organization.
+
+    This is a lightweight endpoint for checking if the user has workflows,
+    useful for redirect logic without fetching full workflow data.
+    """
+    # Counts are scoped to the organization the user currently has selected.
+    org_id = user.selected_organization_id
+    tally = await db_client.get_workflow_counts(organization_id=org_id)
+
+    return WorkflowCountResponse(
+        total=tally["total"], active=tally["active"], archived=tally["archived"]
+    )
+
+
@router.get("/fetch")
 async def get_workflows(
    user: UserModel = Depends(get_user),
@ -366,45 +404,43 @@ async def get_workflows(
        None,
        description="Filter by status - can be single value (active/archived) or comma-separated (active,archived)",
    ),
-) -> List[WorkflowResponse]:
-    """Get all workflows for the authenticated user's organization"""
+) -> List[WorkflowListResponse]:
+    """Get all workflows for the authenticated user's organization.
+
+    Returns a lightweight response with only essential fields for listing.
+    Use GET /workflow/fetch/{workflow_id} to get full workflow details.
+    """
    # Handle comma-separated status values
    if status and "," in status:
        # Split comma-separated values and fetch workflows for each status
        status_list = [s.strip() for s in status.split(",")]
        all_workflows = []
        for status_value in status_list:
-            workflows = await db_client.get_all_workflows(
+            workflows = await db_client.get_all_workflows_for_listing(
                organization_id=user.selected_organization_id, status=status_value
            )
            all_workflows.extend(workflows)
        workflows = all_workflows
    else:
        # Single status or no status filter
-        workflows = await db_client.get_all_workflows(
+        workflows = await db_client.get_all_workflows_for_listing(
            organization_id=user.selected_organization_id, status=status
        )

-    # Get run counts for each workflow
-    workflow_responses = []
-    for workflow in workflows:
-        run_count = await db_client.get_workflow_run_count(workflow.id)
-        workflow_responses.append(
-            {
-                "id": workflow.id,
-                "name": workflow.name,
-                "status": workflow.status,
-                "created_at": workflow.created_at,
-                "workflow_definition": workflow.workflow_definition_with_fallback,
-                "current_definition_id": workflow.current_definition_id,
-                "template_context_variables": workflow.template_context_variables,
-                "call_disposition_codes": workflow.call_disposition_codes,
-                "workflow_configurations": workflow.workflow_configurations,
-                "total_runs": run_count,
-            }
-        )
+    # Get run counts for all workflows in a single query
+    workflow_ids = [workflow.id for workflow in workflows]
+    run_counts = await db_client.get_workflow_run_counts(workflow_ids)

-    return workflow_responses
+    return [
+        WorkflowListResponse(
+            id=workflow.id,
+            name=workflow.name,
+            status=workflow.status,
+            created_at=workflow.created_at,
+            total_runs=run_counts.get(workflow.id, 0),
+        )
+        for workflow in workflows
+    ]


@router.get("/fetch/{workflow_id}")
--- a/api/services/campaign/call_dispatcher.py
+++ b/api/services/campaign/call_dispatcher.py
@ -170,13 +170,6 @@ class CampaignCallDispatcher:
            )
            raise ValueError(f"Workflow {campaign.workflow_id} not found")

-        # Merge context variables (queued_run context already includes retry info if applicable)
-        initial_context = {
-            **workflow.template_context_variables,
-            **queued_run.context_variables,
-            "campaign_id": campaign.id,
-        }
-
        # Extract phone number
        phone_number = queued_run.context_variables.get("phone_number")
        if not phone_number:
@ -186,13 +179,25 @@ class CampaignCallDispatcher:
            )
            raise ValueError(f"No phone number in queued run {queued_run.id}")

-        # Create workflow run with queued_run_id tracking
-        workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
-
        # Get provider first to determine the mode
        provider = await self.get_telephony_provider(campaign.organization_id)
        workflow_run_mode = provider.PROVIDER_NAME
+        
+        logger.info(f"Provider name: {provider.PROVIDER_NAME}")
+        logger.info(f"Queued run context: {queued_run.context_variables}")

+        # Merge context variables (queued_run context already includes retry info if applicable)
+        initial_context = {
+            **workflow.template_context_variables,
+            **queued_run.context_variables,
+            "campaign_id": campaign.id,
+            "provider": provider.PROVIDER_NAME,
+        }
+        
+        logger.info(f"Final initial_context: {initial_context}")
+
+        # Create workflow run with queued_run_id tracking
+        workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
        try:
            workflow_run = await db_client.create_workflow_run(
                name=workflow_run_name,
@ -243,6 +248,8 @@ class CampaignCallDispatcher:
                to_number=phone_number,
                webhook_url=webhook_url,
                workflow_run_id=workflow_run.id,
+                workflow_id=campaign.workflow_id,
+                user_id=campaign.created_by,
            )

            # Store provider type and metadata in gathered_context
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -300,7 +300,7 @@ TTSConfig = Annotated[
 ###################################################### STT ########################################################################


-DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general"]
+DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
 DEEPGRAM_LANGUAGES = [
    "multi",
    "en",
--- a/api/services/looptalk/core/pipeline_builder.py
+++ b/api/services/looptalk/core/pipeline_builder.py
@ -103,7 +103,6 @@ class LoopTalkPipelineBuilder:

        # Set the context and audio_buffer after creation
        engine.set_context(context)
-        engine.set_audio_buffer(audio_buffer)

        context_aggregator = LLMContextAggregatorPair(context)

--- a/api/services/looptalk/internal_serializer.py
+++ b/api/services/looptalk/internal_serializer.py
@ -12,9 +12,8 @@ from pipecat.frames.frames import (
    Frame,
    InputAudioRawFrame,
    OutputAudioRawFrame,
-    StartFrame,
 )
-from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
+from pipecat.serializers.base_serializer import FrameSerializer


 class InternalFrameSerializer(FrameSerializer):
@ -24,15 +23,6 @@ class InternalFrameSerializer(FrameSerializer):
    preventing control frames from creating infinite loops.
    """

-    @property
-    def type(self) -> FrameSerializerType:
-        """Internal transport uses binary frames."""
-        return FrameSerializerType.BINARY
-
-    async def setup(self, frame: StartFrame):
-        """No setup required for internal transport."""
-        pass
-
    async def serialize(self, frame: Frame) -> bytes | None:
        """Only serialize audio frames for transmission between agents."""
        # Only pass audio frames between agents
--- a/api/services/pipecat/event_handlers.py
+++ b/api/services/pipecat/event_handlers.py
@ -22,16 +22,21 @@ from pipecat.pipeline.task import PipelineTask
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor


-def register_transport_event_handlers(
+def register_event_handlers(
    task: PipelineTask,
    transport,
-    workflow_run_id,
+    workflow_run_id: int,
    engine: PipecatEngine,
    audio_buffer: AudioBufferProcessor,
+    in_memory_logs_buffer: InMemoryLogsBuffer,
+    pipeline_metrics_aggregator: PipelineMetricsAggregator,
    audio_config=AudioConfig,
 ):
-    """Register event handlers for transport events"""
+    """Register all event handlers for transport and task events.

+    Returns:
+        Tuple of (in_memory_audio_buffer, in_memory_transcript_buffer) for use by other handlers.
+    """
    # Initialize in-memory buffers with proper audio configuration
    sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
    num_channels = 1  # Pipeline audio is always mono
@ -48,13 +53,35 @@ def register_transport_event_handlers(
    )
    in_memory_transcript_buffer = InMemoryTranscriptBuffer(workflow_run_id)

+    # Track both events to ensure LLM is only triggered after both occur
+    ready_state = {
+        "pipeline_started": False,
+        "client_connected": False,
+        "llm_triggered": False,
+    }
+
+    async def maybe_trigger_llm():
+        """Trigger LLM only after both pipeline_started and client_connected events."""
+        if (
+            ready_state["pipeline_started"]
+            and ready_state["client_connected"]
+            and not ready_state["llm_triggered"]
+        ):
+            ready_state["llm_triggered"] = True
+            logger.debug(
+                "Both pipeline_started and client_connected received - triggering initial LLM generation"
+            )
+            await engine.llm.queue_frame(LLMContextFrame(engine.context))
+
    @transport.event_handler("on_client_connected")
-    async def on_client_connected(transport, participant):
-        logger.debug("In on_client_connected callback handler - initializing workflow")
+    async def on_client_connected(_transport, _participant):
+        logger.debug("In on_client_connected callback handler")
        await audio_buffer.start_recording()
+        ready_state["client_connected"] = True
+        await maybe_trigger_llm()

    @transport.event_handler("on_client_disconnected")
-    async def on_client_disconnected(transport, participant):
+    async def on_client_disconnected(_transport, _participant):
        call_disposed = engine.is_call_disposed()

        logger.debug(
@ -69,33 +96,16 @@ def register_transport_event_handlers(
        if not call_disposed:
            await task.cancel()

-    # Return the buffers so they can be passed to other handlers
-    return in_memory_audio_buffer, in_memory_transcript_buffer
-
-
-def register_task_event_handler(
-    workflow_run_id: int,
-    engine: PipecatEngine,
-    task: PipelineTask,
-    transport,
-    audio_buffer: AudioBufferProcessor,
-    in_memory_audio_buffer: InMemoryAudioBuffer,
-    in_memory_transcript_buffer: InMemoryTranscriptBuffer,
-    in_memory_logs_buffer: InMemoryLogsBuffer,
-    pipeline_metrics_aggregator: PipelineMetricsAggregator,
-):
    @task.event_handler("on_pipeline_started")
-    async def on_pipeline_started(task: PipelineTask, frame: Frame):
-        logger.debug(
-            "In on_pipeline_started callback handler - triggering initial LLM generation"
-        )
-        # Trigger initial LLM generation after pipeline has started
-        await engine.llm.queue_frame(LLMContextFrame(engine.context))
+    async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
+        logger.debug("In on_pipeline_started callback handler")
+        ready_state["pipeline_started"] = True
+        await maybe_trigger_llm()

    @task.event_handler("on_pipeline_finished")
    async def on_pipeline_finished(
        task: PipelineTask,
-        frame: Frame,
+        _frame: Frame,
    ):
        logger.debug(f"In on_pipeline_finished callback handler")

@ -207,14 +217,13 @@ def register_task_event_handler(
        if workflow_run and workflow_run.campaign_id:
            await campaign_call_dispatcher.release_call_slot(workflow_run_id)

-        # Write buffers to temp files and enqueue S3 upload
+        # Write buffers to temp files and enqueue combined processing task
+        audio_temp_path = None
+        transcript_temp_path = None
+
        try:
-            # Only upload if buffers have content
            if not in_memory_audio_buffer.is_empty:
                audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
-                await enqueue_job(
-                    FunctionNames.UPLOAD_AUDIO_TO_S3, workflow_run_id, audio_temp_path
-                )
            else:
                logger.debug("Audio buffer is empty, skipping upload")

@ -222,11 +231,6 @@ def register_task_event_handler(
                transcript_temp_path = (
                    await in_memory_transcript_buffer.write_to_temp_file()
                )
-                await enqueue_job(
-                    FunctionNames.UPLOAD_TRANSCRIPT_TO_S3,
-                    workflow_run_id,
-                    transcript_temp_path,
-                )
            else:
                logger.debug("Transcript buffer is empty, skipping upload")

@ -234,10 +238,18 @@ def register_task_event_handler(
            logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)

        await enqueue_job(FunctionNames.CALCULATE_WORKFLOW_RUN_COST, workflow_run_id)
+
+        # Combined task: uploads artifacts then runs integrations sequentially
        await enqueue_job(
-            FunctionNames.RUN_INTEGRATIONS_POST_WORKFLOW_RUN, workflow_run_id
+            FunctionNames.PROCESS_WORKFLOW_COMPLETION,
+            workflow_run_id,
+            audio_temp_path,
+            transcript_temp_path,
        )

+    # Return the buffers so they can be passed to other handlers
+    return in_memory_audio_buffer, in_memory_transcript_buffer
+

 def register_audio_data_handler(
    audio_buffer: AudioBufferProcessor,
@ -260,18 +272,26 @@ def register_audio_data_handler(
            # Could implement overflow to disk here if needed


-def register_transcript_handler(
-    transcript, workflow_run_id, in_memory_buffer: InMemoryTranscriptBuffer
+def register_transcript_handlers(
+    user_aggregator,
+    assistant_aggregator,
+    workflow_run_id,
+    in_memory_buffer: InMemoryTranscriptBuffer,
 ):
-    """Register event handler for transcript updates"""
+    """Register event handlers for transcript updates on context aggregators.

-    @transcript.event_handler("on_transcript_update")
-    async def on_transcript_update(processor, frame):
-        transcript_text = ""
-        for msg in frame.messages:
-            timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
-            line = f"{timestamp}{msg.role}: {msg.content}\n"
-            transcript_text += line
+    Uses the on_user_turn_stopped and on_assistant_turn_stopped events to capture
+    transcripts as turns complete, following the event-based pattern.
+    """

-        # Use in-memory buffer
-        await in_memory_buffer.append(transcript_text)
+    @user_aggregator.event_handler("on_user_turn_stopped")
+    async def on_user_turn_stopped(aggregator, strategy, message):
+        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
+        line = f"{timestamp}user: {message.content}\n"
+        await in_memory_buffer.append(line)
+
+    @assistant_aggregator.event_handler("on_assistant_turn_stopped")
+    async def on_assistant_turn_stopped(aggregator, message):
+        timestamp = f"[{message.timestamp}] " if message.timestamp else ""
+        line = f"{timestamp}assistant: {message.content}\n"
+        await in_memory_buffer.append(line)
--- a/api/services/pipecat/pipeline_builder.py
+++ b/api/services/pipecat/pipeline_builder.py
@ -1,5 +1,4 @@
 import os
-from typing import TYPE_CHECKING

 from loguru import logger

@ -11,14 +10,10 @@ from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
-from pipecat.processors.transcript_processor import TranscriptProcessor
 from pipecat.utils.context import turn_var

-if TYPE_CHECKING:
-    from api.services.workflow.pipecat_engine import PipecatEngine

-
-def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine"):
+def create_pipeline_components(audio_config: AudioConfig):
    """Create and return the main pipeline components with proper audio configuration"""
    logger.info(f"Creating pipeline components with audio config: {audio_config}")

@ -28,28 +23,21 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
        buffer_size=audio_config.buffer_size_bytes,
    )

-    transcript = TranscriptProcessor(
-        assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
-    )
-
    context = LLMContext()

-    return audio_buffer, transcript, context
+    return audio_buffer, context


 def build_pipeline(
    transport,
    stt,
-    transcript,
    audio_buffer,
    llm,
    tts,
    user_context_aggregator,
    assistant_context_aggregator,
    pipeline_engine_callback_processor,
-    stt_mute_filter,
    pipeline_metrics_aggregator,
-    user_idle_disconnect,
    voicemail_detector=None,
 ):
    """Build the main pipeline with all components.
@ -63,7 +51,7 @@ def build_pipeline(
    # Build processors list with optional voicemail detection
    processors = [
        transport.input(),  # Transport user input
-        stt,  # STT (audio_passthrough=True by default, passes InputAudioRawFrame)
+        stt,
    ]

    # Insert voicemail detector after STT if enabled
@ -76,16 +64,12 @@ def build_pipeline(
    # Continue with the rest of the pipeline
    processors.extend(
        [
-            stt_mute_filter,  # STTMuteFilters don't let VAD related events pass through if muted
-            user_idle_disconnect,
-            transcript.user(),
            user_context_aggregator,
            llm,  # LLM
            pipeline_engine_callback_processor,
            tts,  # TTS
            transport.output(),  # Transport bot output
            audio_buffer,  # AudioBufferProcessor - records both input and output audio
-            transcript.assistant(),
            assistant_context_aggregator,  # Assistant spoken responses
            pipeline_metrics_aggregator,
        ]
@ -98,7 +82,6 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
    """Create a pipeline task with appropriate parameters"""
    # Set up pipeline params with audio configuration if provided
    pipeline_params = PipelineParams(
-        allow_interruptions=True,
        enable_metrics=True,
        enable_usage_metrics=True,
        send_initial_empty_metrics=False,
@ -119,6 +102,7 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
        pipeline,
        params=pipeline_params,
        enable_tracing=ENABLE_TRACING,
+        enable_rtvi=False,
        conversation_id=f"{workflow_run_id}",
    )

--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -7,12 +7,12 @@ from loguru import logger
 from api.db import db_client
 from api.db.models import WorkflowModel
 from api.enums import WorkflowRunMode
+from api.services.configuration.registry import ServiceProviders
 from api.services.pipecat.audio_config import AudioConfig, create_audio_config
 from api.services.pipecat.event_handlers import (
    register_audio_data_handler,
-    register_task_event_handler,
-    register_transcript_handler,
-    register_transport_event_handlers,
+    register_event_handlers,
+    register_transcript_handlers,
 )
 from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
 from api.services.pipecat.pipeline_builder import (
@ -46,20 +46,25 @@ from api.services.workflow.pipecat_engine import PipecatEngine
 from api.services.workflow.workflow import WorkflowGraph
 from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
 from pipecat.pipeline.base_task import PipelineTaskParams
-from pipecat.processors.aggregators.llm_response import (
+from pipecat.processors.aggregators.llm_response_universal import (
    LLMAssistantAggregatorParams,
+    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
 )
-from pipecat.processors.aggregators.llm_response_universal import (
-    LLMContextAggregatorPair,
-)
-from pipecat.processors.filters.stt_mute_filter import (
-    STTMuteConfig,
-    STTMuteFilter,
-    STTMuteStrategy,
-)
-from pipecat.processors.user_idle_processor import UserIdleProcessor
 from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
+from pipecat.turns.user_mute import MuteUntilFirstBotCompleteUserMuteStrategy
+from pipecat.turns.user_start import (
+    ExternalUserTurnStartStrategy,
+    TranscriptionUserTurnStartStrategy,
+)
+from pipecat.turns.user_start.vad_user_turn_start_strategy import (
+    VADUserTurnStartStrategy,
+)
+from pipecat.turns.user_stop import (
+    ExternalUserTurnStopStrategy,
+    TranscriptionUserTurnStopStrategy,
+)
+from pipecat.turns.user_turn_strategies import UserTurnStrategies
 from pipecat.utils.context import set_current_run_id
 from pipecat.utils.enums import EndTaskReason
 from pipecat.utils.tracing.context_registry import ContextProviderRegistry
@ -517,12 +522,11 @@ async def _run_pipeline(
        embeddings_model=embeddings_model,
    )

-    # Create pipeline components with audio configuration and engine
-    audio_buffer, transcript, context = create_pipeline_components(audio_config, engine)
+    # Create pipeline components with audio configuration
+    audio_buffer, context = create_pipeline_components(audio_config)

    # Set the context and audio_buffer after creation
    engine.set_context(context)
-    engine.set_audio_buffer(audio_buffer)

    # Set Stasis connection for immediate transfers (if available)
    if stasis_connection:
@ -532,7 +536,31 @@ async def _run_pipeline(
        expect_stripped_words=True,
        correct_aggregation_callback=engine.create_aggregation_correction_callback(),
    )
-    user_params = LLMUserAggregatorParams(enable_emulated_vad_interruptions=True)
+
+    # Configure turn strategies based on STT provider and model
+    # Deepgram Flux uses external turn detection (VAD + External start/stop)
+    # Other models use transcription-based turn detection with smart turn analyzer
+    is_deepgram_flux = (
+        user_config.stt.provider == ServiceProviders.DEEPGRAM.value
+        and user_config.stt.model == "flux-general-en"
+    )
+
+    if is_deepgram_flux:
+        user_turn_strategies = UserTurnStrategies(
+            start=[VADUserTurnStartStrategy(), ExternalUserTurnStartStrategy()],
+            stop=[ExternalUserTurnStopStrategy()],
+        )
+    else:
+        user_turn_strategies = UserTurnStrategies(
+            start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
+            stop=[TranscriptionUserTurnStopStrategy()],
+        )
+
+    user_params = LLMUserAggregatorParams(
+        user_turn_strategies=user_turn_strategies,
+        user_mute_strategies=[MuteUntilFirstBotCompleteUserMuteStrategy()],
+        user_idle_timeout=max_user_idle_timeout,
+    )
    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params, user_params=user_params
    )
@ -547,25 +575,20 @@ async def _run_pipeline(

    pipeline_metrics_aggregator = PipelineMetricsAggregator()

-    # Create STT mute filter using the selected strategies and the engine's callback
-    stt_mute_filter = STTMuteFilter(
-        config=STTMuteConfig(
-            strategies={
-                STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE,
-                STTMuteStrategy.CUSTOM,
-            },
-            should_mute_callback=engine.create_should_mute_callback(),
-        )
-    )
-
-    # Use engine's user idle callback with configured timeout
-    user_idle_disconnect = UserIdleProcessor(
-        callback=engine.create_user_idle_callback(), timeout=max_user_idle_timeout
-    )
-
    user_context_aggregator = context_aggregator.user()
    assistant_context_aggregator = context_aggregator.assistant()

+    # Register user idle event handlers
+    user_idle_handler = engine.create_user_idle_handler()
+
+    @user_context_aggregator.event_handler("on_user_turn_idle")
+    async def on_user_turn_idle(aggregator):
+        await user_idle_handler.handle_idle(aggregator)
+
+    @user_context_aggregator.event_handler("on_user_turn_started")
+    async def on_user_turn_started(aggregator, strategy):
+        user_idle_handler.reset()
+
    # Create voicemail detector if enabled in the workflow's start node
    voicemail_detector = None
    start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
@ -592,16 +615,13 @@ async def _run_pipeline(
    pipeline = build_pipeline(
        transport,
        stt,
-        transcript,
        audio_buffer,
        llm,
        tts,
        user_context_aggregator,
        assistant_context_aggregator,
        pipeline_engine_callback_processor,
-        stt_mute_filter,
        pipeline_metrics_aggregator,
-        user_idle_disconnect,
        voicemail_detector=voicemail_detector,
    )

@ -614,18 +634,6 @@ async def _run_pipeline(
    # Initialize the engine to set the initial context
    await engine.initialize()

-    # Register event handlers
-    in_memory_audio_buffer, in_memory_transcript_buffer = (
-        register_transport_event_handlers(
-            task,
-            transport,
-            workflow_run_id,
-            engine=engine,
-            audio_buffer=audio_buffer,
-            audio_config=audio_config,
-        )
-    )
-
    # Add real-time feedback observer if WebSocket sender is available
    # Note: ws_sender was already fetched earlier for node_transition_callback
    if ws_sender:
@ -635,21 +643,24 @@ async def _run_pipeline(
        )
        task.add_observer(feedback_observer)

-    register_task_event_handler(
-        workflow_run_id,
-        engine,
+    # Register event handlers
+    in_memory_audio_buffer, in_memory_transcript_buffer = register_event_handlers(
        task,
        transport,
-        audio_buffer,
-        in_memory_audio_buffer,
-        in_memory_transcript_buffer,
-        in_memory_logs_buffer,
-        pipeline_metrics_aggregator,
+        workflow_run_id,
+        engine=engine,
+        audio_buffer=audio_buffer,
+        in_memory_logs_buffer=in_memory_logs_buffer,
+        pipeline_metrics_aggregator=pipeline_metrics_aggregator,
+        audio_config=audio_config,
    )

    register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)
-    register_transcript_handler(
-        transcript, workflow_run_id, in_memory_transcript_buffer
+    register_transcript_handlers(
+        user_context_aggregator,
+        assistant_context_aggregator,
+        workflow_run_id,
+        in_memory_transcript_buffer,
    )

    try:
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
 from api.services.configuration.registry import ServiceProviders
 from pipecat.services.azure.llm import AzureLLMService
 from pipecat.services.cartesia.stt import CartesiaSTTService
+from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
 from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
 from pipecat.services.deepgram.tts import DeepgramTTSService
 from pipecat.services.dograh.llm import DograhLLMService
@ -34,6 +35,20 @@ def create_stt_service(user_config):
        f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
    )
    if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
+        # Check if using Flux model (English-only, no language selection)
+        if user_config.stt.model == "flux-general-en":
+            logger.debug("Using DeepGram Flux Model")
+            return DeepgramFluxSTTService(
+                api_key=user_config.stt.api_key,
+                model=user_config.stt.model,
+                params=DeepgramFluxSTTService.InputParams(
+                    eot_timeout_ms=3000,
+                    eot_threshold=0.7,
+                ),
+                should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
+            )
+
+        # Models other than Flux
        # Use language from user config, defaulting to "multi" for multilingual support
        language = getattr(user_config.stt, "language", None) or "multi"
        live_options = LiveOptions(
@ -44,7 +59,9 @@ def create_stt_service(user_config):
        )
        logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
        return DeepgramSTTService(
-            live_options=live_options, api_key=user_config.stt.api_key
+            live_options=live_options,
+            api_key=user_config.stt.api_key,
+            should_interrupt=False,  # Let UserAggregator take care of sending InterruptionFrame
        )
    elif user_config.stt.provider == ServiceProviders.OPENAI.value:
        return OpenAISTTService(
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -2,10 +2,9 @@ import os

 from fastapi import WebSocket

-from api.constants import APP_ROOT_DIR, ENABLE_RNNOISE, ENABLE_SMART_TURN
+from api.constants import APP_ROOT_DIR
 from api.db import db_client
 from api.enums import OrganizationConfigurationKey
-from api.services.looptalk.internal_transport import InternalTransport
 from api.services.pipecat.audio_config import AudioConfig
 from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
 from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer
@ -13,11 +12,8 @@ from api.services.telephony.stasis_rtp_transport import (
    StasisRTPTransport,
    StasisRTPTransportParams,
 )
-from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter
 from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer
 from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
-from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
-from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
 from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
 from pipecat.serializers.twilio import TwilioFrameSerializer
 from pipecat.serializers.vobiz import VobizFrameSerializer
@ -35,19 +31,6 @@ librnnoise_path = os.path.normpath(
 )


-def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig):
-    """Create a turn analyzer backed by the local Smart Turn HTTP service.
-
-    Args:
-        workflow_run_id: ID of the workflow run for turn analyzer context
-        audio_config: Audio configuration containing pipeline sample rate
-    """
-    if ENABLE_SMART_TURN:
-        return LocalSmartTurnAnalyzerV3(params=SmartTurnParams())
-
-    return None
-
-
 async def create_twilio_transport(
    websocket_client: WebSocket,
    stream_sid: str,
@ -78,8 +61,6 @@ async def create_twilio_transport(
            f"Incomplete Twilio configuration for organization {organization_id}"
        )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    serializer = TwilioFrameSerializer(
        stream_sid=stream_sid,
        call_sid=call_sid,
@ -119,11 +100,7 @@ async def create_twilio_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -158,8 +135,6 @@ async def create_cloudonix_transport(
            f"Required: bearer_token, domain_id"
        )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    from pipecat.serializers.cloudonix import CloudonixFrameSerializer

    serializer = CloudonixFrameSerializer(
@ -202,11 +177,7 @@ async def create_cloudonix_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -238,8 +209,6 @@ async def create_vonage_transport(
            f"Incomplete Vonage configuration for organization {organization_id}"
        )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    serializer = VonageFrameSerializer(
        call_uuid=call_uuid,
        application_id=application_id,
@ -283,11 +252,7 @@ async def create_vonage_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -337,8 +302,6 @@ async def create_vobiz_transport(
        f"from_numbers={len(config.get('from_numbers', []))} numbers"
    )

-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
-
    # Use VobizFrameSerializer for Vobiz WebSocket protocol
    serializer = VobizFrameSerializer(
        stream_id=stream_id,
@ -389,11 +352,7 @@ async def create_vobiz_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -411,7 +370,6 @@ def create_webrtc_transport(
    ambient_noise_config: dict | None = None,
 ):
    """Create a transport for WebRTC connections"""
-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)

    return SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
@ -445,10 +403,6 @@ def create_webrtc_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -461,7 +415,6 @@ def create_stasis_transport(
    ambient_noise_config: dict | None = None,
 ):
    """Create a transport for ARI connections"""
-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)

    serializer = StasisRTPFrameSerializer(
        StasisRTPFrameSerializer.InputParams(
@ -502,11 +455,7 @@ def create_stasis_transport(
                if ambient_noise_config and ambient_noise_config.get("enabled", False)
                else SilenceAudioMixer()
            ),
-            turn_analyzer=turn_analyzer,
            serializer=serializer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
        ),
    )

@ -528,46 +477,44 @@ def create_internal_transport(
    Returns:
        InternalTransport instance configured with turn analyzer
    """
-    turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
+    pass
+    # Commented out because pulling looptalk into the regular import flow
+    # was causing issues. Consider moving this to looptalk/orchestrator.py.

    # Create and return the internal transport with latency
-    return InternalTransport(
-        params=TransportParams(
-            audio_out_enabled=True,
-            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_channels=1,
-            audio_in_enabled=True,
-            audio_in_sample_rate=audio_config.transport_in_sample_rate,
-            audio_in_channels=1,
-            vad_analyzer=(
-                SileroVADAnalyzer(
-                    params=VADParams(
-                        confidence=vad_config.get("confidence", 0.7),
-                        start_secs=vad_config.get("start_seconds", 0.4),
-                        stop_secs=vad_config.get("stop_seconds", 0.8),
-                        min_volume=vad_config.get("minimum_volume", 0.6),
-                    )
-                )
-                if vad_config
-                else SileroVADAnalyzer()
-            ),
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
-            turn_analyzer=turn_analyzer,
-            audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
-            if ENABLE_RNNOISE
-            else None,
-        ),
-        latency_seconds=latency_seconds,
-    )
+    # return InternalTransport(
+    #     params=TransportParams(
+    #         audio_out_enabled=True,
+    #         audio_out_sample_rate=audio_config.transport_out_sample_rate,
+    #         audio_out_channels=1,
+    #         audio_in_enabled=True,
+    #         audio_in_sample_rate=audio_config.transport_in_sample_rate,
+    #         audio_in_channels=1,
+    #         vad_analyzer=(
+    #             SileroVADAnalyzer(
+    #                 params=VADParams(
+    #                     confidence=vad_config.get("confidence", 0.7),
+    #                     start_secs=vad_config.get("start_seconds", 0.4),
+    #                     stop_secs=vad_config.get("stop_seconds", 0.8),
+    #                     min_volume=vad_config.get("minimum_volume", 0.6),
+    #                 )
+    #             )
+    #             if vad_config
+    #             else SileroVADAnalyzer()
+    #         ),
+    #         audio_out_mixer=(
+    #             SoundfileMixer(
+    #                 sound_files={
+    #                     "office": APP_ROOT_DIR
+    #                     / "assets"
+    #                     / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
+    #                 },
+    #                 default_sound="office",
+    #                 volume=ambient_noise_config.get("volume", 0.3),
+    #             )
+    #             if ambient_noise_config and ambient_noise_config.get("enabled", False)
+    #             else SilenceAudioMixer()
+    #         ),
+    #     ),
+    #     latency_seconds=latency_seconds,
+    # )
--- a/api/services/telephony/stasis_rtp_serializer.py
+++ b/api/services/telephony/stasis_rtp_serializer.py
@ -15,6 +15,8 @@ The serializer:
 from typing import Optional

 from loguru import logger
+from pydantic import BaseModel
+
 from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
 from pipecat.frames.frames import (
    AudioRawFrame,
@ -22,8 +24,7 @@ from pipecat.frames.frames import (
    InputAudioRawFrame,
    StartFrame,
 )
-from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
-from pydantic import BaseModel
+from pipecat.serializers.base_serializer import FrameSerializer


 class StasisRTPFrameSerializer(FrameSerializer):
@ -59,11 +60,6 @@ class StasisRTPFrameSerializer(FrameSerializer):
        # Resampler shared between encode / decode paths
        self._resampler = create_default_resampler()

-    @property
-    def type(self) -> FrameSerializerType:
-        """Stasis uses raw bytes → BINARY."""
-        return FrameSerializerType.BINARY
-
    async def setup(self, frame: StartFrame):
        """Remember pipeline configuration."""
        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -19,7 +19,6 @@ from pipecat.utils.enums import EndTaskReason

 if TYPE_CHECKING:
    from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
-    from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
    from pipecat.services.anthropic.llm import AnthropicLLMService
    from pipecat.services.google.llm import GoogleLLMService
    from pipecat.services.openai.llm import OpenAILLMService
@ -64,7 +63,6 @@ class PipecatEngine:
        transport: Optional[BaseTransport] = None,
        workflow: WorkflowGraph,
        call_context_vars: dict,
-        audio_buffer: Optional["AudioBuffer"] = None,
        workflow_run_id: Optional[int] = None,
        node_transition_callback: Optional[
            Callable[[str, Optional[str]], Awaitable[None]]
@ -78,7 +76,6 @@ class PipecatEngine:
        self.transport = transport
        self.workflow = workflow
        self._call_context_vars = call_context_vars
-        self._audio_buffer = audio_buffer
        self._workflow_run_id = workflow_run_id
        self._node_transition_callback = node_transition_callback
        self._initialized = False
@ -204,6 +201,7 @@ class PipecatEngine:
            logger.info(f"Arguments: {function_call_params.arguments}")
            await self.set_node(transition_to_node)
            try:
+
                async def on_context_updated() -> None:
                    """
                    pipecat framework will run this function after the function call result has been updated in the context.
@ -215,6 +213,12 @@ class PipecatEngine:
                        self._current_node
                    )

+                    # Queue EndFrame if we just transitioned to EndNode
+                    if self._current_node.is_end:
+                        await self.send_end_task_frame(
+                            EndTaskReason.USER_QUALIFIED.value
+                        )
+
                result = {"status": "done"}

                properties = FunctionCallResultProperties(
@ -478,8 +482,6 @@ class PipecatEngine:
        if node.extraction_enabled and node.extraction_variables:
            await self._perform_variable_extraction_if_needed(node)

-        await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
-
    async def _handle_agent_node(self, node: Node) -> None:
        """Handle agent node execution."""
        if node.is_static:
@ -680,12 +682,12 @@ class PipecatEngine:
        """
        return engine_callbacks.create_should_mute_callback(self)

-    def create_user_idle_callback(self):
+    def create_user_idle_handler(self):
        """
-        This callback is called when the user is idle for a certain duration.
-        We use this to either play the static text or end the call
+        Returns a UserIdleHandler that manages user-idle timeouts with state.
+        The handler tracks retry count and handles escalating prompts.
        """
-        return engine_callbacks.create_user_idle_callback(self)
+        return engine_callbacks.create_user_idle_handler(self)

    def create_max_duration_callback(self):
        """
@ -721,14 +723,6 @@ class PipecatEngine:
        """
        self.task = task

-    def set_audio_buffer(self, audio_buffer: "AudioBuffer") -> None:
-        """Set the audio buffer.
-
-        This allows setting the audio buffer after the engine has been created,
-        which is useful when the audio buffer needs to be created after the engine.
-        """
-        self._audio_buffer = audio_buffer
-
    def set_stasis_connection(
        self, connection: Optional["StasisRTPConnection"]
    ) -> None:
--- a/api/services/workflow/pipecat_engine_callbacks.py
+++ b/api/services/workflow/pipecat_engine_callbacks.py
@ -23,7 +23,6 @@ from pipecat.utils.enums import EndTaskReason

 if TYPE_CHECKING:
    from api.services.workflow.pipecat_engine import PipecatEngine
-    from pipecat.processors.user_idle_processor import UserIdleProcessor


 # ---------------------------------------------------------------------------
@ -57,33 +56,43 @@ def create_should_mute_callback(
 # ---------------------------------------------------------------------------


-def create_user_idle_callback(engine: "PipecatEngine"):
-    """Return a callback that handles user-idle timeouts."""
+class UserIdleHandler:
+    """Helper class to manage user idle retry logic with state."""

-    async def handle_user_idle(
-        user_idle: "UserIdleProcessor", retry_count: int
-    ) -> bool:
-        logger.debug(f"Handling user_idle, attempt: {retry_count}")
+    def __init__(self, engine: "PipecatEngine"):
+        self._engine = engine
+        self._retry_count = 0

-        if retry_count == 1:
+    def reset(self):
+        """Reset the retry count when user becomes active."""
+        self._retry_count = 0
+
+    async def handle_idle(self, aggregator):
+        """Handle user idle event with escalating prompts."""
+        self._retry_count += 1
+        logger.debug(f"Handling user_idle, attempt: {self._retry_count}")
+
+        if self._retry_count == 1:
            message = {
                "role": "system",
                "content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
            }
-            await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
-            return True
+            await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
+            return

        message = {
            "role": "system",
            "content": "The user has been quiet. We will be disconnecting the call now. Wish them a good day in the language that the user has been speaking so far.",
        }
-        await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
-        await engine.send_end_task_frame(
+        await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
+        await self._engine.send_end_task_frame(
            EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value
        )
-        return False

-    return handle_user_idle
+
+def create_user_idle_handler(engine: "PipecatEngine") -> UserIdleHandler:
+    """Return a UserIdleHandler that manages user-idle timeouts with state."""
+    return UserIdleHandler(engine)


 # ---------------------------------------------------------------------------
--- a/api/tasks/arq.py
+++ b/api/tasks/arq.py
@ -49,8 +49,7 @@ from api.tasks.campaign_tasks import (
 from api.tasks.knowledge_base_processing import process_knowledge_base_document
 from api.tasks.run_integrations import run_integrations_post_workflow_run
 from api.tasks.s3_upload import (
-    upload_audio_to_s3,
-    upload_transcript_to_s3,
+    process_workflow_completion,
    upload_voicemail_audio_to_s3,
 )

@ -59,9 +58,8 @@ class WorkerSettings:
    functions = [
        calculate_workflow_run_cost,
        run_integrations_post_workflow_run,
-        upload_audio_to_s3,
-        upload_transcript_to_s3,
        upload_voicemail_audio_to_s3,
+        process_workflow_completion,
        sync_campaign_source,
        process_campaign_batch,
        monitor_campaign_progress,
--- a/api/tasks/function_names.py
+++ b/api/tasks/function_names.py
@ -1,8 +1,7 @@
 class FunctionNames:
    CALCULATE_WORKFLOW_RUN_COST = "calculate_workflow_run_cost"
    RUN_INTEGRATIONS_POST_WORKFLOW_RUN = "run_integrations_post_workflow_run"
-    UPLOAD_AUDIO_TO_S3 = "upload_audio_to_s3"
-    UPLOAD_TRANSCRIPT_TO_S3 = "upload_transcript_to_s3"
+    PROCESS_WORKFLOW_COMPLETION = "process_workflow_completion"
    UPLOAD_VOICEMAIL_AUDIO_TO_S3 = "upload_voicemail_audio_to_s3"
    SYNC_CAMPAIGN_SOURCE = "sync_campaign_source"
    PROCESS_CAMPAIGN_BATCH = "process_campaign_batch"
--- a/api/tasks/run_integrations.py
+++ b/api/tasks/run_integrations.py
@ -1,10 +1,11 @@
 """Execute webhook integrations after workflow run completion."""

-from typing import Any, Dict
+from typing import Any, Dict, Optional

 import httpx
 from loguru import logger

+from api.constants import BACKEND_API_ENDPOINT
 from api.db import db_client
 from api.db.models import WorkflowRunModel
 from api.utils.credential_auth import build_auth_header
@ -54,10 +55,13 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):

        logger.info(f"Found {len(webhook_nodes)} webhook nodes to execute")

-        # Step 4: Build render context
-        render_context = _build_render_context(workflow_run)
+        # Step 4: Generate public access token (on-demand, only when webhooks exist)
+        public_token = await db_client.ensure_public_access_token(workflow_run_id)

-        # Step 5: Execute each webhook node
+        # Step 5: Build render context
+        render_context = _build_render_context(workflow_run, public_token)
+
+        # Step 6: Execute each webhook node
        for node in webhook_nodes:
            webhook_data = node.get("data", {})
            try:
@ -77,9 +81,19 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
        raise


-def _build_render_context(workflow_run: WorkflowRunModel) -> Dict[str, Any]:
-    """Build the context dict for template rendering."""
-    return {
+def _build_render_context(
+    workflow_run: WorkflowRunModel, public_token: Optional[str] = None
+) -> Dict[str, Any]:
+    """Build the context dict for template rendering.
+
+    Args:
+        workflow_run: The workflow run model
+        public_token: Optional public access token for download URLs
+
+    Returns:
+        Dict containing all fields available for template rendering
+    """
+    context = {
        # Top-level fields
        "workflow_run_id": workflow_run.id,
        "workflow_run_name": workflow_run.name,
@ -89,10 +103,25 @@ def _build_render_context(workflow_run: WorkflowRunModel) -> Dict[str, Any]:
        "initial_context": workflow_run.initial_context or {},
        "gathered_context": workflow_run.gathered_context or {},
        "cost_info": workflow_run.usage_info or {},
-        "recording_url": getattr(workflow_run, "recording_url", None),
-        "transcript_url": getattr(workflow_run, "transcript_url", None),
    }

+    # Add public download URLs if token is available
+    if public_token:
+        base_url = (
+            f"{BACKEND_API_ENDPOINT}/api/v1/public/download/workflow/{public_token}"
+        )
+        context["recording_url"] = (
+            f"{base_url}/recording" if workflow_run.recording_url else None
+        )
+        context["transcript_url"] = (
+            f"{base_url}/transcript" if workflow_run.transcript_url else None
+        )
+    else:
+        context["recording_url"] = workflow_run.recording_url
+        context["transcript_url"] = workflow_run.transcript_url
+
+    return context
+

 async def _execute_webhook_node(
    webhook_data: Dict[str, Any],
--- a/api/tasks/s3_upload.py
+++ b/api/tasks/s3_upload.py
@ -1,129 +1,27 @@
 import os
+from typing import Optional

 from loguru import logger
-from pipecat.utils.context import set_current_run_id

 from api.db import db_client
 from api.services.storage import get_current_storage_backend, storage_fs
-
-
-async def upload_audio_to_s3(ctx, workflow_run_id: int, temp_file_path: str):
-    """Upload audio file from temp path to S3."""
-    run_id = str(workflow_run_id)
-    set_current_run_id(run_id)
-
-    logger.info(f"Starting audio upload to S3 from {temp_file_path}")
-
-    try:
-        # Verify temp file exists
-        if not os.path.exists(temp_file_path):
-            logger.error(f"Temp audio file not found: {temp_file_path}")
-            raise FileNotFoundError(f"Temp audio file not found: {temp_file_path}")
-
-        file_size = os.path.getsize(temp_file_path)
-        logger.debug(f"Audio file size: {file_size} bytes")
-
-        recording_url = f"recordings/{workflow_run_id}.wav"
-        storage_backend = get_current_storage_backend()
-
-        logger.info(
-            f"UPLOAD: Using {storage_backend.name} (value: {storage_backend.value}) for audio upload - workflow_run_id: {workflow_run_id}"
-        )
-
-        await storage_fs.aupload_file(temp_file_path, recording_url)
-
-        # Update DB with recording URL and storage backend
-        await db_client.update_workflow_run(
-            run_id=workflow_run_id,
-            recording_url=recording_url,
-            storage_backend=storage_backend.value,
-        )
-
-        logger.info(
-            f"Successfully uploaded audio to {storage_backend.name}: {recording_url} (stored backend: {storage_backend.name})"
-        )
-
-    except Exception as e:
-        logger.error(f"Error uploading audio to S3 for workflow {workflow_run_id}: {e}")
-        raise
-    finally:
-        # Clean up temp file
-        if os.path.exists(temp_file_path):
-            try:
-                os.remove(temp_file_path)
-                logger.debug(f"Cleaned up temp audio file: {temp_file_path}")
-            except Exception as e:
-                logger.warning(
-                    f"Failed to clean up temp audio file {temp_file_path}: {e}"
-                )
-
-
-async def upload_transcript_to_s3(ctx, workflow_run_id: int, temp_file_path: str):
-    """Upload transcript file from temp path to S3."""
-    run_id = str(workflow_run_id)
-    set_current_run_id(run_id)
-
-    logger.info(f"Starting transcript upload to S3 from {temp_file_path}")
-
-    try:
-        # Verify temp file exists
-        if not os.path.exists(temp_file_path):
-            logger.error(f"Temp transcript file not found: {temp_file_path}")
-            raise FileNotFoundError(f"Temp transcript file not found: {temp_file_path}")
-
-        file_size = os.path.getsize(temp_file_path)
-        logger.debug(f"Transcript file size: {file_size} bytes")
-
-        transcript_url = f"transcripts/{workflow_run_id}.txt"
-        storage_backend = get_current_storage_backend()
-
-        logger.info(
-            f"UPLOAD: Using {storage_backend.name} (value: {storage_backend.value}) for transcript upload - workflow_run_id: {workflow_run_id}"
-        )
-
-        await storage_fs.aupload_file(temp_file_path, transcript_url)
-
-        # Update DB with transcript URL and storage backend
-        await db_client.update_workflow_run(
-            run_id=workflow_run_id,
-            transcript_url=transcript_url,
-            storage_backend=storage_backend.value,
-        )
-
-        logger.info(
-            f"Successfully uploaded transcript to {storage_backend.name}: {transcript_url} (stored backend: {storage_backend.name})"
-        )
-
-    except Exception as e:
-        logger.error(
-            f"Error uploading transcript to S3 for workflow {workflow_run_id}: {e}"
-        )
-        raise
-    finally:
-        # Clean up temp file
-        if os.path.exists(temp_file_path):
-            try:
-                os.remove(temp_file_path)
-                logger.debug(f"Cleaned up temp transcript file: {temp_file_path}")
-            except Exception as e:
-                logger.warning(
-                    f"Failed to clean up temp transcript file {temp_file_path}: {e}"
-                )
+from api.tasks.run_integrations import run_integrations_post_workflow_run
+from pipecat.utils.context import set_current_run_id


 async def upload_voicemail_audio_to_s3(
-    ctx,
+    _ctx,
    workflow_run_id: int,
    temp_file_path: str,
    s3_key: str,
 ):
    """Upload voicemail detection audio from temp file to S3.

-    This function is similar to upload_audio_to_s3 but handles voicemail-specific
-    paths and doesn't update the workflow run's recording_url field.
+    Handles voicemail-specific paths and doesn't update the workflow run's
+    recording_url field.

    Args:
-        ctx: ARQ context
+        _ctx: ARQ context (unused)
        workflow_run_id: The workflow run ID
        temp_file_path: Path to the temporary WAV file
        s3_key: The S3 key where the file should be uploaded
@ -161,7 +59,7 @@ async def upload_voicemail_audio_to_s3(
        )
        raise
    finally:
-        # Clean up temp file (same pattern as upload_audio_to_s3)
+        # Clean up temp file
        if os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
@ -170,3 +68,104 @@ async def upload_voicemail_audio_to_s3(
                logger.warning(
                    f"Failed to clean up temp voicemail audio file {temp_file_path}: {e}"
                )
+
+
+async def process_workflow_completion(
+    _ctx,
+    workflow_run_id: int,
+    audio_temp_path: Optional[str] = None,
+    transcript_temp_path: Optional[str] = None,
+):
+    """Process workflow completion: upload artifacts and run integrations.
+
+    This task combines audio upload, transcript upload, and webhook integrations
+    into a single sequential task to ensure integrations run after uploads complete.
+
+    Args:
+        _ctx: ARQ context (unused)
+        workflow_run_id: The workflow run ID
+        audio_temp_path: Optional path to temp audio file
+        transcript_temp_path: Optional path to temp transcript file
+    """
+    run_id = str(workflow_run_id)
+    set_current_run_id(run_id)
+
+    logger.info(f"Processing workflow completion for run {workflow_run_id}")
+
+    storage_backend = get_current_storage_backend()
+
+    # Step 1: Upload audio if provided
+    if audio_temp_path:
+        try:
+            if os.path.exists(audio_temp_path):
+                file_size = os.path.getsize(audio_temp_path)
+                logger.debug(f"Audio file size: {file_size} bytes")
+
+                recording_url = f"recordings/{workflow_run_id}.wav"
+                logger.info(
+                    f"Uploading audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
+                )
+
+                await storage_fs.aupload_file(audio_temp_path, recording_url)
+                await db_client.update_workflow_run(
+                    run_id=workflow_run_id,
+                    recording_url=recording_url,
+                    storage_backend=storage_backend.value,
+                )
+                logger.info(f"Successfully uploaded audio: {recording_url}")
+            else:
+                logger.warning(f"Audio temp file not found: {audio_temp_path}")
+        except Exception as e:
+            logger.error(f"Error uploading audio for workflow {workflow_run_id}: {e}")
+        finally:
+            if audio_temp_path and os.path.exists(audio_temp_path):
+                try:
+                    os.remove(audio_temp_path)
+                    logger.debug(f"Cleaned up temp audio file: {audio_temp_path}")
+                except Exception as e:
+                    logger.warning(f"Failed to clean up temp audio file: {e}")
+
+    # Step 2: Upload transcript if provided
+    if transcript_temp_path:
+        try:
+            if os.path.exists(transcript_temp_path):
+                file_size = os.path.getsize(transcript_temp_path)
+                logger.debug(f"Transcript file size: {file_size} bytes")
+
+                transcript_url = f"transcripts/{workflow_run_id}.txt"
+                logger.info(
+                    f"Uploading transcript to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
+                )
+
+                await storage_fs.aupload_file(transcript_temp_path, transcript_url)
+                await db_client.update_workflow_run(
+                    run_id=workflow_run_id,
+                    transcript_url=transcript_url,
+                    storage_backend=storage_backend.value,
+                )
+                logger.info(f"Successfully uploaded transcript: {transcript_url}")
+            else:
+                logger.warning(
+                    f"Transcript temp file not found: {transcript_temp_path}"
+                )
+        except Exception as e:
+            logger.error(
+                f"Error uploading transcript for workflow {workflow_run_id}: {e}"
+            )
+        finally:
+            if transcript_temp_path and os.path.exists(transcript_temp_path):
+                try:
+                    os.remove(transcript_temp_path)
+                    logger.debug(
+                        f"Cleaned up temp transcript file: {transcript_temp_path}"
+                    )
+                except Exception as e:
+                    logger.warning(f"Failed to clean up temp transcript file: {e}")
+
+    # Step 3: Run webhook integrations (after uploads are complete)
+    try:
+        await run_integrations_post_workflow_run(_ctx, workflow_run_id)
+    except Exception as e:
+        logger.error(f"Error running integrations for workflow {workflow_run_id}: {e}")
+
+    logger.info(f"Completed workflow completion processing for run {workflow_run_id}")
--- a/api/tests/conftest.py
+++ b/api/tests/conftest.py
@ -1,5 +1,5 @@
-from dataclasses import dataclass
-from typing import Any, Dict
+from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
 from unittest.mock import Mock

 import pytest
@ -28,6 +28,87 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 START_CALL_SYSTEM_PROMPT = "start_call_system_prompt"
 END_CALL_SYSTEM_PROMPT = "end_call_system_prompt"

+# Default workflow definition for mocking database WorkflowModel
+DEFAULT_WORKFLOW_DEFINITION = {
+    "nodes": [
+        {
+            "id": "1",
+            "type": "startCall",
+            "position": {"x": 0, "y": 0},
+            "data": {
+                "name": "Start",
+                "prompt": START_CALL_SYSTEM_PROMPT,
+                "is_start": True,
+                "allow_interrupt": False,
+                "add_global_prompt": False,
+            },
+        },
+        {
+            "id": "2",
+            "type": "endCall",
+            "position": {"x": 0, "y": 200},
+            "data": {
+                "name": "End",
+                "prompt": END_CALL_SYSTEM_PROMPT,
+                "is_end": True,
+                "allow_interrupt": False,
+                "add_global_prompt": False,
+            },
+        },
+    ],
+    "edges": [
+        {
+            "id": "1-2",
+            "source": "1",
+            "target": "2",
+            "data": {"label": "End", "condition": "End the call"},
+        }
+    ],
+}
+
+
+@dataclass
+class MockWorkflowModel:
+    """Mock database WorkflowModel for testing.
+
+    This mimics the structure of the database WorkflowModel, not the parsed WorkflowGraph.
+    Use this when mocking db_client.get_workflow() responses.
+    """
+
+    workflow_id: int = 1
+    organization_id: int = 1
+    workflow_configurations: Dict[str, Any] = field(default_factory=dict)
+    workflow_definition_with_fallback: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        if not self.workflow_definition_with_fallback:
+            self.workflow_definition_with_fallback = DEFAULT_WORKFLOW_DEFINITION.copy()
+
+
+@dataclass
+class MockWorkflowRun:
+    """Mock database WorkflowRun for testing.
+
+    Use this when mocking db_client.get_workflow_run() responses.
+    """
+
+    is_completed: bool = False
+    initial_context: Dict[str, Any] = field(default_factory=dict)
+    gathered_context: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class MockUserConfig:
+    """Mock user configuration for testing.
+
+    Use this when mocking db_client.get_user_configurations() responses.
+    """
+
+    stt: Optional[Any] = None
+    tts: Optional[Any] = None
+    llm: Optional[Any] = None
+    embeddings: Optional[Any] = None
+

 class MockTransportProcessor(FrameProcessor):
    """
@ -41,7 +122,7 @@ class MockTransportProcessor(FrameProcessor):

    Args:
        emit_bot_speaking: If True, also emits BotSpeakingFrame on TTSAudioRawFrame
-            which is needed for UserIdleProcessor to start conversation tracking. Default True.
+            which is needed for user idle tracking to start conversation tracking. Default True.
    """

    def __init__(
@ -63,7 +144,7 @@ class MockTransportProcessor(FrameProcessor):
                BotStartedSpeakingFrame(), direction=FrameDirection.UPSTREAM
            )
        elif isinstance(frame, TTSAudioRawFrame):
-            # Emit BotSpeakingFrame - this is what triggers the UserIdleProcessor
+            # Emit BotSpeakingFrame - this is what triggers user idle tracking
            # to start conversation tracking
            if self._emit_bot_speaking:
                await self.push_frame(BotSpeakingFrame())
@ -101,6 +182,24 @@ def mock_engine():
    return engine


+@pytest.fixture
+def mock_workflow_model():
+    """Create a mock WorkflowModel for testing database responses."""
+    return MockWorkflowModel()
+
+
+@pytest.fixture
+def mock_workflow_run():
+    """Create a mock WorkflowRun for testing database responses."""
+    return MockWorkflowRun()
+
+
+@pytest.fixture
+def mock_user_config():
+    """Create a mock user configuration for testing."""
+    return MockUserConfig()
+
+
 @pytest.fixture
 def sample_tools():
    """Create sample mock tools for testing."""
--- a/api/tests/test_pipecat_engine_context_update.py
+++ b/api/tests/test_pipecat_engine_context_update.py
@ -42,7 +42,6 @@ from pipecat.processors.aggregators.llm_response_universal import (
 )
 from pipecat.tests import MockLLMService, MockTTSService

-
 # Define prompts for test nodes
 START_NODE_PROMPT = "Start Node System Prompt"
 AGENT_NODE_PROMPT = "Agent Node System Prompt"
@ -143,14 +142,20 @@ class ContextCapturingMockLLM(MockLLMService):
            msg_copy = dict(msg)
            # Copy content to avoid reference issues
            if "content" in msg_copy:
-                msg_copy["content"] = str(msg_copy["content"]) if msg_copy["content"] else None
+                msg_copy["content"] = (
+                    str(msg_copy["content"]) if msg_copy["content"] else None
+                )
            messages_snapshot.append(msg_copy)

-        self.captured_contexts.append({
-            "step": self._current_step,
-            "messages": messages_snapshot,
-            "system_prompt": messages_snapshot[0]["content"] if messages_snapshot else None,
-        })
+        self.captured_contexts.append(
+            {
+                "step": self._current_step,
+                "messages": messages_snapshot,
+                "system_prompt": messages_snapshot[0]["content"]
+                if messages_snapshot
+                else None,
+            }
+        )

        # Call parent implementation to stream the mock chunks
        return await super()._stream_chat_completions_universal_context(context)
@ -306,14 +311,26 @@ class TestContextUpdateBeforeNextCompletion:
        transition completes. The test verifies the context is still correctly updated.
        """
        # Step 0 (Start node): call collect_info to transition to agent
-        step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
-            {"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
-        ])
+        step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
+            [
+                {
+                    "name": "collect_info",
+                    "arguments": {},
+                    "tool_call_id": "call_transition_1",
+                },
+            ]
+        )

        # Step 1 (Agent node): call end_call to transition to end
-        step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
-            {"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
-        ])
+        step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
+            [
+                {
+                    "name": "end_call",
+                    "arguments": {},
+                    "tool_call_id": "call_transition_2",
+                },
+            ]
+        )

        # Step 2 (End node): text response (end node has no outgoing edges)
        step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
@ -327,7 +344,7 @@ class TestContextUpdateBeforeNextCompletion:
        )

        # Should have been called 3 times: start node, agent node, end node
-        assert llm.get_current_step() == 2, (
+        assert llm.get_current_step() == 3, (
            f"Expected 3 LLM generations (start, agent, end), got {llm.get_current_step()}"
        )

@ -376,14 +393,26 @@ class TestContextUpdateBeforeNextCompletion:
        is handled correctly.
        """
        # Step 0 (Start node): call collect_info to transition to agent
-        step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
-            {"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
-        ])
+        step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
+            [
+                {
+                    "name": "collect_info",
+                    "arguments": {},
+                    "tool_call_id": "call_transition_1",
+                },
+            ]
+        )

        # Step 1 (Agent node): call end_call to transition to end
-        step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
-            {"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
-        ])
+        step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
+            [
+                {
+                    "name": "end_call",
+                    "arguments": {},
+                    "tool_call_id": "call_transition_2",
+                },
+            ]
+        )

        # Step 2 (End node): text response
        step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
@ -397,7 +426,7 @@ class TestContextUpdateBeforeNextCompletion:
        )

        # Verify all three nodes were executed
-        assert llm.get_current_step() == 2, (
+        assert llm.get_current_step() == 3, (
            f"Expected 3 steps, got {llm.get_current_step()}"
        )

@ -408,8 +437,7 @@ class TestContextUpdateBeforeNextCompletion:
        assert AGENT_NODE_PROMPT in llm.get_system_prompt_at_step(1)

        # Step 2: End node - should have end prompt
-        # FIXME - EndFrame is getting processed before LLMContextFrame
-        # assert END_NODE_PROMPT in llm.get_system_prompt_at_step(2)
+        assert END_NODE_PROMPT in llm.get_system_prompt_at_step(2)

        # Verify each subsequent step has the previous tool results
        step_1_ctx = llm.get_context_at_step(1)
@ -423,14 +451,14 @@ class TestContextUpdateBeforeNextCompletion:
        assert step_1_has_tool, "Agent node should see collect_info tool result"

        # Step 2 should have tool results from both transitions
-        # FIXME - EndFrame is getting processed before LLMContextFrame
-        # step_2_tool_messages = [
-        #     msg for msg in step_2_ctx["messages"]
-        #     if msg.get("role") == "tool" or msg.get("tool_call_id")
-        # ]
-        # assert len(step_2_tool_messages) >= 2, (
-        #     f"End node should see at least 2 tool results, got {len(step_2_tool_messages)}"
-        # )
+        step_2_tool_messages = [
+            msg
+            for msg in step_2_ctx["messages"]
+            if msg.get("role") == "tool" or msg.get("tool_call_id")
+        ]
+        assert len(step_2_tool_messages) >= 2, (
+            f"End node should see at least 2 tool results, got {len(step_2_tool_messages)}"
+        )

    @pytest.mark.asyncio
    async def test_context_messages_preserve_conversation_history(
@ -444,14 +472,26 @@ class TestContextUpdateBeforeNextCompletion:
        - Tool call messages and results
        """
        # Step 0 (Start node): call collect_info to transition to agent
-        step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
-            {"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
-        ])
+        step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
+            [
+                {
+                    "name": "collect_info",
+                    "arguments": {},
+                    "tool_call_id": "call_transition_1",
+                },
+            ]
+        )

        # Step 1 (Agent node): call end_call to transition to end
-        step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
-            {"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
-        ])
+        step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
+            [
+                {
+                    "name": "end_call",
+                    "arguments": {},
+                    "tool_call_id": "call_transition_2",
+                },
+            ]
+        )

        # Step 2 (End node): text response
        step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
@ -472,18 +512,15 @@ class TestContextUpdateBeforeNextCompletion:
        assert len(ctx_1["messages"]) > len(ctx_0["messages"]), (
            "Context at step 1 should have more messages than step 0"
        )
-        
-        # FIXME 
-        # assert len(ctx_2["messages"]) > len(ctx_1["messages"]), (
-        #     "Context at step 2 should have more messages than step 1"
-        # )
+
+        assert len(ctx_2["messages"]) > len(ctx_1["messages"]), (
+            "Context at step 2 should have more messages than step 1"
+        )

        # Verify assistant messages are accumulated
-        # FIXME
-        # assistant_messages_at_step_2 = [
-        #     msg for msg in ctx_2["messages"]
-        #     if msg.get("role") == "assistant"
-        # ]
-        # assert len(assistant_messages_at_step_2) >= 2, (
-        #     "Should have at least 2 assistant messages by step 2"
-        # )
+        assistant_messages_at_step_2 = [
+            msg for msg in ctx_2["messages"] if msg.get("role") == "assistant"
+        ]
+        assert len(assistant_messages_at_step_2) >= 2, (
+            "Should have at least 2 assistant messages by step 2"
+        )
--- a/api/tests/test_pipeline_cancellation.py
+++ b/api/tests/test_pipeline_cancellation.py
@ -0,0 +1,100 @@
+import asyncio
+
+import pytest
+from loguru import logger
+
+from pipecat.frames.frames import (
+    EndTaskFrame,
+    Frame,
+    InterruptionTaskFrame,
+    LLMRunFrame,
+)
+from pipecat.pipeline.base_task import PipelineTaskParams
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+
+
+class MockTransport(FrameProcessor):
+    """Pass-through processor standing in for a transport in the test pipeline."""
+
+    # No constructor override is needed: keyword arguments go straight to
+    # FrameProcessor.__init__.
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Run the base-class handling, then forward ``frame`` unchanged."""
+        await super().process_frame(frame, direction)
+        await self.push_frame(frame, direction)
+
+
+class BusyWaitProcessor(FrameProcessor):
+    """Processor that stalls on ``LLMRunFrame`` to simulate a slow stage.
+
+    Args:
+        wait_time: Seconds to sleep when an ``LLMRunFrame`` arrives.
+        **kwargs: Forwarded to ``FrameProcessor``.
+    """
+
+    def __init__(self, wait_time=5.0, **kwargs):
+        super().__init__(**kwargs)
+        self._wait_time = wait_time
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Sleep on ``LLMRunFrame`` (re-raising cancellation), then forward the frame."""
+        await super().process_frame(frame, direction)
+        if isinstance(frame, LLMRunFrame):
+            # Simulate a delay, which can happen sometimes due to slow LLM Inferencing or
+            # other reasons
+            try:
+                logger.debug(f"{self} sleeping with frame: {frame}")
+                # Honor the configured delay instead of a hard-coded 5 seconds.
+                await asyncio.sleep(self._wait_time)
+                logger.debug(f"{self} woke up with frame: {frame}")
+            except asyncio.CancelledError:
+                logger.debug(f"{self} was cancelled")
+                raise
+        await self.push_frame(frame, direction)
+
+
+@pytest.mark.asyncio
+async def test_interruption_with_blocked_end_frame():
+    """Check that the pipeline task finishes when interrupted mid-processing.
+
+    An ``LLMRunFrame`` is queued so ``BusyWaitProcessor`` starts sleeping, then
+    an ``EndTaskFrame`` and an ``InterruptionTaskFrame`` are queued upstream
+    from the transport. The test fails if the pipeline run and the queuing
+    coroutine have not both completed within one second.
+    """
+    busy_wait_processor = BusyWaitProcessor(wait_time=5)
+    transport = MockTransport()
+    pipeline = Pipeline([transport, busy_wait_processor])
+
+    task = PipelineTask(pipeline)
+
+    async def run_pipeline():
+        # Run the pipeline task on the currently running event loop.
+        loop = asyncio.get_running_loop()
+        params = PipelineTaskParams(loop=loop)
+        await task.run(params=params)
+
+    async def queue_frame():
+        # This frame is what makes BusyWaitProcessor start sleeping.
+        await task.queue_frames([LLMRunFrame()])
+
+        # Send EndTaskFrame to simulate EndFrame
+        await asyncio.sleep(0.1)
+        await transport.queue_frame(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
+
+        # Simulate an Interruption, which can happen if the user
+        # has started to speak
+        await asyncio.sleep(0.1)
+        await transport.queue_frame(
+            InterruptionTaskFrame(), direction=FrameDirection.UPSTREAM
+        )
+
+    # Create tasks explicitly for better control
+    pipeline_task = asyncio.create_task(run_pipeline())
+    queue_task = asyncio.create_task(queue_frame())
+
+    # Wait with timeout
+    # The 1 s budget is far shorter than the processor's sleep, so finishing in
+    # time presumably requires the sleep to be cancelled - NOTE(review): confirm.
+    done, pending = await asyncio.wait(
+        [pipeline_task, queue_task],
+        timeout=1.0,
+        return_when=asyncio.ALL_COMPLETED,
+    )
+
+    # If there are pending tasks, we timed out
+    if pending:
+        # Cancel all pending tasks
+        for t in pending:
+            t.cancel()
+
+        # Give limited time for cleanup, then move on regardless
+        try:
+            await asyncio.wait_for(
+                asyncio.gather(*pending, return_exceptions=True),
+                timeout=1.0,
+            )
+        except asyncio.TimeoutError:
+            pass  # Cleanup took too long, continue anyway
+
+        pytest.fail("Test timed out after 1 second")
--- a/api/tests/test_user_idle_handler.py
+++ b/api/tests/test_user_idle_handler.py
@ -1,10 +1,10 @@
 """
 Simulates a user idle condition and tests the behaviour
-of the user idle processor.
+of the user idle handler.

 This module tests the behavior when the user becomes idle during a conversation,
-ensuring the UserIdleProcessor properly triggers the callback and the engine
-handles it correctly.
+ensuring the user_idle_timeout in LLMUserAggregatorParams properly triggers
+the on_user_turn_idle event and the engine handles it correctly.
 """

 import asyncio
@ -23,8 +23,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
 from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
 )
-from pipecat.processors.user_idle_processor import UserIdleProcessor
 from pipecat.tests import MockLLMService, MockTTSService


@ -32,8 +32,8 @@ async def run_pipeline_with_user_idle(
    workflow: WorkflowGraph,
    user_idle_timeout: float = 0.2,
    mock_steps: list | None = None,
-) -> tuple[MockLLMService, LLMContext, UserIdleProcessor]:
-    """Run a pipeline with UserIdleProcessor and simulate user idle condition.
+) -> tuple[MockLLMService, LLMContext]:
+    """Run a pipeline with user_idle_timeout and simulate user idle condition.

    Args:
        workflow: The workflow graph to use.
@ -42,7 +42,7 @@ async def run_pipeline_with_user_idle(
            defaults to a simple greeting followed by text responses.

    Returns:
-        Tuple of (MockLLMService, LLMContext, UserIdleProcessor) for assertions.
+        Tuple of (MockLLMService, LLMContext) for assertions.
    """
    # Create mock responses - bot will speak first, then respond to idle prompts
    # Step 1: Initial greeting
@ -64,10 +64,11 @@ async def run_pipeline_with_user_idle(
    # Create LLM context
    context = LLMContext()

-    # Create context aggregator with both user and assistant aggregators
+    # Create context aggregator with user_idle_timeout in user_params
    assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
+    user_params = LLMUserAggregatorParams(user_idle_timeout=user_idle_timeout)
    context_aggregator = LLMContextAggregatorPair(
-        context, assistant_params=assistant_params
+        context, assistant_params=assistant_params, user_params=user_params
    )
    user_context_aggregator = context_aggregator.user()
    assistant_context_aggregator = context_aggregator.assistant()
@ -81,18 +82,20 @@ async def run_pipeline_with_user_idle(
        workflow_run_id=1,
    )

-    # Create UserIdleProcessor with engine's callback and a short timeout
-    user_idle_processor = UserIdleProcessor(
-        callback=engine.create_user_idle_callback(),
-        timeout=user_idle_timeout,
-    )
+    # Register user idle event handlers
+    user_idle_handler = engine.create_user_idle_handler()

-    # Build the pipeline:
-    # llm -> mock_transport -> user_idle_processor -> assistant_context_aggregator
-    # The user_context_aggregator would normally be at the start for user input
+    @user_context_aggregator.event_handler("on_user_turn_idle")
+    async def on_user_turn_idle(aggregator):
+        await user_idle_handler.handle_idle(aggregator)
+
+    @user_context_aggregator.event_handler("on_user_turn_started")
+    async def on_user_turn_started(aggregator, strategy):
+        user_idle_handler.reset()
+
+    # Build the pipeline
    pipeline = Pipeline(
        [
-            user_idle_processor,
            user_context_aggregator,
            llm,
            tts,
@ -154,11 +157,11 @@ async def run_pipeline_with_user_idle(
                return_exceptions=True,
            )

-    return llm, context, user_idle_processor
+    return llm, context


 class TestUserIdleHandler:
-    """Test user idle handling through PipecatEngine and UserIdleProcessor."""
+    """Test user idle handling through PipecatEngine and UserIdleHandler."""

    @pytest.mark.asyncio
    async def test_user_idle_triggers_callback(self, simple_workflow: WorkflowGraph):
@ -167,13 +170,13 @@ class TestUserIdleHandler:
        This test verifies that when:
        1. The bot starts speaking (triggers conversation tracking)
        2. No user input is received for the timeout period
-        3. The UserIdleProcessor triggers the idle callback
+        3. The on_user_turn_idle event triggers the idle handler

-        The engine's user idle callback should:
+        The engine's user idle handler should:
        - First retry: Send a message asking if user is still there
        - Second retry: Send goodbye message and end the call
        """
-        llm, context, user_idle_processor = await run_pipeline_with_user_idle(
+        llm, context = await run_pipeline_with_user_idle(
            workflow=simple_workflow,
            user_idle_timeout=0.2,  # Short timeout for faster test
        )
@ -220,7 +223,7 @@ class TestUserIdleHandler:
            MockLLMService.create_text_chunks("Response 3"),
        ]

-        llm, context, user_idle_processor = await run_pipeline_with_user_idle(
+        llm, context = await run_pipeline_with_user_idle(
            workflow=three_node_workflow,
            user_idle_timeout=0.2,
            mock_steps=mock_steps,
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -1,6 +1,6 @@
 services:
  postgres:
-    image: postgres:17
+    image: pgvector/pgvector:pg17
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
@ -83,6 +83,10 @@ services:
      ENVIRONMENT: "local"
      LOG_LEVEL: "INFO"

+      # Replace this environment variable if you are using a custom
+      # domain to host the stack
+      BACKEND_API_ENDPOINT: "http://localhost:8000"
+
      # Database configuration (using containerized postgres)
      DATABASE_URL: "postgresql+asyncpg://postgres:postgres@postgres:5432/postgres"

--- a/docs/deployment/custom-domain.mdx
+++ b/docs/deployment/custom-domain.mdx
@ -162,6 +162,10 @@ server {
 }
 ```

+### Add environment variable
+
+Replace the `BACKEND_API_ENDPOINT` environment variable in `docker-compose.yaml` with your custom domain, including the scheme (`http://` or `https://`).
+
 ### Start Dograh Services

 Start Dograh with the updated configuration:
--- a/evals/stt/README.md
+++ b/evals/stt/README.md
@ -0,0 +1,135 @@
+# STT Evaluation Benchmark
+
+Benchmark for comparing Speech-to-Text providers using **WebSocket streaming** with focus on:
+- **Speaker diarization** - identifying who said what
+- **Keyterm boosting** - improving recognition of specific terms (Deepgram)
+
+## Providers
+
+| Provider | Diarization | Keyterm Boost | Streaming |
+|----------|-------------|---------------|-----------|
+| Deepgram | Yes | Yes | WebSocket (v1/v2) |
+| Speechmatics | Yes | Additional vocab | WebSocket RT |
+
+## Setup
+
+```bash
+# Install dependencies
+pip install websockets
+
+# Set API keys
+export DEEPGRAM_API_KEY="your-key"
+export SPEECHMATICS_API_KEY="your-key"
+```
+
+**Note:** Requires `ffmpeg` installed for audio conversion to PCM16.
+
+## Usage
+
+Run from the project root directory:
+
+```bash
+# Test both providers with diarization
+python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
+
+# Test only Deepgram
+python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
+
+# Test with keyterm boosting (Deepgram)
+python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --keyterms "Dograh" "Pipecat"
+
+# Use different sample rate (default: 8000 Hz)
+python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --sample-rate 16000
+
+# Show word-level timings
+python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --show-words
+
+# Save results to JSON
+python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --save
+```
+
+## CLI Options
+
+| Option | Description |
+|--------|-------------|
+| `audio_file` | Path to audio file (relative to evals/stt/ or absolute) |
+| `--providers` | Providers to test: `deepgram`, `deepgram-flux`, `speechmatics`, `local-smart-turn` (default: `deepgram` and `speechmatics`) |
+| `--diarize` | Enable speaker diarization |
+| `--keyterms` | Keywords to boost (Deepgram) / additional vocab (Speechmatics) |
+| `--language` | Language code (default: en) |
+| `--sample-rate` | Audio sample rate for streaming (default: 8000) |
+| `--show-words` | Show individual word timings |
+| `--save` | Save results to JSON in `results/` |
+
+## Directory Structure
+
+```
+evals/stt/
+├── audio/              # Audio test files
+│   └── multi_speaker.m4a
+├── results/            # Saved benchmark results (JSON)
+├── providers/          # STT provider implementations
+│   ├── base.py         # Base classes
+│   ├── deepgram_provider.py    # WebSocket streaming
+│   └── speechmatics_provider.py # WebSocket streaming
+├── audio_streamer.py   # PCM16 audio file streamer
+├── benchmark.py        # Main runner script
+└── README.md
+```
+
+## How It Works
+
+1. **Audio Conversion**: The `AudioStreamer` converts any audio file to raw PCM16 using ffmpeg
+2. **WebSocket Connection**: Providers connect to their respective WebSocket APIs
+3. **Streaming**: Audio is sent in chunks (configurable sample rate, default 8kHz)
+4. **Result Collection**: Transcripts and speaker info are collected from WebSocket responses
+5. **Comparison**: Results are parsed into a common format for comparison
+
+## Output Example
+
+```
+Audio file: /path/to/audio/multi_speaker.m4a
+Providers: ['deepgram', 'speechmatics']
+Diarization: True
+Sample rate: 8000 Hz
+
+============================================================
+Provider: DEEPGRAM
+============================================================
+
+Duration: 45.32s
+Speakers detected: 2 - ['0', '1']
+
+Transcript:
+Hello, welcome to the demo...
+
+--- Speaker Segments ---
+[0.0s] Speaker 0: Hello, welcome to the demo.
+[2.5s] Speaker 1: Thanks for having me.
+...
+
+============================================================
+COMPARISON SUMMARY
+============================================================
+
+Provider        Duration   Speakers   Words
+---------------------------------------------
+deepgram        45.32      2          312
+speechmatics    45.32      2          308
+```
+
+## Adding New Providers
+
+1. Create a new file in `providers/` (e.g., `whisper_provider.py`)
+2. Implement the `STTProvider` abstract class with WebSocket streaming
+3. Use `AudioStreamer` for PCM16 conversion
+4. Add to `providers/__init__.py`
+5. Add to `benchmark.py` provider choices
+
+## API Documentation
+
+- Deepgram Streaming: https://developers.deepgram.com/docs/live-streaming-audio
+- Deepgram Diarization: https://developers.deepgram.com/docs/diarization
+- Deepgram Keyterms: https://developers.deepgram.com/docs/keyterm
+- Speechmatics RT API: https://docs.speechmatics.com/rt-api-ref
+- Speechmatics Diarization: https://docs.speechmatics.com/features/diarization
--- a/evals/stt/init.py
+++ b/evals/stt/init.py
@ -0,0 +1 @@
+# STT Evaluation Benchmark
--- a/evals/stt/audio/multi_speaker.m4a
+++ b/evals/stt/audio/multi_speaker.m4a
--- a/evals/stt/audio/nope.m4a
+++ b/evals/stt/audio/nope.m4a
--- a/evals/stt/audio/not_so_sure.m4a
+++ b/evals/stt/audio/not_so_sure.m4a
--- a/evals/stt/audio/vad.m4a
+++ b/evals/stt/audio/vad.m4a
--- a/evals/stt/audio/yes.m4a
+++ b/evals/stt/audio/yes.m4a
--- a/evals/stt/audio_streamer.py
+++ b/evals/stt/audio_streamer.py
@ -0,0 +1,140 @@
+"""Audio file streamer - converts audio files to PCM16 streams."""
+
+import asyncio
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import AsyncIterator
+
+
+@dataclass
+class AudioConfig:
+    """Parameters describing the PCM stream produced by the streamer."""
+
+    sample_rate: int = 8000  # samples per second
+    channels: int = 1
+    sample_width: int = 2  # bytes per sample (16-bit)
+    chunk_duration_ms: int = 80  # audio duration carried by one chunk
+
+    @property
+    def chunk_size(self) -> int:
+        """Size in bytes of one chunk lasting ``chunk_duration_ms`` milliseconds."""
+        bytes_per_frame = self.channels * self.sample_width
+        # Fractional sample counts are truncated, not rounded.
+        frames_per_chunk = int(self.sample_rate * self.chunk_duration_ms / 1000)
+        return frames_per_chunk * bytes_per_frame
+
+
+class AudioStreamer:
+    """Streams audio files as PCM16 chunks.
+
+    Converts any audio format to raw PCM16 using ffmpeg and streams
+    in real-time chunks to simulate live audio.
+    """
+
+    def __init__(self, config: AudioConfig | None = None):
+        """Store ``config``, falling back to a default ``AudioConfig``."""
+        self.config = config or AudioConfig()
+
+    def convert_to_pcm16(self, audio_path: Path) -> bytes:
+        """Convert audio file to raw PCM16 bytes using ffmpeg.
+
+        This call blocks until ffmpeg exits.
+
+        Args:
+            audio_path: Path to input audio file
+
+        Returns:
+            Raw PCM16 audio bytes
+
+        Raises:
+            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+        """
+        cmd = [
+            "ffmpeg",
+            "-i",
+            str(audio_path),
+            "-f",
+            "s16le",  # signed 16-bit little-endian
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            str(self.config.sample_rate),
+            "-ac",
+            str(self.config.channels),
+            "-",  # output to stdout
+        ]
+
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            check=True,
+        )
+        return result.stdout
+
+    async def stream_file(
+        self,
+        audio_path: Path,
+        realtime: bool = True,
+        trailing_silence_seconds: float = 0.0,
+    ) -> AsyncIterator[bytes]:
+        """Stream audio file as PCM16 chunks.
+
+        Args:
+            audio_path: Path to audio file
+            realtime: If True, add delays to simulate real-time streaming
+            trailing_silence_seconds: Seconds of silence to append after audio ends.
+                Useful for capturing pending end-of-turn events from STT providers.
+                Only whole chunks of silence are emitted.
+
+        Yields:
+            PCM16 audio chunks
+        """
+        # ffmpeg runs synchronously; convert in a worker thread so other tasks
+        # on the event loop are not stalled while the whole file is decoded.
+        pcm_data = await asyncio.to_thread(self.convert_to_pcm16, audio_path)
+
+        chunk_size = self.config.chunk_size
+        chunk_seconds = self.config.chunk_duration_ms / 1000.0
+        delay = chunk_seconds if realtime else 0
+
+        # Stream audio chunks
+        for i in range(0, len(pcm_data), chunk_size):
+            chunk = pcm_data[i : i + chunk_size]
+            if chunk:
+                yield chunk
+                if realtime and delay > 0:
+                    await asyncio.sleep(delay)
+
+        # Stream trailing silence if requested
+        if trailing_silence_seconds > 0:
+            silence_chunk = bytes(chunk_size)  # Zero-filled bytes = silence
+            # The small tolerance keeps quotients such as 0.3 / 0.1
+            # (2.9999999999999996 in floating point) from losing a chunk.
+            num_silence_chunks = int(trailing_silence_seconds / chunk_seconds + 1e-9)
+
+            for _ in range(num_silence_chunks):
+                yield silence_chunk
+                if realtime and delay > 0:
+                    await asyncio.sleep(delay)
+
+    async def stream_file_fast(self, audio_path: Path) -> AsyncIterator[bytes]:
+        """Stream audio file as fast as possible (no real-time delay).
+
+        Args:
+            audio_path: Path to audio file
+
+        Yields:
+            PCM16 audio chunks
+        """
+        async for chunk in self.stream_file(audio_path, realtime=False):
+            yield chunk
+
+    def get_duration(self, audio_path: Path) -> float:
+        """Get audio file duration in seconds.
+
+        Args:
+            audio_path: Path to audio file
+
+        Returns:
+            Duration in seconds
+
+        Raises:
+            subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
+            ValueError: If ffprobe's output cannot be parsed as a float.
+        """
+        cmd = [
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_entries",
+            "format=duration",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+            str(audio_path),
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        return float(result.stdout.strip())
--- a/evals/stt/benchmark.py
+++ b/evals/stt/benchmark.py
@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+"""STT Benchmark Runner.
+
+Compare speech-to-text transcription across providers with focus on:
+- Speaker diarization accuracy
+- Keyword/keyterm recognition
+- Transcription quality
+
+Usage:
+    python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
+    python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
+    python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --keyterms "Dograh" "Pipecat"
+"""
+
+import argparse
+import asyncio
+import json
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from evals.stt.providers import (
+    DeepgramProvider,
+    DeepgramFluxProvider,
+    SpeechmaticsProvider,
+    LocalSmartTurnProvider,
+    STTProvider,
+    TranscriptionResult,
+)
+
+
+def get_provider(name: str) -> STTProvider:
+    """Instantiate the STT provider registered under ``name``.
+
+    Args:
+        name: One of the provider keys listed in the mapping below.
+
+    Returns:
+        A new provider instance, constructed with no arguments.
+
+    Raises:
+        ValueError: If ``name`` is not a known provider key.
+    """
+    providers = {
+        "deepgram": DeepgramProvider,
+        "deepgram-flux": DeepgramFluxProvider,
+        "speechmatics": SpeechmaticsProvider,
+        "local-smart-turn": LocalSmartTurnProvider,
+    }
+    provider_cls = providers.get(name)
+    if provider_cls is None:
+        raise ValueError(f"Unknown provider: {name}. Available: {list(providers.keys())}")
+    return provider_cls()
+
+
+async def run_transcription(
+    provider: STTProvider,
+    audio_path: Path,
+    diarize: bool = False,
+    keyterms: list[str] | None = None,
+    **kwargs: Any,
+) -> TranscriptionResult:
+    """Print a provider banner and transcribe ``audio_path`` with ``provider``.
+
+    Args:
+        provider: Provider whose ``transcribe`` coroutine is awaited.
+        audio_path: Audio file handed to the provider.
+        diarize: Forwarded to ``provider.transcribe``.
+        keyterms: Forwarded to ``provider.transcribe``.
+        **kwargs: Extra keyword arguments forwarded to ``provider.transcribe``.
+
+    Returns:
+        Whatever ``provider.transcribe`` returns.
+
+    Raises:
+        Exception: Any error from the provider is printed and re-raised.
+    """
+    print(f"\n{'='*60}")
+    print(f"Provider: {provider.name.upper()}")
+    print(f"{'='*60}")
+
+    try:
+        return await provider.transcribe(
+            audio_path,
+            diarize=diarize,
+            keyterms=keyterms,
+            **kwargs,
+        )
+    except Exception as e:
+        # Report which provider failed, then let the caller decide what to do.
+        print(f"Error with {provider.name}: {e}")
+        raise
+
+
+def print_result(result: TranscriptionResult, show_words: bool = False) -> None:
+    """Print a transcription result to stdout.
+
+    Args:
+        result: Result to display (duration, speakers, transcript).
+        show_words: When True, also list up to the first 50 words with their
+            start time, optional speaker tag, and confidence.
+    """
+    print(f"\nDuration: {result.duration:.2f}s")
+    print(f"Speakers detected: {len(result.speakers)} - {result.speakers}")
+    print(f"\nTranscript:\n{result.transcript}")
+
+    if result.speakers:
+        print(f"\n--- Speaker Segments ---")
+        for segment in result.get_speaker_segments():
+            # A falsy speaker label is shown as "?".
+            speaker, text, start = (
+                segment["speaker"] or "?",
+                segment["text"],
+                segment["start"],
+            )
+            print(f"[{start:.1f}s] Speaker {speaker}: {text}")
+
+    if not show_words:
+        return
+
+    print(f"\n--- Words ---")
+    for word in result.words[:50]:  # First 50 words
+        speaker_info = f" (S{word.speaker})" if word.speaker else ""
+        print(f"  {word.start:.2f}s: {word.word}{speaker_info} [{word.confidence:.2f}]")
+    if len(result.words) > 50:
+        print(f"  ... and {len(result.words) - 50} more words")
+
+
+def save_results(
+    results: list[TranscriptionResult],
+    output_dir: Path,
+    audio_name: str,
+) -> Path:
+    """Write the results to a timestamped JSON file inside ``output_dir``.
+
+    Args:
+        results: Results to serialize through their ``to_dict`` method.
+        output_dir: Destination directory; created if it does not exist.
+        audio_name: Name used as the file-name prefix and stored in the file.
+
+    Returns:
+        Path of the JSON file that was written.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_file = output_dir / f"{audio_name}_{timestamp}.json"
+
+    output_data = {"timestamp": timestamp, "audio_file": audio_name}
+    output_data["results"] = [r.to_dict() for r in results]
+
+    with open(output_file, "w") as f:
+        json.dump(output_data, f, indent=2)
+
+    print(f"\nResults saved to: {output_file}")
+    return output_file
+
+
+def compare_results(results: list[TranscriptionResult]) -> None:
+    """Print a summary table for two or more results.
+
+    Does nothing when fewer than two results are given. After the table, a
+    note is printed if the providers disagree on the number of speakers.
+    """
+    if len(results) < 2:
+        return
+
+    print(f"\n{'='*60}")
+    print("COMPARISON SUMMARY")
+    print(f"{'='*60}")
+
+    print(f"\n{'Provider':<15} {'Duration':<10} {'Speakers':<10} {'Words':<10}")
+    print("-" * 45)
+
+    speaker_counts = {}
+    for r in results:
+        print(f"{r.provider:<15} {r.duration:<10.2f} {len(r.speakers):<10} {len(r.words):<10}")
+        # Collected per provider name; a repeated name keeps the last count.
+        speaker_counts[r.provider] = len(r.speakers)
+
+    # Compare speaker counts
+    distinct_counts = set(speaker_counts.values())
+    if len(distinct_counts) > 1:
+        print(f"\nNote: Providers detected different speaker counts: {speaker_counts}")
+
+
+async def main() -> int:
+    """Parse CLI arguments, run each selected provider, and report the results.
+
+    Relative audio paths are resolved against this script's directory. A
+    provider that raises is reported and skipped so the others still run.
+
+    Returns:
+        Process exit code: 0 when at least one provider produced a result,
+        1 when the audio file is missing or every provider failed.
+    """
+    parser = argparse.ArgumentParser(
+        description="STT Benchmark - Compare transcription providers",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
+  python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
+  python -m evals.stt.benchmark audio/multi_speaker.m4a --keyterms "Dograh" "API"
+        """,
+    )
+    parser.add_argument(
+        "audio_file",
+        type=str,
+        help="Path to audio file (relative to evals/stt/ or absolute)",
+    )
+    parser.add_argument(
+        "--providers",
+        nargs="+",
+        default=["deepgram", "speechmatics"],
+        choices=["deepgram", "deepgram-flux", "speechmatics", "local-smart-turn"],
+        # The default is a subset of the choices, so do not advertise "all".
+        help="Providers to test (default: deepgram speechmatics)",
+    )
+    parser.add_argument(
+        "--diarize",
+        action="store_true",
+        help="Enable speaker diarization",
+    )
+    parser.add_argument(
+        "--keyterms",
+        nargs="+",
+        help="Keywords to boost (Deepgram only)",
+    )
+    parser.add_argument(
+        "--language",
+        default="en",
+        help="Language code (default: en)",
+    )
+    parser.add_argument(
+        "--sample-rate",
+        type=int,
+        default=8000,
+        help="Audio sample rate for streaming (default: 8000)",
+    )
+    parser.add_argument(
+        "--show-words",
+        action="store_true",
+        help="Show individual word timings",
+    )
+    parser.add_argument(
+        "--save",
+        action="store_true",
+        help="Save results to JSON file",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default="results",
+        help="Output directory for results (default: results)",
+    )
+
+    args = parser.parse_args()
+
+    # Resolve audio path
+    script_dir = Path(__file__).parent
+    audio_path = Path(args.audio_file)
+    if not audio_path.is_absolute():
+        audio_path = script_dir / audio_path
+
+    if not audio_path.exists():
+        print(f"Error: Audio file not found: {audio_path}")
+        return 1
+
+    print(f"Audio file: {audio_path}")
+    print(f"Providers: {args.providers}")
+    print(f"Diarization: {args.diarize}")
+    print(f"Sample rate: {args.sample_rate} Hz")
+    if args.keyterms:
+        print(f"Keyterms: {args.keyterms}")
+
+    results: list[TranscriptionResult] = []
+
+    for provider_name in args.providers:
+        try:
+            provider = get_provider(provider_name)
+            result = await run_transcription(
+                provider,
+                audio_path,
+                diarize=args.diarize,
+                keyterms=args.keyterms,
+                language=args.language,
+                sample_rate=args.sample_rate,
+            )
+            print_result(result, show_words=args.show_words)
+            results.append(result)
+        except Exception as e:
+            # Best effort: one failing provider must not abort the others.
+            print(f"\nFailed to run {provider_name}: {e}")
+
+    if not results:
+        # Every requested provider failed; report that through the exit code
+        # instead of signalling success.
+        return 1
+
+    if len(results) > 1:
+        compare_results(results)
+
+    if args.save:
+        output_dir = script_dir / args.output_dir
+        save_results(results, output_dir, audio_path.stem)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))
--- a/evals/stt/event_capture.py
+++ b/evals/stt/event_capture.py
@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+"""STT Event Capture Runner.
+
+Streams audio to STT providers and captures raw WebSocket events with timestamps
+for visualization in the web UI.
+
+Usage:
+    python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
+    python -m evals.stt.event_capture audio/multi_speaker.m4a --provider speechmatics
+"""
+
+import argparse
+import asyncio
+import json
+import sys
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Callable
+
+from evals.stt.audio_streamer import AudioStreamer
+from evals.stt.providers import (
+    DeepgramFluxProvider,
+    DeepgramProvider,
+    SpeechmaticsProvider,
+    STTProvider,
+)
+
+
+@dataclass
+class CapturedEvent:
+    """One raw provider event paired with the time at which it was observed."""
+
+    # Seconds elapsed relative to the capture's reference time.
+    timestamp: float
+    # Provider message type, e.g. "Results", "TurnInfo", "AddTranscript".
+    event_type: str
+    # Raw event payload.
+    data: dict[str, Any]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the event as a plain dict; ``data`` is referenced, not copied."""
+        serialized: dict[str, Any] = dict(
+            timestamp=self.timestamp,
+            event_type=self.event_type,
+            data=self.data,
+        )
+        return serialized
+
+
+@dataclass
+class EventCaptureResult:
+    """Everything recorded during one event-capture run against one provider."""
+
+    # File name of the captured audio.
+    audio_file: str
+    # Relative path to the audio from the results directory.
+    audio_path: str
+    # Name of the provider that produced the events.
+    provider: str
+    # Audio duration; main() prints it in seconds.
+    duration: float
+    # Creation time as a string (capture_events stores datetime.now().isoformat()).
+    created_at: str
+    # Captured events in arrival order.
+    events: list[CapturedEvent] = field(default_factory=list)
+    # Final transcript, kept for reference.
+    transcript: str = ""
+
+    def to_dict(self) -> dict[str, Any]:
+        """Build a plain dict of all fields, expanding each event via ``to_dict``."""
+        event_dicts = [captured.to_dict() for captured in self.events]
+        return {
+            "audio_file": self.audio_file,
+            "audio_path": self.audio_path,
+            "provider": self.provider,
+            "duration": self.duration,
+            "created_at": self.created_at,
+            "events": event_dicts,
+            "transcript": self.transcript,
+        }
+
+
+EventCallback = Callable[[str, dict[str, Any]], None]
+
+
+def get_provider(name: str) -> STTProvider:
+    """Instantiate the STT provider registered under ``name``.
+
+    Args:
+        name: One of "deepgram", "deepgram-flux" or "speechmatics".
+
+    Returns:
+        A new instance of the matching provider class, built with no arguments.
+
+    Raises:
+        ValueError: If ``name`` is not a registered provider name.
+    """
+    registry = {
+        "deepgram": DeepgramProvider,
+        "deepgram-flux": DeepgramFluxProvider,
+        "speechmatics": SpeechmaticsProvider,
+    }
+    provider_cls = registry.get(name)
+    if provider_cls is None:
+        raise ValueError(f"Unknown provider: {name}. Available: {list(registry.keys())}")
+    return provider_cls()
+
+
+async def capture_events(
+    provider: STTProvider,
+    audio_path: Path,
+    sample_rate: int = 8000,
+    **kwargs: Any,
+) -> EventCaptureResult:
+    """Capture WebSocket events from a provider.
+
+    Event timestamps are read from the running event loop's clock and are
+    relative to the first event received, so the first captured event always
+    has timestamp 0.0.
+
+    Args:
+        provider: The STT provider to use
+        audio_path: Path to the audio file
+        sample_rate: Audio sample rate
+        **kwargs: Additional provider parameters, forwarded to
+            ``provider.transcribe``
+
+    Returns:
+        EventCaptureResult with all captured events
+    """
+    # Get audio duration
+    streamer = AudioStreamer()
+    duration = streamer.get_duration(audio_path)
+
+    # Resolve the loop once: inside a coroutine, get_running_loop() is the
+    # supported accessor (get_event_loop() is deprecated for this use), and
+    # binding the clock here avoids a loop lookup on every event.
+    clock = asyncio.get_running_loop().time
+
+    # Event list and reference time (set when the first event arrives)
+    events: list[CapturedEvent] = []
+    start_time: float | None = None
+
+    def on_event(event_type: str, data: dict[str, Any]) -> None:
+        """Record one provider event with its offset from the first event."""
+        nonlocal start_time
+        now = clock()
+        if start_time is None:
+            start_time = now
+
+        events.append(
+            CapturedEvent(timestamp=now - start_time, event_type=event_type, data=data)
+        )
+
+    # Run transcription with event callback
+    result = await provider.transcribe(
+        audio_path,
+        sample_rate=sample_rate,
+        on_event=on_event,
+        **kwargs,
+    )
+
+    return EventCaptureResult(
+        audio_file=audio_path.name,
+        # Stored relative to the results directory so the saved JSON can locate the audio.
+        audio_path=f"../audio/{audio_path.name}",
+        provider=provider.name,
+        duration=duration,
+        created_at=datetime.now().isoformat(),
+        events=events,
+        transcript=result.transcript,
+    )
+
+
+def save_result(result: EventCaptureResult, output_dir: Path) -> Path:
+    """Write a capture result as indented JSON inside ``output_dir``.
+
+    The directory (and any missing parents) is created if needed. The file is
+    named ``{audio_name}-{provider}.json``, where ``audio_name`` is the stem of
+    ``result.audio_file``.
+
+    Args:
+        result: The capture result to save
+        output_dir: Directory to save results
+
+    Returns:
+        Path to the saved file
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    stem = Path(result.audio_file).stem
+    target = output_dir / f"{stem}-{result.provider}.json"
+
+    with target.open("w") as handle:
+        json.dump(result.to_dict(), handle, indent=2)
+
+    return target
+
+
+async def main() -> int:
+    """Command-line entry point: capture provider events for one audio file.
+
+    Parses the arguments, resolves the audio path (relative paths are taken
+    from this script's directory), runs the capture, saves the JSON result and
+    prints a short summary.
+
+    Returns:
+        0 on success; 1 if the audio file does not exist or the capture raises.
+    """
+    cli = argparse.ArgumentParser(
+        description="STT Event Capture - Capture WebSocket events for visualization",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
+  python -m evals.stt.event_capture audio/multi_speaker.m4a --provider speechmatics --diarize
+        """,
+    )
+    cli.add_argument(
+        "audio_file",
+        type=str,
+        help="Path to audio file (relative to evals/stt/ or absolute)",
+    )
+    cli.add_argument(
+        "--provider",
+        required=True,
+        choices=["deepgram", "deepgram-flux", "speechmatics"],
+        help="STT provider to use",
+    )
+    cli.add_argument(
+        "--sample-rate",
+        type=int,
+        default=8000,
+        help="Audio sample rate for streaming (default: 8000)",
+    )
+    cli.add_argument(
+        "--diarize",
+        action="store_true",
+        help="Enable speaker diarization",
+    )
+    cli.add_argument(
+        "--output-dir",
+        type=str,
+        default="results",
+        help="Output directory for results (default: results)",
+    )
+
+    opts = cli.parse_args()
+
+    # Relative audio paths are anchored at the directory containing this script.
+    base_dir = Path(__file__).parent
+    requested = Path(opts.audio_file)
+    audio = requested if requested.is_absolute() else base_dir / requested
+
+    if not audio.exists():
+        print(f"Error: Audio file not found: {audio}")
+        return 1
+
+    print(f"Audio file: {audio}")
+    print(f"Provider: {opts.provider}")
+    print(f"Sample rate: {opts.sample_rate} Hz")
+    print(f"Diarization: {opts.diarize}")
+
+    try:
+        stt = get_provider(opts.provider)
+        print(f"\nCapturing events from {stt.name}...")
+
+        captured = await capture_events(
+            stt,
+            audio,
+            sample_rate=opts.sample_rate,
+            diarize=opts.diarize,
+        )
+
+        saved_to = save_result(captured, base_dir / opts.output_dir)
+
+        print("\nCapture complete!")
+        print(f"  Duration: {captured.duration:.2f}s")
+        print(f"  Events: {len(captured.events)}")
+        print(f"  Saved to: {saved_to}")
+
+        # Preview the first few events
+        print("\nFirst 5 events:")
+        for item in captured.events[:5]:
+            print(f"  [{item.timestamp:.2f}s] {item.event_type}")
+
+        return 0
+
+    except Exception as e:
+        # Report the failure with a full traceback and signal it via the exit code.
+        print(f"\nError: {e}")
+        import traceback
+
+        traceback.print_exc()
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))
--- a/evals/stt/providers/init.py
+++ b/evals/stt/providers/init.py
@ -0,0 +1,16 @@
+from .base import EventCallback, STTProvider, TranscriptionResult, Word
+from .deepgram_provider import DeepgramProvider
+from .deepgram_flux_provider import DeepgramFluxProvider
+from .speechmatics_provider import SpeechmaticsProvider
+from .local_smart_turn_provider import LocalSmartTurnProvider
+
+__all__ = [
+    "EventCallback",
+    "STTProvider",
+    "TranscriptionResult",
+    "Word",
+    "DeepgramProvider",
+    "DeepgramFluxProvider",
+    "SpeechmaticsProvider",
+    "LocalSmartTurnProvider",
+]
--- a/evals/stt/providers/base.py
+++ b/evals/stt/providers/base.py
@ -0,0 +1,128 @@
+"""Base classes for STT providers."""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Callable
+
+# Event callback type: (event_type, data) -> None
+EventCallback = Callable[[str, dict[str, Any]], None]
+
+
+@dataclass
+class Word:
+    """Represents a transcribed word with metadata."""
+
+    word: str
+    start: float
+    end: float
+    confidence: float
+    speaker: str | None = None
+    speaker_confidence: float | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return all fields as a plain dict."""
+        return {
+            "word": self.word,
+            "start": self.start,
+            "end": self.end,
+            "confidence": self.confidence,
+            "speaker": self.speaker,
+            "speaker_confidence": self.speaker_confidence,
+        }
+
+
+@dataclass
+class TranscriptionResult:
+    """Result from STT transcription."""
+
+    provider: str
+    transcript: str
+    words: list[Word]
+    speakers: list[str]
+    duration: float
+    raw_response: dict[str, Any] = field(default_factory=dict)
+    params: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the result as a plain dict; ``raw_response`` is not included."""
+        return {
+            "provider": self.provider,
+            "transcript": self.transcript,
+            "words": [w.to_dict() for w in self.words],
+            "speakers": self.speakers,
+            "duration": self.duration,
+            "params": self.params,
+        }
+
+    def get_speaker_segments(self) -> list[dict[str, Any]]:
+        """Get transcript segmented by speaker.
+
+        Consecutive words with the same ``speaker`` value (including ``None``)
+        are merged into one segment.
+
+        Returns:
+            A list of dicts with keys ``speaker``, ``text`` (words joined by a
+            single space), ``start`` (start of the segment's first word) and
+            ``end`` (end of the segment's last word). Empty if there are no
+            words.
+        """
+        if not self.words:
+            return []
+
+        segments: list[dict[str, Any]] = []
+
+        # Seed the first segment from the first word so that its start time is
+        # correct even when no word carries a speaker label.
+        first = self.words[0]
+        current_speaker = first.speaker
+        current_text = [first.word]
+        segment_start = first.start
+        segment_end = first.end
+
+        for word in self.words[1:]:
+            if word.speaker != current_speaker:
+                # Close the running segment at the end of its own last word,
+                # not at a position derived from the number of segments.
+                segments.append(
+                    {
+                        "speaker": current_speaker,
+                        "text": " ".join(current_text),
+                        "start": segment_start,
+                        "end": segment_end,
+                    }
+                )
+                current_speaker = word.speaker
+                current_text = [word.word]
+                segment_start = word.start
+            else:
+                current_text.append(word.word)
+            segment_end = word.end
+
+        segments.append(
+            {
+                "speaker": current_speaker,
+                "text": " ".join(current_text),
+                "start": segment_start,
+                "end": segment_end,
+            }
+        )
+
+        return segments
+
+
+class STTProvider(ABC):
+    """Interface that every STT provider implementation must satisfy."""
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Provider name."""
+        ...
+
+    @abstractmethod
+    async def transcribe(
+        self,
+        audio_path: Path,
+        diarize: bool = False,
+        keyterms: list[str] | None = None,
+        on_event: EventCallback | None = None,
+        **kwargs: Any,
+    ) -> TranscriptionResult:
+        """Transcribe an audio file and return the collected result.
+
+        Args:
+            audio_path: Location of the audio file to transcribe.
+            diarize: Whether to request speaker diarization.
+            keyterms: Keywords to boost, for providers that support it.
+            on_event: Optional callback invoked as ``on_event(event_type, data)``
+                for raw WebSocket events.
+            **kwargs: Provider-specific parameters.
+
+        Returns:
+            TranscriptionResult with transcript and metadata.
+        """
+        ...
--- a/evals/stt/providers/deepgram_flux_provider.py
+++ b/evals/stt/providers/deepgram_flux_provider.py
@ -0,0 +1,235 @@
+"""Deepgram Flux STT provider with WebSocket streaming.
+
+Flux is Deepgram's conversational AI model with built-in turn detection.
+It has a different API than Nova models - no language/punctuate/diarize params.
+"""
+
+import asyncio
+import json
+import os
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlencode
+
+from loguru import logger
+
+from ..audio_streamer import AudioConfig, AudioStreamer
+from .base import EventCallback, STTProvider, TranscriptionResult, Word
+
+try:
+    from websockets.asyncio.client import connect as websocket_connect
+except ImportError:
+    raise ImportError("websockets required: pip install websockets")
+
+
+class DeepgramFluxProvider(STTProvider):
+    """Deepgram Flux Speech-to-Text provider with WebSocket streaming.
+
+    Flux is optimized for conversational AI with built-in turn detection.
+
+    Key differences from Nova:
+    - Uses v2 API endpoint
+    - Only supports English (flux-general-en)
+    - No punctuate, diarize, or language params
+    - Has turn detection events (StartOfTurn, EndOfTurn, EagerEndOfTurn)
+    - Supports keyterm boosting
+
+    API Docs: https://developers.deepgram.com/docs/
+    """
+
+    WS_URL = "wss://api.deepgram.com/v2/listen"
+
+    def __init__(self, api_key: str | None = None):
+        """Store the API key, falling back to the DEEPGRAM_API_KEY env var.
+
+        Raises:
+            ValueError: If no key is passed and the env var is unset or empty.
+        """
+        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
+        if not self.api_key:
+            raise ValueError(
+                "Deepgram API key required. Set DEEPGRAM_API_KEY env var or pass api_key."
+            )
+
+    @property
+    def name(self) -> str:
+        """Provider identifier stored in results."""
+        return "deepgram-flux"
+
+    async def transcribe(
+        self,
+        audio_path: Path,
+        diarize: bool = False,  # Ignored - Flux doesn't support diarization
+        keyterms: list[str] | None = None,
+        on_event: EventCallback | None = None,
+        model: str = "flux-general-en",
+        sample_rate: int = 16000,
+        eot_threshold: float | None = 0.70,
+        eot_timeout_ms: int | None = 3000,
+        eager_eot_threshold: float | None = None,
+        trailing_silence_seconds: float = 3.0,
+        **kwargs: Any,
+    ) -> TranscriptionResult:
+        """Transcribe audio using Deepgram Flux WebSocket streaming.
+
+        Args:
+            audio_path: Path to audio file
+            diarize: IGNORED - Flux does not support diarization
+            keyterms: List of keywords to boost recognition
+            on_event: Optional callback for raw WebSocket events
+            model: Flux model (default: flux-general-en)
+            sample_rate: Audio sample rate (default: 16000 for Flux)
+            eot_threshold: End-of-turn confidence threshold (0-1, default 0.7)
+            eot_timeout_ms: Timeout in ms to force end of turn (default 3000)
+            eager_eot_threshold: Threshold for eager end-of-turn events
+            trailing_silence_seconds: Seconds of silence after audio to capture pending events
+            **kwargs: Accepted for interface compatibility; not forwarded to the API
+
+        Returns:
+            TranscriptionResult with transcript (no speaker info - Flux doesn't support diarization)
+
+        Raises:
+            ConnectionError: If the connection ends before the server sends
+                its "Connected" message.
+            Exception: If the server sends an "Error" message.
+        """
+        if diarize:
+            logger.warning("Flux does not support diarization - ignoring diarize=True")
+
+        # Build query params - Flux only supports specific params
+        params: dict[str, Any] = {
+            "model": model,
+            "encoding": "linear16",
+            "sample_rate": sample_rate,
+        }
+
+        # Flux-specific turn detection params
+        if eot_threshold is not None:
+            params["eot_threshold"] = eot_threshold
+        if eot_timeout_ms is not None:
+            params["eot_timeout_ms"] = eot_timeout_ms
+        if eager_eot_threshold is not None:
+            params["eager_eot_threshold"] = eager_eot_threshold
+
+        # Build the query as ordered pairs so every value is URL-encoded and
+        # keyterms can be repeated.
+        query: list[tuple[str, Any]] = list(params.items())
+        if keyterms:
+            query.extend(("keyterm", term) for term in keyterms)
+
+        ws_url = f"{self.WS_URL}?{urlencode(query)}"
+        logger.debug(f"Flux WebSocket URL: {ws_url}")
+
+        # Setup audio streamer
+        audio_config = AudioConfig(sample_rate=sample_rate)
+        streamer = AudioStreamer(audio_config)
+
+        # Collect results
+        all_transcripts: list[dict[str, Any]] = []
+        final_transcript = ""
+        duration = 0.0
+        connected = asyncio.Event()
+
+        async with websocket_connect(
+            ws_url,
+            additional_headers={"Authorization": f"Token {self.api_key}"},
+        ) as ws:
+
+            async def send_audio():
+                """Send audio chunks to Deepgram Flux once the server is ready."""
+                await connected.wait()
+
+                chunk_no = 0
+                async for chunk in streamer.stream_file(
+                    audio_path, trailing_silence_seconds=trailing_silence_seconds
+                ):
+                    logger.trace(f"[deepgram-flux] Sent audio chunk {chunk_no}")
+                    await ws.send(chunk)
+                    chunk_no += 1
+
+            async def receive_messages():
+                """Receive and collect Flux messages."""
+                nonlocal final_transcript, duration
+
+                async for message in ws:
+                    if not isinstance(message, str):
+                        continue
+
+                    data = json.loads(message)
+                    msg_type = data.get("type")
+                    logger.debug(f"[deepgram-flux] Received {msg_type}: {data}")
+
+                    # Emit event via callback if provided
+                    if on_event and msg_type:
+                        on_event(msg_type, data)
+
+                    if msg_type == "Connected":
+                        logger.info("[deepgram-flux] Connected")
+                        connected.set()
+
+                    elif msg_type == "TurnInfo":
+                        event = data.get("event")
+                        transcript = data.get("transcript", "")
+                        words = data.get("words", [])
+
+                        if event == "EndOfTurn":
+                            if transcript:
+                                final_transcript += transcript + " "
+                            if words:
+                                all_transcripts.append({
+                                    "transcript": transcript,
+                                    "words": words,
+                                })
+                                # Get duration from last word
+                                duration = max(duration, words[-1].get("end", 0))
+
+                        elif event == "TurnResumed":
+                            logger.debug("TurnResumed")
+
+                    elif msg_type == "Error":
+                        raise Exception(f"Deepgram Flux error: {data}")
+
+            # Run send and receive concurrently
+            send_task = asyncio.create_task(send_audio())
+            receive_task = asyncio.create_task(receive_messages())
+
+            try:
+                done, _ = await asyncio.wait(
+                    {send_task, receive_task},
+                    return_when=asyncio.FIRST_COMPLETED,
+                )
+
+                if send_task not in done:
+                    # The receiver stopped first. Surface its error, if any.
+                    receive_task.result()
+                    if not connected.is_set():
+                        # The sender is still parked on `connected` and would
+                        # otherwise wait forever.
+                        raise ConnectionError(
+                            "Deepgram Flux connection ended before 'Connected' was received"
+                        )
+
+                await send_task
+
+                logger.debug("[deepgram-flux] Send task done")
+                try:
+                    await asyncio.wait_for(receive_task, timeout=10.0)
+                except asyncio.TimeoutError:
+                    # No more messages within the grace period; use what was collected.
+                    pass
+            finally:
+                # Never leave a task running after the socket is closed.
+                leftover = [t for t in (send_task, receive_task) if not t.done()]
+                for task in leftover:
+                    task.cancel()
+                await asyncio.gather(*leftover, return_exceptions=True)
+
+        return self._parse_results(
+            all_transcripts, final_transcript.strip(), duration, params, keyterms
+        )
+
+    def _parse_results(
+        self,
+        transcripts: list[dict[str, Any]],
+        final_transcript: str,
+        duration: float,
+        params: dict[str, Any],
+        keyterms: list[str] | None,
+    ) -> TranscriptionResult:
+        """Parse collected Flux results into TranscriptionResult.
+
+        Args:
+            transcripts: Collected turns, each a dict with "transcript" and "words".
+            final_transcript: Concatenated transcript text.
+            duration: Duration to store on the result.
+            params: Query parameters that were sent; copied into the result.
+            keyterms: Keyterms that were sent; stored under "keyterms" if non-empty.
+
+        Returns:
+            TranscriptionResult with no speaker information.
+        """
+        words = [
+            Word(
+                word=w.get("word", ""),
+                start=w.get("start", 0.0),
+                end=w.get("end", 0.0),
+                confidence=w.get("confidence", 0.0),
+                speaker=None,  # Flux doesn't support diarization
+                speaker_confidence=None,
+            )
+            for turn in transcripts
+            for w in turn.get("words", [])
+        ]
+
+        # Copy so the caller's params dict is not mutated.
+        stored_params = dict(params)
+        if keyterms:
+            stored_params["keyterms"] = keyterms
+
+        return TranscriptionResult(
+            provider=self.name,
+            transcript=final_transcript,
+            words=words,
+            speakers=[],  # Flux doesn't support diarization
+            duration=duration,
+            raw_response={"transcripts": transcripts},
+            params=stored_params,
+        )
--- a/evals/stt/providers/deepgram_provider.py
+++ b/evals/stt/providers/deepgram_provider.py
@ -0,0 +1,236 @@
+"""Deepgram STT provider with WebSocket streaming."""
+
+import asyncio
+import json
+import os
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlencode
+
+from ..audio_streamer import AudioConfig, AudioStreamer
+from .base import EventCallback, STTProvider, TranscriptionResult, Word
+from loguru import logger
+
+try:
+    from websockets.asyncio.client import connect as websocket_connect
+except ImportError:
+    raise ImportError("websockets required: pip install websockets")
+
+
+class DeepgramProvider(STTProvider):
+    """Deepgram Nova Speech-to-Text provider with WebSocket streaming.
+
+    API Docs: https://developers.deepgram.com/docs/
+
+    Supports:
+    - Speaker diarization via `diarize=true`
+    - Keyterm boosting via `keyterm` parameter
+    - Real-time streaming via WebSocket
+    - Multiple languages
+    - Punctuation
+
+    For Flux models, use DeepgramFluxProvider instead.
+    """
+
+    WS_URL = "wss://api.deepgram.com/v1/listen"
+
+    def __init__(self, api_key: str | None = None):
+        """Store the API key, falling back to the DEEPGRAM_API_KEY env var.
+
+        Raises:
+            ValueError: If no key is passed and the env var is unset or empty.
+        """
+        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
+        if not self.api_key:
+            raise ValueError(
+                "Deepgram API key required. Set DEEPGRAM_API_KEY env var or pass api_key."
+            )
+
+    @property
+    def name(self) -> str:
+        """Provider identifier stored in results."""
+        return "deepgram"
+
+    async def transcribe(
+        self,
+        audio_path: Path,
+        diarize: bool = False,
+        keyterms: list[str] | None = None,
+        on_event: EventCallback | None = None,
+        model: str = "nova-3-general",
+        language: str = "en",
+        sample_rate: int = 8000,
+        punctuate: bool = True,
+        trailing_silence_seconds: float = 3.0,
+        **kwargs: Any,
+    ) -> TranscriptionResult:
+        """Transcribe audio using Deepgram Nova WebSocket streaming.
+
+        Interim results are still reported through ``on_event``, but only
+        results flagged ``is_final`` contribute words and transcript text, so
+        words are not duplicated across interim updates.
+
+        Connection and server errors are logged and swallowed; whatever was
+        collected up to that point is returned.
+
+        Args:
+            audio_path: Path to audio file
+            diarize: Enable speaker diarization
+            keyterms: List of keywords to boost recognition
+            on_event: Optional callback for raw WebSocket events
+            model: Deepgram Nova model (nova-3, nova-2, etc.)
+            language: Language code
+            sample_rate: Audio sample rate for streaming
+            punctuate: Add punctuation
+            trailing_silence_seconds: Seconds of silence after audio to capture pending events
+            **kwargs: Additional Deepgram parameters, appended to the query string
+
+        Returns:
+            TranscriptionResult with transcript and speaker info
+        """
+        # Build query params
+        params: dict[str, Any] = {
+            "model": model,
+            "language": language,
+            "punctuate": str(punctuate).lower(),
+            "encoding": "linear16",
+            "sample_rate": sample_rate,
+            "channels": 1,
+            "interim_results": "true",
+            "smart_format": "true",
+            "profanity_filter": "true",
+            "vad_events": "true",
+            "utterance_end_ms": "1000",
+        }
+
+        if diarize:
+            params["diarize"] = "true"
+
+        # Build the query as ordered pairs so every value is URL-encoded:
+        # fixed params first, then repeated keyterms, then caller extras.
+        query: list[tuple[str, Any]] = list(params.items())
+        if keyterms:
+            query.extend(("keyterm", term) for term in keyterms)
+        query.extend(kwargs.items())
+
+        ws_url = f"{self.WS_URL}?{urlencode(query)}"
+        logger.debug(f"Deepgram WebSocket URL: {ws_url}")
+
+        # Setup audio streamer
+        audio_config = AudioConfig(sample_rate=sample_rate)
+        streamer = AudioStreamer(audio_config)
+
+        # Collect results
+        all_words: list[dict[str, Any]] = []
+        final_transcript = ""
+        duration = 0.0
+
+        try:
+            async with websocket_connect(
+                ws_url,
+                additional_headers={"Authorization": f"Token {self.api_key}"},
+            ) as ws:
+
+                async def send_audio():
+                    """Send audio chunks to Deepgram, then ask it to close the stream."""
+                    chunk_no = 0
+                    async for chunk in streamer.stream_file(
+                        audio_path, trailing_silence_seconds=trailing_silence_seconds
+                    ):
+                        logger.trace(f"[deepgram] Sent audio chunk {chunk_no}")
+                        await ws.send(chunk)
+                        chunk_no += 1
+                    # Send close message
+                    logger.debug(f"[deepgram] Sending CloseStream after {chunk_no} chunks")
+                    await ws.send(json.dumps({"type": "CloseStream"}))
+
+                async def receive_transcripts():
+                    """Receive and collect transcription results."""
+                    nonlocal final_transcript, duration
+
+                    async for message in ws:
+                        if not isinstance(message, str):
+                            continue
+
+                        data = json.loads(message)
+                        msg_type = data.get("type")
+                        logger.debug(f"[deepgram] Received {msg_type}: {data}")
+
+                        # Emit event via callback if provided
+                        if on_event and msg_type:
+                            on_event(msg_type, data)
+
+                        if msg_type == "Results":
+                            # Nova-style response
+                            channel = data.get("channel", {})
+                            alternatives = channel.get("alternatives", [])
+
+                            # interim_results is enabled, so the same words
+                            # arrive repeatedly until a result is final.
+                            # Collect only finals to avoid duplicates.
+                            if alternatives and data.get("is_final"):
+                                alt = alternatives[0]
+                                all_words.extend(alt.get("words", []))
+
+                                text = alt.get("transcript", "")
+                                if text:
+                                    final_transcript += text + " "
+                                duration = max(
+                                    duration, data.get("duration", 0) + data.get("start", 0)
+                                )
+
+                        elif msg_type == "Metadata":
+                            # Get duration from metadata
+                            duration = data.get("duration", duration)
+
+                        elif msg_type == "Error":
+                            raise Exception(f"Deepgram error: {data}")
+
+                # Run send and receive concurrently
+                send_task = asyncio.create_task(send_audio())
+                receive_task = asyncio.create_task(receive_transcripts())
+
+                try:
+                    # Wait for send to complete, then wait a bit for final results
+                    await send_task
+                    try:
+                        await asyncio.wait_for(receive_task, timeout=5.0)
+                    except asyncio.TimeoutError:
+                        pass  # Normal - websocket closes after final results
+                finally:
+                    # Never leave a task running after the socket is closed.
+                    leftover = [t for t in (send_task, receive_task) if not t.done()]
+                    for task in leftover:
+                        task.cancel()
+                    await asyncio.gather(*leftover, return_exceptions=True)
+        except Exception as e:
+            # Best-effort: log the failure and return what was collected.
+            logger.exception(e)
+
+        return self._parse_results(
+            all_words, final_transcript.strip(), duration, params, keyterms
+        )
+
+    def _parse_results(
+        self,
+        raw_words: list[dict[str, Any]],
+        transcript: str,
+        duration: float,
+        params: dict[str, Any],
+        keyterms: list[str] | None,
+    ) -> TranscriptionResult:
+        """Parse collected results into TranscriptionResult.
+
+        Args:
+            raw_words: Word dicts as received from Deepgram.
+            transcript: Concatenated transcript text.
+            duration: Duration to store on the result.
+            params: Query parameters that were sent; copied into the result.
+            keyterms: Keyterms that were sent; stored under "keyterms" if non-empty.
+
+        Returns:
+            TranscriptionResult whose ``speakers`` is the sorted set of speaker
+            labels seen on the words.
+        """
+        words = []
+        speakers_set: set[str] = set()
+
+        for w in raw_words:
+            # Speaker labels are stringified; words without one get None.
+            speaker = str(w.get("speaker", "")) if "speaker" in w else None
+            if speaker:
+                speakers_set.add(speaker)
+
+            words.append(
+                Word(
+                    word=w.get("word", ""),
+                    start=w.get("start", 0.0),
+                    end=w.get("end", 0.0),
+                    confidence=w.get("confidence", 0.0),
+                    speaker=speaker,
+                    speaker_confidence=w.get("speaker_confidence"),
+                )
+            )
+
+        # Copy so the caller's params dict is not mutated.
+        stored_params = dict(params)
+        if keyterms:
+            stored_params["keyterms"] = keyterms
+
+        return TranscriptionResult(
+            provider=self.name,
+            transcript=transcript,
+            words=words,
+            speakers=sorted(speakers_set),
+            duration=duration,
+            raw_response={"words": raw_words},
+            params=stored_params,
+        )
--- a/evals/stt/providers/local_smart_turn_provider.py
+++ b/evals/stt/providers/local_smart_turn_provider.py
@ -0,0 +1,287 @@
+"""Local Smart Turn provider for benchmarking end-of-turn detection.
+
+Uses the pipecat smart-turn-v3 ONNX model for local ML-based turn detection.
+This is NOT an STT provider - it only detects when a speaker has finished talking.
+"""
+
+import os
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from loguru import logger
+
+from ..audio_streamer import AudioConfig, AudioStreamer
+from .base import EventCallback, STTProvider, TranscriptionResult, Word
+
+try:
+    import onnxruntime as ort
+    from transformers import WhisperFeatureExtractor
+except ImportError:
+    raise ImportError(
+        "onnxruntime and transformers required: pip install onnxruntime transformers"
+    )
+
+
+@dataclass
+class TurnEvent:
+    """Outcome of one turn-detection analysis.
+
+    One event is recorded for every analysed window, whether or not the model
+    judged the turn to be complete.
+
+    Attributes:
+        timestamp: Position in the audio (seconds) where the analysed window ends.
+        probability: Model output for "turn is complete".
+        prediction: 1 when the turn is considered complete, 0 otherwise.
+        inference_time_ms: Time spent in the model call, in milliseconds.
+    """
+
+    timestamp: float
+    probability: float
+    prediction: int
+    inference_time_ms: float
+
+
+class LocalSmartTurnProvider(STTProvider):
+    """Local Smart Turn provider for end-of-turn detection benchmarking.
+
+    Uses the smart-turn-v3 ONNX model to detect when speakers finish talking.
+    This is useful for comparing turn detection accuracy against cloud services
+    like Deepgram Flux's built-in turn detection.
+
+    NOTE: This provider does NOT produce transcripts - only turn detection events.
+    """
+
+    # Smart turn model requires 16kHz audio
+    REQUIRED_SAMPLE_RATE = 16000
+    # Model analyzes 8 seconds of audio
+    WINDOW_SECONDS = 8
+
+    def __init__(
+        self,
+        model_path: str | None = None,
+        cpu_count: int = 1,
+    ):
+        """Initialize the local smart turn provider.
+
+        The model itself is loaded lazily on the first transcribe() call.
+
+        Args:
+            model_path: Path to ONNX model file. If None, uses bundled model.
+            cpu_count: Number of CPUs for inference (default: 1)
+        """
+        self.model_path = model_path
+        self.cpu_count = cpu_count
+        self._session = None
+        self._feature_extractor = None
+
+    def _load_model(self):
+        """Create the ONNX session and feature extractor; no-op once loaded."""
+        if self._session is not None:
+            return
+
+        model_path = self.model_path
+
+        if not model_path:
+            # Fall back to the model bundled with pipecat. This module already
+            # needs Python 3.10+ (PEP 604 annotations), so the stdlib
+            # importlib.resources.files() API is always available. It also
+            # avoids returning a path obtained from the deprecated
+            # resources.path() context manager after that context has exited.
+            from importlib import resources
+
+            model_path = str(
+                resources.files("pipecat.audio.turn.smart_turn.data").joinpath(
+                    "smart-turn-v3.1-cpu.onnx"
+                )
+            )
+
+        logger.info(f"[local-smart-turn] Loading model from {model_path}")
+
+        # Configure ONNX runtime
+        so = ort.SessionOptions()
+        so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
+        so.inter_op_num_threads = 1
+        so.intra_op_num_threads = self.cpu_count
+        so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+
+        self._feature_extractor = WhisperFeatureExtractor(chunk_length=8)
+        self._session = ort.InferenceSession(model_path, sess_options=so)
+
+        logger.info("[local-smart-turn] Model loaded")
+
+    @property
+    def name(self) -> str:
+        """Provider identifier stored in results."""
+        return "local-smart-turn"
+
+    def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
+        """Predict end-of-turn using the ONNX model.
+
+        Requires _load_model() to have been called first.
+
+        Args:
+            audio_array: Audio samples as float32 numpy array (16kHz)
+
+        Returns:
+            Dict with "prediction" (1 if probability > 0.5, else 0),
+            "probability" and "inference_time_ms".
+        """
+        # Keep only the last 8 seconds, or left-pad with zeros up to 8 seconds
+        # so the real audio always sits at the end of the window.
+        max_samples = self.WINDOW_SECONDS * self.REQUIRED_SAMPLE_RATE
+        if len(audio_array) > max_samples:
+            audio_array = audio_array[-max_samples:]
+        elif len(audio_array) < max_samples:
+            padding = max_samples - len(audio_array)
+            audio_array = np.pad(audio_array, (padding, 0), mode="constant", constant_values=0)
+
+        # Process using Whisper's feature extractor
+        inputs = self._feature_extractor(
+            audio_array,
+            sampling_rate=self.REQUIRED_SAMPLE_RATE,
+            return_tensors="np",
+            padding="max_length",
+            max_length=max_samples,
+            truncation=True,
+            do_normalize=True,
+        )
+
+        # Extract features for ONNX (single-item batch, float32)
+        input_features = inputs.input_features.squeeze(0).astype(np.float32)
+        input_features = np.expand_dims(input_features, axis=0)
+
+        # Run inference; only the model call itself is timed
+        start_time = time.perf_counter()
+        outputs = self._session.run(None, {"input_features": input_features})
+        inference_time = (time.perf_counter() - start_time) * 1000
+
+        # Extract probability (model returns sigmoid probabilities)
+        probability = outputs[0][0].item()
+        prediction = 1 if probability > 0.5 else 0
+
+        return {
+            "prediction": prediction,
+            "probability": probability,
+            "inference_time_ms": inference_time,
+        }
+
+    async def transcribe(
+        self,
+        audio_path: Path,
+        diarize: bool = False,  # Ignored - not applicable
+        keyterms: list[str] | None = None,  # Ignored - not applicable
+        on_event: EventCallback | None = None,  # Ignored - not applicable
+        sample_rate: int = 16000,  # Must be 16kHz for smart turn
+        analysis_interval_ms: int = 500,  # How often to check for turn completion
+        **kwargs: Any,
+    ) -> TranscriptionResult:
+        """Analyze audio for turn detection events.
+
+        NOTE: This does NOT produce transcripts. It detects when speakers
+        finish talking using ML-based turn detection. The whole analysis runs
+        synchronously inside this coroutine (it contains no awaits).
+
+        Args:
+            audio_path: Path to audio file
+            diarize: Ignored (not applicable for turn detection)
+            keyterms: Ignored (not applicable for turn detection)
+            on_event: Ignored (not applicable for turn detection)
+            sample_rate: Must be 16000 Hz for smart turn model; any other
+                value is overridden with a warning
+            analysis_interval_ms: How often to run turn detection (ms)
+            **kwargs: Additional parameters (ignored)
+
+        Returns:
+            TranscriptionResult with turn detection events in raw_response.
+            Every analysis with prediction == 1 also appears in `words` as a
+            synthetic "[END_OF_TURN ...]" entry.
+
+        Raises:
+            ValueError: If analysis_interval_ms is shorter than one sample.
+        """
+        if sample_rate != self.REQUIRED_SAMPLE_RATE:
+            logger.warning(
+                f"[local-smart-turn] Sample rate must be {self.REQUIRED_SAMPLE_RATE}Hz, "
+                f"overriding {sample_rate}Hz"
+            )
+            sample_rate = self.REQUIRED_SAMPLE_RATE
+
+        samples_per_interval = int(sample_rate * analysis_interval_ms / 1000)
+        if samples_per_interval < 1:
+            # Fail with a clear message instead of an opaque range() error
+            # (zero step) or a silently empty analysis (negative step).
+            raise ValueError(
+                f"analysis_interval_ms must cover at least one sample, got {analysis_interval_ms}"
+            )
+
+        # Load model if not already loaded
+        self._load_model()
+
+        # Setup audio streamer at 16kHz
+        audio_config = AudioConfig(sample_rate=sample_rate)
+        streamer = AudioStreamer(audio_config)
+
+        # Get audio duration
+        duration = streamer.get_duration(audio_path)
+        logger.info(f"[local-smart-turn] Processing {audio_path} ({duration:.2f}s)")
+
+        # Collect all audio first (smart turn needs to analyze segments)
+        # NOTE(review): the bytes are interpreted below as 16-bit samples at
+        # `sample_rate`; presumably mono - confirm against AudioStreamer.
+        pcm_data = streamer.convert_to_pcm16(audio_path)
+
+        # Convert to float32 in [-1.0, 1.0) for the model
+        audio_int16 = np.frombuffer(pcm_data, dtype=np.int16)
+        audio_float32 = audio_int16.astype(np.float32) / 32768.0
+
+        # Analyze at intervals
+        turn_events: list[TurnEvent] = []
+        window_samples = self.WINDOW_SECONDS * sample_rate
+
+        # The "+ 1" keeps the window that ends exactly on the last sample;
+        # range() would otherwise drop it because its stop is exclusive.
+        analysis_points = range(
+            samples_per_interval, len(audio_float32) + 1, samples_per_interval
+        )
+        for chunk_no, end_sample in enumerate(analysis_points):
+            # Get window of audio ending at current position
+            start_sample = max(0, end_sample - window_samples)
+            audio_window = audio_float32[start_sample:end_sample]
+
+            current_time = end_sample / sample_rate
+            logger.debug(f"[local-smart-turn] Analyzing chunk {chunk_no} at {current_time:.2f}s")
+
+            result = self._predict_endpoint(audio_window)
+
+            turn_events.append(TurnEvent(
+                timestamp=current_time,
+                probability=result["probability"],
+                prediction=result["prediction"],
+                inference_time_ms=result["inference_time_ms"],
+            ))
+
+            if result["prediction"] == 1:
+                # Single quotes inside the f-strings: reusing the outer quote
+                # character is a SyntaxError before Python 3.12.
+                logger.info(
+                    f"[local-smart-turn] Turn complete at {current_time:.2f}s "
+                    f"(prob={result['probability']:.3f})"
+                    f"(inf time ms={result['inference_time_ms']})"
+                )
+
+        # Create result
+        # Convert turn events to word-like format for compatibility
+        words = []
+        for event in turn_events:
+            if event.prediction == 1:
+                words.append(Word(
+                    word=f"[END_OF_TURN prob={event.probability:.2f}]",
+                    # Clamp so intervals shorter than 100 ms cannot produce a
+                    # negative start time.
+                    start=max(0.0, event.timestamp - 0.1),
+                    end=event.timestamp,
+                    confidence=event.probability,
+                    speaker=None,
+                    speaker_confidence=None,
+                ))
+
+        # Count completed turns
+        completed_turns = sum(1 for e in turn_events if e.prediction == 1)
+
+        params = {
+            "sample_rate": sample_rate,
+            "analysis_interval_ms": analysis_interval_ms,
+            "window_seconds": self.WINDOW_SECONDS,
+        }
+
+        return TranscriptionResult(
+            provider=self.name,
+            transcript=f"[Turn detection only - {completed_turns} turns detected]",
+            words=words,
+            speakers=[],  # Not applicable
+            duration=duration,
+            raw_response={
+                "turn_events": [
+                    {
+                        "timestamp": e.timestamp,
+                        "probability": e.probability,
+                        "prediction": e.prediction,
+                        "inference_time_ms": e.inference_time_ms,
+                    }
+                    for e in turn_events
+                ],
+                "completed_turns": completed_turns,
+                "total_analyses": len(turn_events),
+                "avg_inference_time_ms": (
+                    sum(e.inference_time_ms for e in turn_events) / len(turn_events)
+                    if turn_events else 0
+                ),
+            },
+            params=params,
+        )
--- a/evals/stt/providers/speechmatics_provider.py
+++ b/evals/stt/providers/speechmatics_provider.py
@ -0,0 +1,258 @@
+"""Speechmatics STT provider with WebSocket streaming."""
+
+import asyncio
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+from loguru import logger
+
+from ..audio_streamer import AudioConfig, AudioStreamer
+from .base import EventCallback, STTProvider, TranscriptionResult, Word
+
+try:
+    from websockets.asyncio.client import connect as websocket_connect
+except ImportError:
+    raise ImportError("websockets required: pip install websockets")
+
+
+class SpeechmaticsProvider(STTProvider):
+    """Speechmatics Speech-to-Text provider with WebSocket streaming.
+
+    API Docs: https://docs.speechmatics.com/
+
+    Supports:
+    - Speaker diarization via `diarization: "speaker"` config
+    - Speaker sensitivity tuning
+    - Real-time streaming via WebSocket
+    """
+
+    def __init__(self, api_key: str | None = None, region: str = "eu2"):
+        """Store credentials and derive the region-specific endpoint.
+
+        Args:
+            api_key: Speechmatics API key. Falls back to the
+                SPEECHMATICS_API_KEY environment variable.
+            region: Region prefix used to build the WebSocket URL.
+
+        Raises:
+            ValueError: If no API key is passed or found in the environment.
+        """
+        self.api_key = api_key or os.getenv("SPEECHMATICS_API_KEY")
+        if not self.api_key:
+            raise ValueError(
+                "Speechmatics API key required. Set SPEECHMATICS_API_KEY env var or pass api_key."
+            )
+        # Set region-specific endpoint
+        self.ws_url = f"wss://{region}.rt.speechmatics.com/v2"
+
+    @property
+    def name(self) -> str:
+        """Provider identifier stored in results."""
+        return "speechmatics"
+
+    async def transcribe(
+        self,
+        audio_path: Path,
+        diarize: bool = False,
+        keyterms: list[str] | None = None,
+        on_event: EventCallback | None = None,
+        language: str = "en",
+        operating_point: str = "enhanced",
+        sample_rate: int = 8000,
+        speaker_sensitivity: float | None = None,
+        max_speakers: int | None = None,
+        trailing_silence_seconds: float = 3.0,
+        **kwargs: Any,
+    ) -> TranscriptionResult:
+        """Transcribe audio using Speechmatics WebSocket streaming.
+
+        Args:
+            audio_path: Path to audio file
+            diarize: Enable speaker diarization
+            keyterms: Additional vocabulary (limited support)
+            on_event: Optional callback for raw WebSocket events
+            language: Language code
+            operating_point: "standard" or "enhanced"
+            sample_rate: Audio sample rate for streaming
+            speaker_sensitivity: 0.0-1.0, higher = more speakers detected
+                (only sent when diarize is True)
+            max_speakers: Maximum number of speakers to detect
+                (only sent when diarize is True)
+            trailing_silence_seconds: Seconds of silence after audio to capture pending events
+            **kwargs: Additional config parameters (currently unused)
+
+        Returns:
+            TranscriptionResult with transcript and speaker info. If the
+            server does not finish within 30 seconds after the last audio
+            chunk, whatever was received so far is returned.
+
+        Raises:
+            RuntimeError: If the server sends an "Error" message.
+        """
+        # Build transcription config for StartRecognition message
+        transcription_config: dict[str, Any] = {
+            "language": language,
+            "operating_point": operating_point,
+            "enable_partials": False,
+        }
+
+        if diarize:
+            transcription_config["diarization"] = "speaker"
+            diarization_config: dict[str, Any] = {}
+            if speaker_sensitivity is not None:
+                diarization_config["speaker_sensitivity"] = speaker_sensitivity
+            if max_speakers is not None:
+                diarization_config["max_speakers"] = max_speakers
+            # Only send the sub-config when at least one option was given.
+            if diarization_config:
+                transcription_config["speaker_diarization_config"] = diarization_config
+
+        # Add additional vocabulary if provided
+        if keyterms:
+            transcription_config["additional_vocab"] = [{"content": term} for term in keyterms]
+
+        # Audio format config
+        audio_format = {
+            "type": "raw",
+            "encoding": "pcm_s16le",
+            "sample_rate": sample_rate,
+        }
+
+        # Store params for result
+        params = {
+            "diarize": diarize,
+            "language": language,
+            "operating_point": operating_point,
+            "sample_rate": sample_rate,
+            "speaker_sensitivity": speaker_sensitivity,
+            "max_speakers": max_speakers,
+        }
+
+        # Setup audio streamer
+        audio_config = AudioConfig(sample_rate=sample_rate)
+        streamer = AudioStreamer(audio_config)
+
+        # Collect results
+        all_results: list[dict[str, Any]] = []
+        recognition_started = asyncio.Event()
+
+        async with websocket_connect(
+            self.ws_url,
+            additional_headers={"Authorization": f"Bearer {self.api_key}"},
+        ) as ws:
+            # Send StartRecognition message
+            start_msg = {
+                "message": "StartRecognition",
+                "transcription_config": transcription_config,
+                "audio_format": audio_format,
+            }
+            await ws.send(json.dumps(start_msg))
+
+            async def send_audio():
+                """Send audio chunks after recognition starts."""
+                await recognition_started.wait()
+
+                chunk_no = 0
+                async for chunk in streamer.stream_file(
+                    audio_path, trailing_silence_seconds=trailing_silence_seconds
+                ):
+                    await ws.send(chunk)
+                    logger.debug(f"[speechmatics] Sent audio chunk {chunk_no}")
+                    chunk_no += 1
+
+                # Signal end of audio; last_seq_no is the number of chunks sent
+                logger.debug(f"[speechmatics] Sending EndOfStream after {chunk_no} chunks")
+                await ws.send(json.dumps({"message": "EndOfStream", "last_seq_no": chunk_no}))
+
+            async def receive_messages():
+                """Receive and process messages until EndOfTranscript or socket close."""
+                async for message in ws:
+                    # Only text frames carry JSON control/transcript messages.
+                    if not isinstance(message, str):
+                        continue
+
+                    data = json.loads(message)
+                    msg_type = data.get("message")
+                    logger.debug(f"[speechmatics] Received {msg_type}: {data}")
+
+                    # Emit event via callback if provided
+                    if on_event and msg_type:
+                        on_event(msg_type, data)
+
+                    if msg_type == "RecognitionStarted":
+                        logger.info("[speechmatics] Connected")
+                        recognition_started.set()
+
+                    elif msg_type == "AddTranscript":
+                        # Final transcript segment
+                        all_results.extend(data.get("results", []))
+
+                    elif msg_type == "EndOfTranscript":
+                        return
+
+                    elif msg_type == "Error":
+                        raise RuntimeError(f"Speechmatics error: {data}")
+
+                    elif msg_type == "Warning":
+                        logger.warning(f"[speechmatics] Warning: {data.get('reason')}")
+
+            # Run send and receive concurrently
+            send_task = asyncio.create_task(send_audio())
+            receive_task = asyncio.create_task(receive_messages())
+
+            try:
+                # Wake up as soon as EITHER task ends. Awaiting only the
+                # sender would hang forever if the receiver stops (server
+                # Error or closed socket) before RecognitionStarted arrives,
+                # because send_audio() blocks on that event.
+                await asyncio.wait(
+                    {send_task, receive_task},
+                    return_when=asyncio.FIRST_COMPLETED,
+                )
+                if send_task.done():
+                    # Surface send failures, then give the server up to 30s
+                    # to finish; EndOfTranscript ends receive_task.
+                    send_task.result()
+                    await asyncio.wait({receive_task}, timeout=30.0)
+            finally:
+                # Never leave a task running past this call. cancel() is a
+                # no-op for tasks that have already finished.
+                send_task.cancel()
+                receive_task.cancel()
+                outcomes = await asyncio.gather(
+                    send_task, receive_task, return_exceptions=True
+                )
+
+            for outcome in outcomes:
+                # CancelledError is a BaseException, so only real failures
+                # (e.g. a Speechmatics "Error" message) are re-raised here.
+                if isinstance(outcome, Exception):
+                    raise outcome
+
+        return self._parse_results(all_results, params)
+
+    def _parse_results(
+        self,
+        results: list[dict[str, Any]],
+        params: dict[str, Any],
+    ) -> TranscriptionResult:
+        """Parse Speechmatics results.
+
+        Args:
+            results: Items collected from the "results" lists of
+                AddTranscript messages. Only the first alternative of each
+                item is used; items without alternatives are skipped.
+            params: Request parameters, stored unchanged in the result.
+
+        Returns:
+            TranscriptionResult whose transcript is the space-joined words
+            with punctuation glued to the preceding word, and whose duration
+            is the largest end_time seen.
+        """
+        words = []
+        speakers_set: set[str] = set()
+        transcript_parts = []
+        duration = 0.0
+
+        for item in results:
+            item_type = item.get("type")
+            alternatives = item.get("alternatives", [])
+
+            if not alternatives:
+                continue
+
+            alt = alternatives[0]
+            content = alt.get("content", "")
+            speaker = alt.get("speaker")
+
+            if speaker:
+                speakers_set.add(speaker)
+
+            end_time = item.get("end_time", 0.0)
+            duration = max(duration, end_time)
+
+            if item_type == "word":
+                words.append(
+                    Word(
+                        word=content,
+                        start=item.get("start_time", 0.0),
+                        end=end_time,
+                        confidence=alt.get("confidence", 0.0),
+                        speaker=speaker,
+                        speaker_confidence=None,
+                    )
+                )
+                transcript_parts.append(content)
+            elif item_type == "punctuation":
+                # Attach to the previous word; leading punctuation with no
+                # preceding word is dropped.
+                if transcript_parts:
+                    transcript_parts[-1] += content
+
+        transcript = " ".join(transcript_parts)
+
+        return TranscriptionResult(
+            provider=self.name,
+            transcript=transcript,
+            words=words,
+            speakers=sorted(speakers_set),
+            duration=duration,
+            raw_response={"results": results},
+            params=params,
+        )
--- a/evals/stt/results/multi_speaker-deepgram-flux.json
+++ b/evals/stt/results/multi_speaker-deepgram-flux.json
@ -0,0 +1,867 @@
+{
+  "audio_file": "multi_speaker.m4a",
+  "audio_path": "../audio/multi_speaker.m4a",
+  "provider": "deepgram-flux",
+  "duration": 7.987664,
+  "created_at": "2026-01-20T12:21:59.183902",
+  "events": [
+    {
+      "timestamp": 3.1916191801428795e-05,
+      "event_type": "Connected",
+      "data": {
+        "type": "Connected",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "sequence_id": 0
+      }
+    },
+    {
+      "timestamp": 0.6468284581787884,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2195,
+        "sequence_id": 1
+      }
+    },
+    {
+      "timestamp": 0.8891876661218703,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.167,
+        "sequence_id": 2
+      }
+    },
+    {
+      "timestamp": 1.0987569580320269,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1045,
+        "sequence_id": 3
+      }
+    },
+    {
+      "timestamp": 1.356455208035186,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.96,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.3054,
+        "sequence_id": 4
+      }
+    },
+    {
+      "timestamp": 1.6076077912002802,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.2,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2996,
+        "sequence_id": 5
+      }
+    },
+    {
+      "timestamp": 1.831926790997386,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.44,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1659,
+        "sequence_id": 6
+      }
+    },
+    {
+      "timestamp": 2.0988957500085235,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.6800001,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0922,
+        "sequence_id": 7
+      }
+    },
+    {
+      "timestamp": 2.320036916062236,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.9200001,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1154,
+        "sequence_id": 8
+      }
+    },
+    {
+      "timestamp": 2.5783222501631826,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 2.16,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0789,
+        "sequence_id": 9
+      }
+    },
+    {
+      "timestamp": 2.805098250042647,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 2.4,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.028,
+        "sequence_id": 10
+      }
+    },
+    {
+      "timestamp": 3.0677467910572886,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 2.6399999,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0544,
+        "sequence_id": 11
+      }
+    },
+    {
+      "timestamp": 3.3053550410550088,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 2.88,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0221,
+        "sequence_id": 12
+      }
+    },
+    {
+      "timestamp": 3.5730851250700653,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 3.12,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0896,
+        "sequence_id": 13
+      }
+    },
+    {
+      "timestamp": 3.7986690001562238,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 3.3600001,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0837,
+        "sequence_id": 14
+      }
+    },
+    {
+      "timestamp": 4.056284500053152,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 3.6,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0217,
+        "sequence_id": 15
+      }
+    },
+    {
+      "timestamp": 4.2824959580320865,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 3.84,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0277,
+        "sequence_id": 16
+      }
+    },
+    {
+      "timestamp": 4.541013500187546,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 4.08,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0636,
+        "sequence_id": 17
+      }
+    },
+    {
+      "timestamp": 4.7826515410561115,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 4.32,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.092,
+        "sequence_id": 18
+      }
+    },
+    {
+      "timestamp": 5.044063208159059,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 4.56,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1632,
+        "sequence_id": 19
+      }
+    },
+    {
+      "timestamp": 5.277323708171025,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 4.8,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1748,
+        "sequence_id": 20
+      }
+    },
+    {
+      "timestamp": 5.519584750058129,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 5.04,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1267,
+        "sequence_id": 21
+      }
+    },
+    {
+      "timestamp": 5.761642290977761,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 5.28,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.085,
+        "sequence_id": 22
+      }
+    },
+    {
+      "timestamp": 5.985961250029504,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 5.52,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0726,
+        "sequence_id": 23
+      }
+    },
+    {
+      "timestamp": 6.235282083041966,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 5.76,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1489,
+        "sequence_id": 24
+      }
+    },
+    {
+      "timestamp": 6.479744625044987,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 6.0,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1815,
+        "sequence_id": 25
+      }
+    },
+    {
+      "timestamp": 6.722758750198409,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 6.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1548,
+        "sequence_id": 26
+      }
+    },
+    {
+      "timestamp": 7.02101350016892,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 6.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1779,
+        "sequence_id": 27
+      }
+    },
+    {
+      "timestamp": 7.2554090830963105,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 6.7200003,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1924,
+        "sequence_id": 28
+      }
+    },
+    {
+      "timestamp": 7.495738583151251,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 6.96,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0734,
+        "sequence_id": 29
+      }
+    },
+    {
+      "timestamp": 7.695259500062093,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 7.2,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0621,
+        "sequence_id": 30
+      }
+    },
+    {
+      "timestamp": 7.9374284581281245,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 7.44,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0523,
+        "sequence_id": 31
+      }
+    },
+    {
+      "timestamp": 8.201127333100885,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 7.68,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0868,
+        "sequence_id": 32
+      }
+    },
+    {
+      "timestamp": 8.452570000197738,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 7.92,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1788,
+        "sequence_id": 33
+      }
+    },
+    {
+      "timestamp": 8.6957666662056,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 8.16,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.3462,
+        "sequence_id": 34
+      }
+    },
+    {
+      "timestamp": 8.937032666057348,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 8.4,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.3477,
+        "sequence_id": 35
+      }
+    },
+    {
+      "timestamp": 9.179693832993507,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 8.64,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2825,
+        "sequence_id": 36
+      }
+    },
+    {
+      "timestamp": 9.439219749998301,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 8.88,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1785,
+        "sequence_id": 37
+      }
+    },
+    {
+      "timestamp": 9.65257745818235,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 9.12,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.119,
+        "sequence_id": 38
+      }
+    },
+    {
+      "timestamp": 9.894739540992305,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 9.36,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0948,
+        "sequence_id": 39
+      }
+    },
+    {
+      "timestamp": 10.137037916108966,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 9.6,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0836,
+        "sequence_id": 40
+      }
+    },
+    {
+      "timestamp": 10.37885733298026,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 9.84,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0648,
+        "sequence_id": 41
+      }
+    },
+    {
+      "timestamp": 10.640081625198945,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 10.08,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0426,
+        "sequence_id": 42
+      }
+    },
+    {
+      "timestamp": 10.882513708202168,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 10.32,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0297,
+        "sequence_id": 43
+      }
+    },
+    {
+      "timestamp": 11.11375533300452,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 10.56,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0247,
+        "sequence_id": 44
+      }
+    },
+    {
+      "timestamp": 11.356210750062019,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 10.8,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0134,
+        "sequence_id": 45
+      }
+    },
+    {
+      "timestamp": 11.60117325000465,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 11.04,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0102,
+        "sequence_id": 46
+      }
+    },
+    {
+      "timestamp": 11.859979416010901,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 11.28,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0089,
+        "sequence_id": 47
+      }
+    },
+    {
+      "timestamp": 12.093679000157863,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 11.52,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0074,
+        "sequence_id": 48
+      }
+    },
+    {
+      "timestamp": 12.334945333190262,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 11.76,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.007,
+        "sequence_id": 49
+      }
+    },
+    {
+      "timestamp": 12.588809041073546,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 12.0,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0067,
+        "sequence_id": 50
+      }
+    },
+    {
+      "timestamp": 12.83585675014183,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 12.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0042,
+        "sequence_id": 51
+      }
+    },
+    {
+      "timestamp": 13.075434750178829,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 12.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0047,
+        "sequence_id": 52
+      }
+    },
+    {
+      "timestamp": 13.31491966615431,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "63038896-d7d9-4186-995f-16056c3306d5",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 12.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0036,
+        "sequence_id": 53
+      }
+    }
+  ],
+  "transcript": ""
+}
--- a/evals/stt/results/multi_speaker-deepgram.json
+++ b/evals/stt/results/multi_speaker-deepgram.json
@ -0,0 +1,637 @@
+{
+  "audio_file": "multi_speaker.m4a",
+  "audio_path": "../audio/multi_speaker.m4a",
+  "provider": "deepgram",
+  "duration": 7.987664,
+  "created_at": "2026-01-20T12:15:06.097292",
+  "events": [
+    {
+      "timestamp": 2.50060111284256e-07,
+      "event_type": "SpeechStarted",
+      "data": {
+        "type": "SpeechStarted",
+        "channel": [
+          0,
+          1
+        ],
+        "timestamp": 0.13
+      }
+    },
+    {
+      "timestamp": 0.9085824999492615,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 1.0399375,
+        "start": 0.0,
+        "is_final": false,
+        "speech_final": false,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "Biggest pleasure",
+              "confidence": 0.7919922,
+              "words": [
+                {
+                  "word": "biggest",
+                  "start": 0.0,
+                  "end": 0.39999998,
+                  "confidence": 0.7919922,
+                  "punctuated_word": "Biggest"
+                },
+                {
+                  "word": "pleasure",
+                  "start": 0.39999998,
+                  "end": 0.79999995,
+                  "confidence": 0.77734375,
+                  "punctuated_word": "pleasure"
+                }
+              ]
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 1.9669485830236226,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 2.0799375,
+        "start": 0.0,
+        "is_final": false,
+        "speech_final": false,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "",
+              "confidence": 0.0,
+              "words": []
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 3.0349432919174433,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 3.1199374,
+        "start": 0.0,
+        "is_final": false,
+        "speech_final": false,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "Please give a text that I am just trying to",
+              "confidence": 0.4921875,
+              "words": [
+                {
+                  "word": "please",
+                  "start": 0.48,
+                  "end": 0.79999995,
+                  "confidence": 0.19970703,
+                  "punctuated_word": "Please"
+                },
+                {
+                  "word": "give",
+                  "start": 0.79999995,
+                  "end": 1.04,
+                  "confidence": 0.2849121,
+                  "punctuated_word": "give"
+                },
+                {
+                  "word": "a",
+                  "start": 0.96,
+                  "end": 1.1999999,
+                  "confidence": 0.4921875,
+                  "punctuated_word": "a"
+                },
+                {
+                  "word": "text",
+                  "start": 1.1999999,
+                  "end": 1.5999999,
+                  "confidence": 0.4482422,
+                  "punctuated_word": "text"
+                },
+                {
+                  "word": "that",
+                  "start": 1.5999999,
+                  "end": 2.1599998,
+                  "confidence": 0.5317383,
+                  "punctuated_word": "that"
+                },
+                {
+                  "word": "i",
+                  "start": 2.1599998,
+                  "end": 2.32,
+                  "confidence": 0.984375,
+                  "punctuated_word": "I"
+                },
+                {
+                  "word": "am",
+                  "start": 2.32,
+                  "end": 2.48,
+                  "confidence": 0.5024414,
+                  "punctuated_word": "am"
+                },
+                {
+                  "word": "just",
+                  "start": 2.48,
+                  "end": 2.6399999,
+                  "confidence": 0.27416992,
+                  "punctuated_word": "just"
+                },
+                {
+                  "word": "trying",
+                  "start": 2.6399999,
+                  "end": 2.96,
+                  "confidence": 0.19909668,
+                  "punctuated_word": "trying"
+                },
+                {
+                  "word": "to",
+                  "start": 2.96,
+                  "end": 3.04,
+                  "confidence": 0.7060547,
+                  "punctuated_word": "to"
+                }
+              ]
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 4.100316457916051,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 4.1599374,
+        "start": 0.0,
+        "is_final": false,
+        "speech_final": false,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "Is the test that I am just trying do so. Multiple",
+              "confidence": 0.7207031,
+              "words": [
+                {
+                  "word": "is",
+                  "start": 0.24,
+                  "end": 0.79999995,
+                  "confidence": 0.83251953,
+                  "punctuated_word": "Is"
+                },
+                {
+                  "word": "the",
+                  "start": 0.88,
+                  "end": 1.12,
+                  "confidence": 0.14794922,
+                  "punctuated_word": "the"
+                },
+                {
+                  "word": "test",
+                  "start": 1.12,
+                  "end": 1.52,
+                  "confidence": 0.7207031,
+                  "punctuated_word": "test"
+                },
+                {
+                  "word": "that",
+                  "start": 1.52,
+                  "end": 2.1599998,
+                  "confidence": 0.40307617,
+                  "punctuated_word": "that"
+                },
+                {
+                  "word": "i",
+                  "start": 2.1599998,
+                  "end": 2.3999999,
+                  "confidence": 0.99316406,
+                  "punctuated_word": "I"
+                },
+                {
+                  "word": "am",
+                  "start": 2.3999999,
+                  "end": 2.48,
+                  "confidence": 0.52783203,
+                  "punctuated_word": "am"
+                },
+                {
+                  "word": "just",
+                  "start": 2.48,
+                  "end": 2.72,
+                  "confidence": 0.27270508,
+                  "punctuated_word": "just"
+                },
+                {
+                  "word": "trying",
+                  "start": 2.72,
+                  "end": 3.12,
+                  "confidence": 0.81591797,
+                  "punctuated_word": "trying"
+                },
+                {
+                  "word": "do",
+                  "start": 3.12,
+                  "end": 3.28,
+                  "confidence": 0.9116211,
+                  "punctuated_word": "do"
+                },
+                {
+                  "word": "so",
+                  "start": 3.28,
+                  "end": 3.4399998,
+                  "confidence": 0.37774658,
+                  "punctuated_word": "so."
+                },
+                {
+                  "word": "multiple",
+                  "start": 3.6,
+                  "end": 3.84,
+                  "confidence": 0.74072266,
+                  "punctuated_word": "Multiple"
+                }
+              ]
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 4.506603500107303,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 4.53,
+        "start": 0.0,
+        "is_final": true,
+        "speech_final": true,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "Is the test that I am testing multiple speaker",
+              "confidence": 0.65966797,
+              "words": [
+                {
+                  "word": "is",
+                  "start": 0.24,
+                  "end": 0.39999998,
+                  "confidence": 0.83984375,
+                  "punctuated_word": "Is"
+                },
+                {
+                  "word": "the",
+                  "start": 0.39999998,
+                  "end": 0.79999995,
+                  "confidence": 0.15722656,
+                  "punctuated_word": "the"
+                },
+                {
+                  "word": "test",
+                  "start": 1.12,
+                  "end": 1.52,
+                  "confidence": 0.8588867,
+                  "punctuated_word": "test"
+                },
+                {
+                  "word": "that",
+                  "start": 1.52,
+                  "end": 2.1599998,
+                  "confidence": 0.35107422,
+                  "punctuated_word": "that"
+                },
+                {
+                  "word": "i",
+                  "start": 2.1599998,
+                  "end": 2.32,
+                  "confidence": 0.99121094,
+                  "punctuated_word": "I"
+                },
+                {
+                  "word": "am",
+                  "start": 2.32,
+                  "end": 2.48,
+                  "confidence": 0.6010742,
+                  "punctuated_word": "am"
+                },
+                {
+                  "word": "testing",
+                  "start": 2.48,
+                  "end": 3.12,
+                  "confidence": 0.9526367,
+                  "punctuated_word": "testing"
+                },
+                {
+                  "word": "multiple",
+                  "start": 3.4399998,
+                  "end": 3.84,
+                  "confidence": 0.65966797,
+                  "punctuated_word": "multiple"
+                },
+                {
+                  "word": "speaker",
+                  "start": 3.84,
+                  "end": 4.3199997,
+                  "confidence": 0.20446777,
+                  "punctuated_word": "speaker"
+                }
+              ]
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 4.648572708014399,
+      "event_type": "SpeechStarted",
+      "data": {
+        "type": "SpeechStarted",
+        "channel": [
+          0,
+          1
+        ],
+        "timestamp": 4.63
+      }
+    },
+    {
+      "timestamp": 5.556989792035893,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 1.0699372,
+        "start": 4.53,
+        "is_final": false,
+        "speech_final": false,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "",
+              "confidence": 0.0,
+              "words": []
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 6.615257542114705,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 2.08,
+        "start": 4.53,
+        "is_final": true,
+        "speech_final": true,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "",
+              "confidence": 0.0,
+              "words": []
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 6.769657667027786,
+      "event_type": "SpeechStarted",
+      "data": {
+        "type": "SpeechStarted",
+        "channel": [
+          0,
+          1
+        ],
+        "timestamp": 6.72
+      }
+    },
+    {
+      "timestamp": 7.672739624977112,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 1.0099998,
+        "start": 6.61,
+        "is_final": true,
+        "speech_final": true,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "",
+              "confidence": 0.0,
+              "words": []
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 8.081677624955773,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 0.3676877,
+        "start": 7.62,
+        "is_final": true,
+        "speech_final": true,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "",
+              "confidence": 0.0,
+              "words": []
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 8.083154707914218,
+      "event_type": "Results",
+      "data": {
+        "type": "Results",
+        "channel_index": [
+          0,
+          1
+        ],
+        "duration": 0.0,
+        "start": 7.9876876,
+        "is_final": true,
+        "speech_final": true,
+        "channel": {
+          "alternatives": [
+            {
+              "transcript": "",
+              "confidence": 0.0,
+              "words": []
+            }
+          ]
+        },
+        "metadata": {
+          "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+          "model_info": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          },
+          "model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        },
+        "from_finalize": false
+      }
+    },
+    {
+      "timestamp": 8.083194707985967,
+      "event_type": "Metadata",
+      "data": {
+        "type": "Metadata",
+        "transaction_key": "deprecated",
+        "request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
+        "sha256": "a6f954deb3fb3bf7a3c420061d5dd968251ba401d6304e6cd2fc9f396c12da77",
+        "created": "2026-01-20T06:44:57.522Z",
+        "duration": 7.9876876,
+        "channels": 1,
+        "models": [
+          "40bd3654-e622-47c4-a111-63a61b23bfe8"
+        ],
+        "model_info": {
+          "40bd3654-e622-47c4-a111-63a61b23bfe8": {
+            "name": "general-nova-3",
+            "version": "2025-04-17.21547",
+            "arch": "nova-3"
+          }
+        }
+      }
+    }
+  ],
+  "transcript": "Is the test that I am testing multiple speaker"
+}
--- a/evals/stt/results/nope-deepgram-flux.json
+++ b/evals/stt/results/nope-deepgram-flux.json
@ -0,0 +1,445 @@
+{
+  "audio_file": "nope.m4a",
+  "audio_path": "../audio/nope.m4a",
+  "provider": "deepgram-flux",
+  "duration": 3.390113,
+  "created_at": "2026-01-20T13:34:04.075559",
+  "events": [
+    {
+      "timestamp": 3.3294782042503357e-07,
+      "event_type": "Connected",
+      "data": {
+        "type": "Connected",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "sequence_id": 0
+      }
+    },
+    {
+      "timestamp": 0.6400237919297069,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1726,
+        "sequence_id": 1
+      }
+    },
+    {
+      "timestamp": 0.850623874925077,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0643,
+        "sequence_id": 2
+      }
+    },
+    {
+      "timestamp": 1.0877662498969585,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0343,
+        "sequence_id": 3
+      }
+    },
+    {
+      "timestamp": 1.3602930000051856,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.96,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.023,
+        "sequence_id": 4
+      }
+    },
+    {
+      "timestamp": 1.5734205420594662,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "StartOfTurn",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.2,
+        "transcript": "No.",
+        "words": [
+          {
+            "word": "No.",
+            "confidence": 0.9956
+          }
+        ],
+        "end_of_turn_confidence": 0.1445,
+        "sequence_id": 5
+      }
+    },
+    {
+      "timestamp": 1.7732612078543752,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "EndOfTurn",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.36,
+        "transcript": "No.",
+        "words": [
+          {
+            "word": "No.",
+            "confidence": 1.0
+          }
+        ],
+        "end_of_turn_confidence": 0.7266,
+        "sequence_id": 6
+      }
+    },
+    {
+      "timestamp": 2.0032672078814358,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 1.6,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2114,
+        "sequence_id": 7
+      }
+    },
+    {
+      "timestamp": 2.272528207860887,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 1.8399999,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2886,
+        "sequence_id": 8
+      }
+    },
+    {
+      "timestamp": 2.4770477078855038,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 2.08,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1366,
+        "sequence_id": 9
+      }
+    },
+    {
+      "timestamp": 2.7586996669415385,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 2.32,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0687,
+        "sequence_id": 10
+      }
+    },
+    {
+      "timestamp": 2.9688463748898357,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 2.56,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0571,
+        "sequence_id": 11
+      }
+    },
+    {
+      "timestamp": 3.2333728750236332,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 2.8,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0284,
+        "sequence_id": 12
+      }
+    },
+    {
+      "timestamp": 3.4381651668809354,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 3.04,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0352,
+        "sequence_id": 13
+      }
+    },
+    {
+      "timestamp": 3.7163160829804838,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 3.28,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0211,
+        "sequence_id": 14
+      }
+    },
+    {
+      "timestamp": 3.936306041898206,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 3.52,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0123,
+        "sequence_id": 15
+      }
+    },
+    {
+      "timestamp": 4.212840874912217,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 3.76,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0399,
+        "sequence_id": 16
+      }
+    },
+    {
+      "timestamp": 4.417071416974068,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 4.0,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0503,
+        "sequence_id": 17
+      }
+    },
+    {
+      "timestamp": 4.685962416930124,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 4.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0443,
+        "sequence_id": 18
+      }
+    },
+    {
+      "timestamp": 4.898042541928589,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 4.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0367,
+        "sequence_id": 19
+      }
+    },
+    {
+      "timestamp": 5.167347207898274,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 4.7200003,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0221,
+        "sequence_id": 20
+      }
+    },
+    {
+      "timestamp": 5.415992958005518,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 4.96,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1116,
+        "sequence_id": 21
+      }
+    },
+    {
+      "timestamp": 5.703707166947424,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 5.2,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0883,
+        "sequence_id": 22
+      }
+    },
+    {
+      "timestamp": 5.923421707935631,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 5.44,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0663,
+        "sequence_id": 23
+      }
+    },
+    {
+      "timestamp": 6.128664416959509,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 5.68,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0324,
+        "sequence_id": 24
+      }
+    },
+    {
+      "timestamp": 6.382756792008877,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 5.92,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0138,
+        "sequence_id": 25
+      }
+    },
+    {
+      "timestamp": 6.629080249927938,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.36,
+        "audio_window_end": 6.16,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0064,
+        "sequence_id": 26
+      }
+    }
+  ],
+  "transcript": "No."
+}
--- a/evals/stt/results/not_so_sure-deepgram-flux.json
+++ b/evals/stt/results/not_so_sure-deepgram-flux.json
@ -0,0 +1,678 @@
+{
+  "audio_file": "not_so_sure.m4a",
+  "audio_path": "../audio/not_so_sure.m4a",
+  "provider": "deepgram-flux",
+  "duration": 3.784853,
+  "created_at": "2026-01-20T13:34:30.619814",
+  "events": [
+    {
+      "timestamp": 4.1606836020946503e-07,
+      "event_type": "Connected",
+      "data": {
+        "type": "Connected",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "sequence_id": 0
+      }
+    },
+    {
+      "timestamp": 0.6479636249132454,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2837,
+        "sequence_id": 1
+      }
+    },
+    {
+      "timestamp": 0.8711565409321338,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1409,
+        "sequence_id": 2
+      }
+    },
+    {
+      "timestamp": 1.0940386659931391,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.103,
+        "sequence_id": 3
+      }
+    },
+    {
+      "timestamp": 1.3378053328488022,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "StartOfTurn",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.96,
+        "transcript": "I don",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 0.8521
+          },
+          {
+            "word": "don",
+            "confidence": 0.9858
+          }
+        ],
+        "end_of_turn_confidence": 0.1526,
+        "sequence_id": 4
+      }
+    },
+    {
+      "timestamp": 1.575752625009045,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.2,
+        "transcript": "I don't know",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 1.0
+          },
+          {
+            "word": "don't",
+            "confidence": 1.0
+          },
+          {
+            "word": "know",
+            "confidence": 0.9956
+          }
+        ],
+        "end_of_turn_confidence": 0.0815,
+        "sequence_id": 5
+      }
+    },
+    {
+      "timestamp": 1.809568207943812,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.44,
+        "transcript": "I don't know. I",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 1.0
+          },
+          {
+            "word": "don't",
+            "confidence": 1.0
+          },
+          {
+            "word": "know.",
+            "confidence": 1.0
+          },
+          {
+            "word": "I",
+            "confidence": 0.9995
+          }
+        ],
+        "end_of_turn_confidence": 0.0533,
+        "sequence_id": 6
+      }
+    },
+    {
+      "timestamp": 2.0778977079316974,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.6800001,
+        "transcript": "I don't know. I'm not",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 1.0
+          },
+          {
+            "word": "don't",
+            "confidence": 1.0
+          },
+          {
+            "word": "know.",
+            "confidence": 1.0
+          },
+          {
+            "word": "I'm",
+            "confidence": 1.0
+          },
+          {
+            "word": "not",
+            "confidence": 1.0
+          }
+        ],
+        "end_of_turn_confidence": 0.0296,
+        "sequence_id": 7
+      }
+    },
+    {
+      "timestamp": 2.3323032909538597,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.9200001,
+        "transcript": "I don't know. I'm not sure she",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 1.0
+          },
+          {
+            "word": "don't",
+            "confidence": 1.0
+          },
+          {
+            "word": "know.",
+            "confidence": 1.0
+          },
+          {
+            "word": "I'm",
+            "confidence": 1.0
+          },
+          {
+            "word": "not",
+            "confidence": 1.0
+          },
+          {
+            "word": "sure",
+            "confidence": 0.9692
+          },
+          {
+            "word": "she",
+            "confidence": 0.6968
+          }
+        ],
+        "end_of_turn_confidence": 0.1591,
+        "sequence_id": 8
+      }
+    },
+    {
+      "timestamp": 2.563972583040595,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 2.16,
+        "transcript": "I don't know. I'm not so sure.",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 1.0
+          },
+          {
+            "word": "don't",
+            "confidence": 1.0
+          },
+          {
+            "word": "know.",
+            "confidence": 1.0
+          },
+          {
+            "word": "I'm",
+            "confidence": 1.0
+          },
+          {
+            "word": "not",
+            "confidence": 1.0
+          },
+          {
+            "word": "so",
+            "confidence": 0.9971
+          },
+          {
+            "word": "sure.",
+            "confidence": 1.0
+          }
+        ],
+        "end_of_turn_confidence": 0.5312,
+        "sequence_id": 9
+      }
+    },
+    {
+      "timestamp": 2.766235665883869,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "EndOfTurn",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 2.32,
+        "transcript": "I don't know. I'm not so sure.",
+        "words": [
+          {
+            "word": "I",
+            "confidence": 1.0
+          },
+          {
+            "word": "don't",
+            "confidence": 1.0
+          },
+          {
+            "word": "know.",
+            "confidence": 1.0
+          },
+          {
+            "word": "I'm",
+            "confidence": 1.0
+          },
+          {
+            "word": "not",
+            "confidence": 1.0
+          },
+          {
+            "word": "so",
+            "confidence": 0.9971
+          },
+          {
+            "word": "sure.",
+            "confidence": 1.0
+          }
+        ],
+        "end_of_turn_confidence": 0.7129,
+        "sequence_id": 10
+      }
+    },
+    {
+      "timestamp": 2.980985000031069,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 2.56,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.6235,
+        "sequence_id": 11
+      }
+    },
+    {
+      "timestamp": 3.040183125063777,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 2.6399999,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.7163,
+        "sequence_id": 12
+      }
+    },
+    {
+      "timestamp": 3.134053166024387,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 2.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.7603,
+        "sequence_id": 13
+      }
+    },
+    {
+      "timestamp": 3.200523457955569,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 2.8,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.8013,
+        "sequence_id": 14
+      }
+    },
+    {
+      "timestamp": 3.3396010829601437,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 2.88,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.8052,
+        "sequence_id": 15
+      }
+    },
+    {
+      "timestamp": 3.462065916042775,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 3.04,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.6968,
+        "sequence_id": 16
+      }
+    },
+    {
+      "timestamp": 3.532107833074406,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 3.12,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.7026,
+        "sequence_id": 17
+      }
+    },
+    {
+      "timestamp": 3.6854247499722987,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 3.28,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.6123,
+        "sequence_id": 18
+      }
+    },
+    {
+      "timestamp": 3.9346718329470605,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 3.52,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.4551,
+        "sequence_id": 19
+      }
+    },
+    {
+      "timestamp": 4.174561291001737,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 3.76,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.293,
+        "sequence_id": 20
+      }
+    },
+    {
+      "timestamp": 4.423174874857068,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 4.0,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1186,
+        "sequence_id": 21
+      }
+    },
+    {
+      "timestamp": 4.661856249906123,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 4.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1186,
+        "sequence_id": 22
+      }
+    },
+    {
+      "timestamp": 4.934342915890738,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 4.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0629,
+        "sequence_id": 23
+      }
+    },
+    {
+      "timestamp": 5.1988217500038445,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 4.7200003,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0302,
+        "sequence_id": 24
+      }
+    },
+    {
+      "timestamp": 5.868438957957551,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 4.96,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0104,
+        "sequence_id": 25
+      }
+    },
+    {
+      "timestamp": 5.924830165924504,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 5.2,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0039,
+        "sequence_id": 26
+      }
+    },
+    {
+      "timestamp": 6.008775374852121,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 5.44,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.003,
+        "sequence_id": 27
+      }
+    },
+    {
+      "timestamp": 6.224981207866222,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 5.68,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0027,
+        "sequence_id": 28
+      }
+    },
+    {
+      "timestamp": 6.400387583067641,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 5.92,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0944,
+        "sequence_id": 29
+      }
+    },
+    {
+      "timestamp": 6.6102081660646945,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 6.16,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.083,
+        "sequence_id": 30
+      }
+    },
+    {
+      "timestamp": 6.853603166062385,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 6.4,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0674,
+        "sequence_id": 31
+      }
+    },
+    {
+      "timestamp": 7.1176844160072505,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 2.32,
+        "audio_window_end": 6.64,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0348,
+        "sequence_id": 32
+      }
+    }
+  ],
+  "transcript": "I don't know. I'm not so sure."
+}
--- a/evals/stt/results/not_so_sure-speechmatics.json
+++ b/evals/stt/results/not_so_sure-speechmatics.json
@ -0,0 +1,936 @@
+{
+  "audio_file": "not_so_sure.m4a",
+  "audio_path": "../audio/not_so_sure.m4a",
+  "provider": "speechmatics",
+  "duration": 3.784853,
+  "created_at": "2026-01-20T13:38:01.957263",
+  "events": [
+    {
+      "timestamp": 2.50060111284256e-07,
+      "event_type": "Info",
+      "data": {
+        "message": "Info",
+        "type": "concurrent_session_usage",
+        "reason": "1 concurrent sessions active out of quota 2",
+        "usage": 1,
+        "quota": 2,
+        "last_updated": "2026-01-20T08:07:53Z"
+      }
+    },
+    {
+      "timestamp": 0.17636274988763034,
+      "event_type": "RecognitionStarted",
+      "data": {
+        "message": "RecognitionStarted",
+        "orchestrator_version": "2026.01.09+e449221ca0+14.12.0",
+        "id": "ff50bcc6-03cc-4609-b52b-c61492be97b0",
+        "language_pack_info": {
+          "adapted": false,
+          "itn": true,
+          "language_description": "English",
+          "word_delimiter": " ",
+          "writing_direction": "left-to-right"
+        }
+      }
+    },
+    {
+      "timestamp": 0.1765422080643475,
+      "event_type": "Info",
+      "data": {
+        "message": "Info",
+        "type": "recognition_quality",
+        "reason": "Running recognition using a broadcast model quality.",
+        "quality": "broadcast"
+      }
+    },
+    {
+      "timestamp": 0.44156987499445677,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 1
+      }
+    },
+    {
+      "timestamp": 0.5090052080340683,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 2
+      }
+    },
+    {
+      "timestamp": 0.5927771248389035,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 3
+      }
+    },
+    {
+      "timestamp": 0.6792412919458002,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 4
+      }
+    },
+    {
+      "timestamp": 0.7540834578685462,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 5
+      }
+    },
+    {
+      "timestamp": 0.8363401249516755,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 6
+      }
+    },
+    {
+      "timestamp": 0.916276125004515,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 7
+      }
+    },
+    {
+      "timestamp": 1.0025545828975737,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 8
+      }
+    },
+    {
+      "timestamp": 1.0930295418947935,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 9
+      }
+    },
+    {
+      "timestamp": 1.1681176249403507,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 10
+      }
+    },
+    {
+      "timestamp": 1.2440201670397073,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 11
+      }
+    },
+    {
+      "timestamp": 1.3254928330425173,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 12
+      }
+    },
+    {
+      "timestamp": 1.411379124969244,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 13
+      }
+    },
+    {
+      "timestamp": 1.4989973329938948,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 14
+      }
+    },
+    {
+      "timestamp": 1.569762917002663,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 15
+      }
+    },
+    {
+      "timestamp": 1.6669557499699295,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 16
+      }
+    },
+    {
+      "timestamp": 1.7321407499257475,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 17
+      }
+    },
+    {
+      "timestamp": 1.8123597078956664,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 18
+      }
+    },
+    {
+      "timestamp": 1.89311487483792,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 19
+      }
+    },
+    {
+      "timestamp": 1.99575070803985,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 20
+      }
+    },
+    {
+      "timestamp": 2.0635348330251873,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 21
+      }
+    },
+    {
+      "timestamp": 2.136281125014648,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 22
+      }
+    },
+    {
+      "timestamp": 2.2212352079804987,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 23
+      }
+    },
+    {
+      "timestamp": 2.300102249952033,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 24
+      }
+    },
+    {
+      "timestamp": 2.3838018749374896,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 25
+      }
+    },
+    {
+      "timestamp": 2.4612751249223948,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 26
+      }
+    },
+    {
+      "timestamp": 2.5520844168495387,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 27
+      }
+    },
+    {
+      "timestamp": 2.6254100420046598,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 28
+      }
+    },
+    {
+      "timestamp": 2.7110170419327915,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 29
+      }
+    },
+    {
+      "timestamp": 2.793728666845709,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 30
+      }
+    },
+    {
+      "timestamp": 2.8698849170468748,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 31
+      }
+    },
+    {
+      "timestamp": 2.9517348748631775,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 32
+      }
+    },
+    {
+      "timestamp": 3.034996416885406,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 33
+      }
+    },
+    {
+      "timestamp": 3.1222795830108225,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 34
+      }
+    },
+    {
+      "timestamp": 3.2133053748402745,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 35
+      }
+    },
+    {
+      "timestamp": 3.2794892080128193,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 36
+      }
+    },
+    {
+      "timestamp": 3.360972832888365,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 37
+      }
+    },
+    {
+      "timestamp": 3.480351625010371,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 38
+      }
+    },
+    {
+      "timestamp": 3.527200457872823,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 39
+      }
+    },
+    {
+      "timestamp": 3.614834832958877,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 40
+      }
+    },
+    {
+      "timestamp": 3.7000621668994427,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 41
+      }
+    },
+    {
+      "timestamp": 3.7709098330233246,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 42
+      }
+    },
+    {
+      "timestamp": 3.870571249863133,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 43
+      }
+    },
+    {
+      "timestamp": 3.9319135828409344,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 44
+      }
+    },
+    {
+      "timestamp": 4.0240056668408215,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 45
+      }
+    },
+    {
+      "timestamp": 4.1135993748903275,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 46
+      }
+    },
+    {
+      "timestamp": 4.178906166926026,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 47
+      }
+    },
+    {
+      "timestamp": 4.262735291849822,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 48
+      }
+    },
+    {
+      "timestamp": 4.3524885000661016,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 49
+      }
+    },
+    {
+      "timestamp": 4.42170758289285,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 50
+      }
+    },
+    {
+      "timestamp": 4.503200083039701,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 51
+      }
+    },
+    {
+      "timestamp": 4.588893749983981,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 52
+      }
+    },
+    {
+      "timestamp": 4.6728779170662165,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 53
+      }
+    },
+    {
+      "timestamp": 4.749415792059153,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 54
+      }
+    },
+    {
+      "timestamp": 4.834314750041813,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 55
+      }
+    },
+    {
+      "timestamp": 4.934304124908522,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 56
+      }
+    },
+    {
+      "timestamp": 5.015187042066827,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 57
+      }
+    },
+    {
+      "timestamp": 5.083739625057206,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 58
+      }
+    },
+    {
+      "timestamp": 5.15739579196088,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 59
+      }
+    },
+    {
+      "timestamp": 5.254215708002448,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 60
+      }
+    },
+    {
+      "timestamp": 5.319055167026818,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 61
+      }
+    },
+    {
+      "timestamp": 5.422228208044544,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 62
+      }
+    },
+    {
+      "timestamp": 5.493815457914025,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 63
+      }
+    },
+    {
+      "timestamp": 5.562712874962017,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 64
+      }
+    },
+    {
+      "timestamp": 5.677756666904315,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 65
+      }
+    },
+    {
+      "timestamp": 5.728489124914631,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 66
+      }
+    },
+    {
+      "timestamp": 5.73234708304517,
+      "event_type": "AddTranscript",
+      "data": {
+        "message": "AddTranscript",
+        "format": "2.9",
+        "results": [
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "I",
+                "language": "en"
+              }
+            ],
+            "end_time": 0.8,
+            "start_time": 0.64,
+            "type": "word"
+          }
+        ],
+        "metadata": {
+          "end_time": 0.8,
+          "start_time": 0.0,
+          "transcript": "I "
+        }
+      }
+    },
+    {
+      "timestamp": 5.831468666903675,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 67
+      }
+    },
+    {
+      "timestamp": 5.9311752079520375,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 68
+      }
+    },
+    {
+      "timestamp": 5.970860542031005,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 69
+      }
+    },
+    {
+      "timestamp": 6.0573643748648465,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 70
+      }
+    },
+    {
+      "timestamp": 6.071638958062977,
+      "event_type": "AddTranscript",
+      "data": {
+        "message": "AddTranscript",
+        "format": "2.9",
+        "results": [
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "don't",
+                "language": "en"
+              }
+            ],
+            "end_time": 1.08,
+            "start_time": 0.84,
+            "type": "word"
+          },
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "know",
+                "language": "en"
+              }
+            ],
+            "end_time": 1.2,
+            "start_time": 1.08,
+            "type": "word"
+          },
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": ".",
+                "language": "en"
+              }
+            ],
+            "attaches_to": "previous",
+            "end_time": 1.2,
+            "is_eos": true,
+            "start_time": 1.2,
+            "type": "punctuation"
+          }
+        ],
+        "metadata": {
+          "end_time": 1.2,
+          "start_time": 0.8,
+          "transcript": "don't know. "
+        }
+      }
+    },
+    {
+      "timestamp": 6.143923291936517,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 71
+      }
+    },
+    {
+      "timestamp": 6.229828458046541,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 72
+      }
+    },
+    {
+      "timestamp": 6.297467292053625,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 73
+      }
+    },
+    {
+      "timestamp": 6.388417499838397,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 74
+      }
+    },
+    {
+      "timestamp": 6.46747541683726,
+      "event_type": "AddTranscript",
+      "data": {
+        "message": "AddTranscript",
+        "format": "2.9",
+        "results": [
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "I'm",
+                "language": "en"
+              }
+            ],
+            "end_time": 1.4,
+            "start_time": 1.2,
+            "type": "word"
+          },
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "not",
+                "language": "en"
+              }
+            ],
+            "end_time": 1.56,
+            "start_time": 1.4,
+            "type": "word"
+          }
+        ],
+        "metadata": {
+          "end_time": 1.56,
+          "start_time": 1.2,
+          "transcript": "I'm not "
+        }
+      }
+    },
+    {
+      "timestamp": 6.467542249942198,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 75
+      }
+    },
+    {
+      "timestamp": 6.571689167059958,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 76
+      }
+    },
+    {
+      "timestamp": 6.633496082853526,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 77
+      }
+    },
+    {
+      "timestamp": 6.705628624884412,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 78
+      }
+    },
+    {
+      "timestamp": 6.791943500051275,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 79
+      }
+    },
+    {
+      "timestamp": 6.8231504168361425,
+      "event_type": "AddTranscript",
+      "data": {
+        "message": "AddTranscript",
+        "format": "2.9",
+        "results": [
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "so",
+                "language": "en"
+              }
+            ],
+            "end_time": 1.72,
+            "start_time": 1.56,
+            "type": "word"
+          }
+        ],
+        "metadata": {
+          "end_time": 1.72,
+          "start_time": 1.56,
+          "transcript": "so "
+        }
+      }
+    },
+    {
+      "timestamp": 6.889297208050266,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 80
+      }
+    },
+    {
+      "timestamp": 6.96820458304137,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 81
+      }
+    },
+    {
+      "timestamp": 7.030788874952123,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 82
+      }
+    },
+    {
+      "timestamp": 7.114988874876872,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 83
+      }
+    },
+    {
+      "timestamp": 7.1660370419267565,
+      "event_type": "AddTranscript",
+      "data": {
+        "message": "AddTranscript",
+        "format": "2.9",
+        "results": [
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": "sure",
+                "language": "en"
+              }
+            ],
+            "end_time": 2.2,
+            "start_time": 1.76,
+            "type": "word"
+          },
+          {
+            "alternatives": [
+              {
+                "confidence": 1.0,
+                "content": ".",
+                "language": "en"
+              }
+            ],
+            "attaches_to": "previous",
+            "end_time": 2.2,
+            "is_eos": true,
+            "start_time": 2.2,
+            "type": "punctuation"
+          }
+        ],
+        "metadata": {
+          "end_time": 2.2,
+          "start_time": 1.72,
+          "transcript": "sure. "
+        }
+      }
+    },
+    {
+      "timestamp": 7.197767958045006,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 84
+      }
+    },
+    {
+      "timestamp": 7.281636083032936,
+      "event_type": "AudioAdded",
+      "data": {
+        "message": "AudioAdded",
+        "seq_no": 85
+      }
+    },
+    {
+      "timestamp": 7.966639708029106,
+      "event_type": "AddTranscript",
+      "data": {
+        "message": "AddTranscript",
+        "format": "2.9",
+        "results": [],
+        "metadata": {
+          "end_time": 6.72,
+          "start_time": 2.28,
+          "transcript": ""
+        }
+      }
+    },
+    {
+      "timestamp": 7.966674832860008,
+      "event_type": "EndOfTranscript",
+      "data": {
+        "message": "EndOfTranscript"
+      }
+    }
+  ],
+  "transcript": "I don't know. I'm not so sure."
+}
--- a/evals/stt/results/vad-deepgram-flux.json
+++ b/evals/stt/results/vad-deepgram-flux.json
--- a/evals/stt/results/vad-deepgram.json
+++ b/evals/stt/results/vad-deepgram.json
--- a/evals/stt/results/yes-deepgram-flux.json
+++ b/evals/stt/results/yes-deepgram-flux.json
@ -0,0 +1,402 @@
+{
+  "audio_file": "yes.m4a",
+  "audio_path": "../audio/yes.m4a",
+  "provider": "deepgram-flux",
+  "duration": 2.507755,
+  "created_at": "2026-01-20T13:33:37.737569",
+  "events": [
+    {
+      "timestamp": 2.0791776478290558e-07,
+      "event_type": "Connected",
+      "data": {
+        "type": "Connected",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "sequence_id": 0
+      }
+    },
+    {
+      "timestamp": 0.6149860408622772,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2494,
+        "sequence_id": 1
+      }
+    },
+    {
+      "timestamp": 0.8699209159240127,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1246,
+        "sequence_id": 2
+      }
+    },
+    {
+      "timestamp": 1.0665327080059797,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0557,
+        "sequence_id": 3
+      }
+    },
+    {
+      "timestamp": 1.319559457944706,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "StartOfTurn",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 0.96,
+        "transcript": "Yes.",
+        "words": [
+          {
+            "word": "Yes.",
+            "confidence": 0.9761
+          }
+        ],
+        "end_of_turn_confidence": 0.0793,
+        "sequence_id": 4
+      }
+    },
+    {
+      "timestamp": 1.5604322908911854,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.2,
+        "transcript": "Yes.",
+        "words": [
+          {
+            "word": "Yes.",
+            "confidence": 1.0
+          }
+        ],
+        "end_of_turn_confidence": 0.5703,
+        "sequence_id": 5
+      }
+    },
+    {
+      "timestamp": 1.6325784579385072,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "EndOfTurn",
+        "turn_index": 0,
+        "audio_window_start": 0.0,
+        "audio_window_end": 1.28,
+        "transcript": "Yes.",
+        "words": [
+          {
+            "word": "Yes.",
+            "confidence": 1.0
+          }
+        ],
+        "end_of_turn_confidence": 0.7026,
+        "sequence_id": 6
+      }
+    },
+    {
+      "timestamp": 1.897370790829882,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 1.52,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.4883,
+        "sequence_id": 7
+      }
+    },
+    {
+      "timestamp": 2.117000916041434,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 1.76,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.3801,
+        "sequence_id": 8
+      }
+    },
+    {
+      "timestamp": 2.3733394159935415,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 2.0,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.2346,
+        "sequence_id": 9
+      }
+    },
+    {
+      "timestamp": 2.6072654998861253,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 2.24,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1049,
+        "sequence_id": 10
+      }
+    },
+    {
+      "timestamp": 2.85038537485525,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 2.48,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.075,
+        "sequence_id": 11
+      }
+    },
+    {
+      "timestamp": 3.091235165949911,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 2.72,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0218,
+        "sequence_id": 12
+      }
+    },
+    {
+      "timestamp": 3.3325049998238683,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 2.96,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.03,
+        "sequence_id": 13
+      }
+    },
+    {
+      "timestamp": 3.577521916013211,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 3.2,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0189,
+        "sequence_id": 14
+      }
+    },
+    {
+      "timestamp": 3.8645569998770952,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 3.44,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0118,
+        "sequence_id": 15
+      }
+    },
+    {
+      "timestamp": 4.106258499901742,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 3.68,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0089,
+        "sequence_id": 16
+      }
+    },
+    {
+      "timestamp": 4.346511875046417,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 3.92,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0073,
+        "sequence_id": 17
+      }
+    },
+    {
+      "timestamp": 4.589668208034709,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 4.16,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0053,
+        "sequence_id": 18
+      }
+    },
+    {
+      "timestamp": 4.826804416021332,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 4.4,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0034,
+        "sequence_id": 19
+      }
+    },
+    {
+      "timestamp": 5.060472874902189,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 4.64,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0024,
+        "sequence_id": 20
+      }
+    },
+    {
+      "timestamp": 5.304136332822964,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 4.88,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1091,
+        "sequence_id": 21
+      }
+    },
+    {
+      "timestamp": 5.544230999890715,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 5.12,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.1007,
+        "sequence_id": 22
+      }
+    },
+    {
+      "timestamp": 5.779906540876254,
+      "event_type": "TurnInfo",
+      "data": {
+        "type": "TurnInfo",
+        "request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
+        "event": "Update",
+        "turn_index": 1,
+        "audio_window_start": 1.28,
+        "audio_window_end": 5.36,
+        "transcript": "",
+        "words": [],
+        "end_of_turn_confidence": 0.0565,
+        "sequence_id": 23
+      }
+    }
+  ],
+  "transcript": "Yes."
+}
--- a/evals/visualizer/.gitignore
+++ b/evals/visualizer/.gitignore
@ -0,0 +1,41 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/versions
+
+# testing
+/coverage
+
+# next.js
+/.next/
+/out/
+
+# production
+/build
+
+# misc
+.DS_Store
+*.pem
+
+# debug
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+.pnpm-debug.log*
+
+# env files (can opt-in for committing if needed)
+.env*
+
+# vercel
+.vercel
+
+# typescript
+*.tsbuildinfo
+next-env.d.ts
--- a/evals/visualizer/README.md
+++ b/evals/visualizer/README.md
@ -0,0 +1,36 @@
+This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
+
+## Getting Started
+
+First, run the development server:
+
+```bash
+npm run dev
+# or
+yarn dev
+# or
+pnpm dev
+# or
+bun dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+
+You can start editing the page by modifying `src/app/page.tsx`. The page auto-updates as you edit the file.
+
+This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
+
+## Learn More
+
+To learn more about Next.js, take a look at the following resources:
+
+- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
+- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+
+You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
+
+## Deploy on Vercel
+
+The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
+
+Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
--- a/evals/visualizer/eslint.config.mjs
+++ b/evals/visualizer/eslint.config.mjs
@ -0,0 +1,18 @@
+import { defineConfig, globalIgnores } from "eslint/config";
+import nextVitals from "eslint-config-next/core-web-vitals";
+import nextTs from "eslint-config-next/typescript";
+
+// Build output and generated files that ESLint must not lint. Listing them
+// explicitly overrides the default ignores of eslint-config-next.
+const ignoredPaths = globalIgnores([
+  ".next/**",
+  "out/**",
+  "build/**",
+  "next-env.d.ts",
+]);
+
+// Flat config: the Next.js core-web-vitals rules, then the TypeScript rules,
+// then the ignore list.
+const eslintConfig = defineConfig([...nextVitals, ...nextTs, ignoredPaths]);
+
+export default eslintConfig;
--- a/evals/visualizer/next.config.ts
+++ b/evals/visualizer/next.config.ts
@ -0,0 +1,7 @@
+import type { NextConfig } from "next";
+
+// Next.js configuration for the visualizer. No options are overridden yet,
+// so an empty object is exported.
+const nextConfig: NextConfig = {};
+
+export default nextConfig;
--- a/evals/visualizer/package.json
+++ b/evals/visualizer/package.json
@ -0,0 +1,26 @@
+{
+  "name": "visualizer",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start",
+    "lint": "eslint"
+  },
+  "dependencies": {
+    "next": "16.1.4",
+    "react": "19.2.3",
+    "react-dom": "19.2.3"
+  },
+  "devDependencies": {
+    "@tailwindcss/postcss": "^4",
+    "@types/node": "^20",
+    "@types/react": "^19",
+    "@types/react-dom": "^19",
+    "eslint": "^9",
+    "eslint-config-next": "16.1.4",
+    "tailwindcss": "^4",
+    "typescript": "^5"
+  }
+}
--- a/evals/visualizer/pnpm-lock.yaml
+++ b/evals/visualizer/pnpm-lock.yaml
--- a/evals/visualizer/pnpm-workspace.yaml
+++ b/evals/visualizer/pnpm-workspace.yaml
@ -0,0 +1,5 @@
+packages:
+  - .
+ignoredBuiltDependencies:
+  - sharp
+  - unrs-resolver
--- a/evals/visualizer/postcss.config.mjs
+++ b/evals/visualizer/postcss.config.mjs
@ -0,0 +1,7 @@
+// PostCSS configuration: the Tailwind CSS PostCSS plugin is registered with
+// an empty options object.
+const plugins = { "@tailwindcss/postcss": {} };
+const config = { plugins };
+
+export default config;
--- a/evals/visualizer/public/file.svg
+++ b/evals/visualizer/public/file.svg
@ -0,0 +1 @@
+<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
--- a/evals/visualizer/public/globe.svg
+++ b/evals/visualizer/public/globe.svg
@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
--- a/evals/visualizer/public/next.svg
+++ b/evals/visualizer/public/next.svg
@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
--- a/evals/visualizer/public/vercel.svg
+++ b/evals/visualizer/public/vercel.svg
@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
--- a/evals/visualizer/public/window.svg
+++ b/evals/visualizer/public/window.svg
@ -0,0 +1 @@
+<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
--- a/evals/visualizer/src/app/api/audio/[filename]/route.ts
+++ b/evals/visualizer/src/app/api/audio/[filename]/route.ts
@ -0,0 +1,42 @@
+import { NextRequest, NextResponse } from "next/server";
+import fs from "fs";
+import path from "path";
+
+// Directory the audio files are read from: "../stt/audio" relative to the
+// working directory of the server process.
+const AUDIO_DIR = path.join(process.cwd(), "..", "stt", "audio");
+
+// Content-Type by lowercase file extension. Extensions that are not listed
+// are served as "application/octet-stream" by the GET handler.
+const MIME_TYPES: Record<string, string> = {
+  ".mp3": "audio/mpeg",
+  ".wav": "audio/wav",
+  ".m4a": "audio/mp4",
+  ".ogg": "audio/ogg",
+  ".webm": "audio/webm",
+};
+
+/**
+ * GET /api/audio/[filename] - serve one audio file from AUDIO_DIR.
+ *
+ * The `filename` route parameter is untrusted input. It is resolved against
+ * AUDIO_DIR and rejected when the resolved path leaves that directory (for
+ * example "../../secret"), so the route cannot be used to read arbitrary
+ * files from the host.
+ *
+ * Responses:
+ * - 200: the file bytes, with a Content-Type derived from the extension
+ * - 400: the name resolves outside AUDIO_DIR
+ * - 404: the path does not exist or is not a regular file
+ * - 500: any unexpected error (logged to the server console)
+ */
+export async function GET(
+  request: NextRequest,
+  { params }: { params: Promise<{ filename: string }> }
+) {
+  try {
+    const { filename } = await params;
+
+    const audioRoot = path.resolve(AUDIO_DIR);
+    const filePath = path.resolve(audioRoot, filename);
+
+    // Containment check: after ".." segments are resolved the path must still
+    // be located below the audio root.
+    if (!filePath.startsWith(audioRoot + path.sep)) {
+      return NextResponse.json({ error: "Invalid filename" }, { status: 400 });
+    }
+
+    // A directory passes existsSync but would make readFileSync throw, so
+    // only regular files are served.
+    if (!fs.existsSync(filePath) || !fs.statSync(filePath).isFile()) {
+      return NextResponse.json({ error: "Audio file not found" }, { status: 404 });
+    }
+
+    const ext = path.extname(filename).toLowerCase();
+    const contentType = MIME_TYPES[ext] || "application/octet-stream";
+
+    const fileBuffer = fs.readFileSync(filePath);
+
+    return new NextResponse(fileBuffer, {
+      headers: {
+        "Content-Type": contentType,
+        "Content-Length": fileBuffer.length.toString(),
+      },
+    });
+  } catch (error) {
+    console.error("Error serving audio:", error);
+    return NextResponse.json({ error: "Failed to serve audio" }, { status: 500 });
+  }
+}
--- a/evals/visualizer/src/app/api/results/[id]/route.ts
+++ b/evals/visualizer/src/app/api/results/[id]/route.ts
@ -0,0 +1,27 @@
+import { NextRequest, NextResponse } from "next/server";
+import fs from "fs";
+import path from "path";
+
+// Directory the result JSON files are read from: "../stt/results" relative
+// to the working directory of the server process.
+const RESULTS_DIR = path.join(process.cwd(), "..", "stt", "results");
+
+/**
+ * GET /api/results/[id] - return the parsed content of "<id>.json" from
+ * RESULTS_DIR.
+ *
+ * The `id` route parameter is untrusted input. The target path is resolved
+ * against RESULTS_DIR and rejected when it leaves that directory, so ids
+ * containing ".." segments cannot reach other ".json" files on the host.
+ *
+ * Responses:
+ * - 200: the parsed JSON document
+ * - 400: the id resolves outside RESULTS_DIR
+ * - 404: no such result file
+ * - 500: the file could not be read or parsed (logged to the server console)
+ */
+export async function GET(
+  request: NextRequest,
+  { params }: { params: Promise<{ id: string }> }
+) {
+  try {
+    const { id } = await params;
+
+    const resultsRoot = path.resolve(RESULTS_DIR);
+    const filePath = path.resolve(resultsRoot, `${id}.json`);
+
+    // Containment check: the resolved path must stay below the results root.
+    if (!filePath.startsWith(resultsRoot + path.sep)) {
+      return NextResponse.json({ error: "Invalid result id" }, { status: 400 });
+    }
+
+    if (!fs.existsSync(filePath)) {
+      return NextResponse.json({ error: "Result not found" }, { status: 404 });
+    }
+
+    const content = fs.readFileSync(filePath, "utf-8");
+    const data = JSON.parse(content);
+
+    return NextResponse.json(data);
+  } catch (error) {
+    console.error("Error reading result:", error);
+    return NextResponse.json({ error: "Failed to read result" }, { status: 500 });
+  }
+}
--- a/evals/visualizer/src/app/api/results/route.ts
+++ b/evals/visualizer/src/app/api/results/route.ts
@ -0,0 +1,47 @@
+import { NextResponse } from "next/server";
+import fs from "fs";
+import path from "path";
+import { ResultSummary, EventCaptureResult } from "@/types";
+
+// Directory scanned for result JSON files: "../stt/results" relative to the
+// working directory of the server process.
+const RESULTS_DIR = path.join(process.cwd(), "..", "stt", "results");
+
+/**
+ * GET /api/results - list a summary of every "*.json" file in RESULTS_DIR,
+ * newest first.
+ *
+ * Returns an empty array when the directory does not exist. Files that
+ * cannot be read or parsed are logged and left out of the list instead of
+ * failing the whole request. Any other error yields a 500 response.
+ */
+export async function GET() {
+  try {
+    if (!fs.existsSync(RESULTS_DIR)) {
+      return NextResponse.json([]);
+    }
+
+    const extension = ".json";
+    const files = fs.readdirSync(RESULTS_DIR).filter((f) => f.endsWith(extension));
+    const results: ResultSummary[] = [];
+
+    for (const file of files) {
+      try {
+        const filePath = path.join(RESULTS_DIR, file);
+        const content = fs.readFileSync(filePath, "utf-8");
+        const data: EventCaptureResult = JSON.parse(content);
+
+        results.push({
+          // Strip only the trailing extension. String.replace would remove
+          // the first ".json" found anywhere in the name and produce an id
+          // that no longer maps back to the file.
+          id: file.slice(0, -extension.length),
+          audio_file: data.audio_file,
+          provider: data.provider,
+          duration: data.duration,
+          created_at: data.created_at,
+          event_count: data.events.length,
+        });
+      } catch (parseError) {
+        // Keep the underlying error so a broken file can be diagnosed.
+        console.error(`Failed to parse ${file}`, parseError);
+      }
+    }
+
+    // Sort by created_at descending
+    results.sort(
+      (a, b) =>
+        new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
+    );
+
+    return NextResponse.json(results);
+  } catch (error) {
+    console.error("Error reading results:", error);
+    return NextResponse.json({ error: "Failed to read results" }, { status: 500 });
+  }
+}
--- a/evals/visualizer/src/app/favicon.ico
+++ b/evals/visualizer/src/app/favicon.ico
--- a/evals/visualizer/src/app/globals.css
+++ b/evals/visualizer/src/app/globals.css
@ -0,0 +1,26 @@
+/* Global stylesheet: pulls in Tailwind and defines the page colors. */
+@import "tailwindcss";
+
+/* Default (light) palette. */
+:root {
+  --background: #ffffff;
+  --foreground: #171717;
+}
+
+/* Maps the custom properties above, and the font variables set on <body> in
+   layout.tsx, onto theme variables. */
+@theme inline {
+  --color-background: var(--background);
+  --color-foreground: var(--foreground);
+  --font-sans: var(--font-geist-sans);
+  --font-mono: var(--font-geist-mono);
+}
+
+/* Palette used when the system prefers a dark color scheme. */
+@media (prefers-color-scheme: dark) {
+  :root {
+    --background: #0a0a0a;
+    --foreground: #ededed;
+  }
+}
+
+/* Base page colors. NOTE(review): the body font is the Arial stack, not the
+   --font-sans variable defined above - confirm this is intended. */
+body {
+  background: var(--background);
+  color: var(--foreground);
+  font-family: Arial, Helvetica, sans-serif;
+}
--- a/evals/visualizer/src/app/layout.tsx
+++ b/evals/visualizer/src/app/layout.tsx
@ -0,0 +1,34 @@
+import type { Metadata } from "next";
+import { Geist, Geist_Mono } from "next/font/google";
+import "./globals.css";
+
+// Geist Sans (latin subset), exposed to CSS as the --font-geist-sans variable.
+const geistSans = Geist({
+  variable: "--font-geist-sans",
+  subsets: ["latin"],
+});
+
+// Geist Mono (latin subset), exposed to CSS as the --font-geist-mono variable.
+const geistMono = Geist_Mono({
+  variable: "--font-geist-mono",
+  subsets: ["latin"],
+});
+
+// Page metadata (title and description) exported for Next.js.
+export const metadata: Metadata = {
+  title: "STT Event Visualizer",
+  description: "Visualize WebSocket events from STT providers",
+};
+
+/**
+ * Root layout: wraps every page in the <html>/<body> shell and puts the two
+ * Geist font variable classes plus "antialiased" on <body>.
+ */
+export default function RootLayout({
+  children,
+}: Readonly<{
+  children: React.ReactNode;
+}>) {
+  const bodyClassName = [
+    geistSans.variable,
+    geistMono.variable,
+    "antialiased",
+  ].join(" ");
+
+  return (
+    <html lang="en">
+      <body className={bodyClassName}>{children}</body>
+    </html>
+  );
+}
--- a/evals/visualizer/src/app/page.tsx
+++ b/evals/visualizer/src/app/page.tsx
@ -0,0 +1,129 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import Link from "next/link";
+import { ResultSummary } from "@/types";
+
+/** Format a duration given in seconds as "m:ss" (fractions are dropped). */
+function formatDuration(seconds: number): string {
+  const wholeMinutes = Math.floor(seconds / 60);
+  const paddedSeconds = String(Math.floor(seconds % 60)).padStart(2, "0");
+  return wholeMinutes + ":" + paddedSeconds;
+}
+
+/**
+ * Format an ISO timestamp string for display with the "en-US" locale:
+ * short month, numeric day and year, two-digit hour and minute.
+ */
+function formatDate(isoString: string): string {
+  const displayOptions: Intl.DateTimeFormatOptions = {
+    year: "numeric",
+    month: "short",
+    day: "numeric",
+    hour: "2-digit",
+    minute: "2-digit",
+  };
+  return new Date(isoString).toLocaleDateString("en-US", displayOptions);
+}
+
+// Tailwind classes for the provider badge, keyed by provider name. Providers
+// without an entry fall back to a neutral zinc badge where the map is used.
+const PROVIDER_COLORS: Record<string, string> = {
+  deepgram: "bg-blue-500/20 text-blue-300",
+  "deepgram-flux": "bg-green-500/20 text-green-300",
+  speechmatics: "bg-purple-500/20 text-purple-300",
+};
+
+/**
+ * Landing page: fetches the result summaries from /api/results once on
+ * mount and renders them as links to /view/<id>, with separate loading,
+ * error and empty states.
+ */
+export default function Home() {
+  const [results, setResults] = useState<ResultSummary[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+
+  // Load the summaries once after the first render.
+  useEffect(() => {
+    const loadResults = async () => {
+      try {
+        const res = await fetch("/api/results");
+        if (!res.ok) {
+          throw new Error("Failed to fetch results");
+        }
+        setResults(await res.json());
+      } catch (err) {
+        const message = err instanceof Error ? err.message : "Unknown error";
+        setError(message);
+      } finally {
+        setLoading(false);
+      }
+    };
+
+    loadResults();
+  }, []);
+
+  // The list and the empty state only show once loading ended without error.
+  const settled = !loading && !error;
+
+  // Badge classes for a provider, with a neutral fallback for unknown names.
+  const badgeClass = (provider: string) =>
+    `text-xs px-2 py-0.5 rounded ${
+      PROVIDER_COLORS[provider] || "bg-zinc-700 text-zinc-300"
+    }`;
+
+  return (
+    <div className="min-h-screen bg-zinc-950 text-white">
+      <div className="max-w-4xl mx-auto px-6 py-12">
+        <header className="mb-12">
+          <h1 className="text-3xl font-bold">STT Event Visualizer</h1>
+          <p className="text-zinc-400 mt-2">
+            Visualize captured WebSocket events from STT providers
+          </p>
+        </header>
+
+        {loading && (
+          <div className="flex items-center justify-center py-12">
+            <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-white"></div>
+          </div>
+        )}
+
+        {error && (
+          <div className="bg-red-500/20 text-red-300 p-4 rounded-lg">
+            {error}
+          </div>
+        )}
+
+        {settled && results.length === 0 && (
+          <div className="text-center py-12 text-zinc-500">
+            <p className="text-lg mb-4">No results found</p>
+            <p className="text-sm">
+              Run the event capture script to generate results:
+            </p>
+            <code className="block mt-2 bg-zinc-800 p-3 rounded text-zinc-300 text-sm">
+              python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
+            </code>
+          </div>
+        )}
+
+        {settled && results.length > 0 && (
+          <div className="space-y-3">
+            {results.map((item) => (
+              <Link
+                key={item.id}
+                href={`/view/${item.id}`}
+                className="block bg-zinc-900 hover:bg-zinc-800 rounded-lg p-4 transition-colors"
+              >
+                <div className="flex items-center justify-between">
+                  <div className="space-y-1">
+                    <div className="flex items-center gap-3">
+                      <span className="font-medium">{item.audio_file}</span>
+                      <span className={badgeClass(item.provider)}>
+                        {item.provider}
+                      </span>
+                    </div>
+                    <div className="text-sm text-zinc-500">
+                      {formatDate(item.created_at)}
+                    </div>
+                  </div>
+                  <div className="text-right space-y-1">
+                    <div className="text-sm text-zinc-400">
+                      {formatDuration(item.duration)}
+                    </div>
+                    <div className="text-xs text-zinc-500">
+                      {item.event_count} events
+                    </div>
+                  </div>
+                </div>
+              </Link>
+            ))}
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
--- a/evals/visualizer/src/app/view/[id]/page.tsx
+++ b/evals/visualizer/src/app/view/[id]/page.tsx
@ -0,0 +1,158 @@
+"use client";
+
+import { useEffect, useState, useCallback } from "react";
+import { useParams } from "next/navigation";
+import Link from "next/link";
+import { EventCaptureResult } from "@/types";
+import AudioPlayer from "@/components/AudioPlayer";
+import EventTimeline from "@/components/EventTimeline";
+import EventList from "@/components/EventList";
+
+// Tailwind classes for the provider badge, keyed by provider name. Providers
+// without an entry fall back to a neutral zinc badge where the map is used.
+const PROVIDER_COLORS: Record<string, string> = {
+  deepgram: "bg-blue-500/20 text-blue-300",
+  "deepgram-flux": "bg-green-500/20 text-green-300",
+  speechmatics: "bg-purple-500/20 text-purple-300",
+};
+
+/**
+ * Detail page for one captured result.
+ *
+ * Loads /api/results/<id> for the `id` route parameter and renders the audio
+ * player, the event timeline, the final transcript and the event list. The
+ * shared `currentTime` state keeps the player, timeline and list in sync.
+ */
+export default function ViewPage() {
+  const params = useParams();
+  const id = params.id as string;
+
+  const [result, setResult] = useState<EventCaptureResult | null>(null);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+  const [currentTime, setCurrentTime] = useState(0);
+  // Only the setter is needed: the playing flag is recorded but not read here.
+  const [, setIsPlaying] = useState(false);
+
+  useEffect(() => {
+    // Set by the cleanup so that a response arriving after unmount, or after
+    // `id` changed, cannot overwrite newer state.
+    let cancelled = false;
+
+    async function fetchResult() {
+      try {
+        const response = await fetch(`/api/results/${id}`);
+        if (!response.ok) {
+          if (response.status === 404) {
+            throw new Error("Result not found");
+          }
+          throw new Error("Failed to fetch result");
+        }
+        const data = await response.json();
+        if (!cancelled) {
+          setResult(data);
+        }
+      } catch (err) {
+        if (!cancelled) {
+          setError(err instanceof Error ? err.message : "Unknown error");
+        }
+      } finally {
+        if (!cancelled) {
+          setLoading(false);
+        }
+      }
+    }
+
+    if (id) {
+      fetchResult();
+    }
+
+    return () => {
+      cancelled = true;
+    };
+  }, [id]);
+
+  const handleTimeUpdate = useCallback((time: number) => {
+    setCurrentTime(time);
+  }, []);
+
+  const handlePlayingChange = useCallback((playing: boolean) => {
+    setIsPlaying(playing);
+  }, []);
+
+  const handleSeek = useCallback((time: number) => {
+    setCurrentTime(time);
+  }, []);
+
+  if (loading) {
+    return (
+      <div className="min-h-screen bg-zinc-950 text-white flex items-center justify-center">
+        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-white"></div>
+      </div>
+    );
+  }
+
+  if (error) {
+    return (
+      <div className="min-h-screen bg-zinc-950 text-white p-6">
+        <div className="max-w-4xl mx-auto">
+          <Link href="/" className="text-zinc-400 hover:text-white mb-4 inline-block">
+            &larr; Back to results
+          </Link>
+          <div className="bg-red-500/20 text-red-300 p-4 rounded-lg">{error}</div>
+        </div>
+      </div>
+    );
+  }
+
+  if (!result) {
+    return null;
+  }
+
+  // Escape the file name so characters such as "#", "?", "%" or spaces stay
+  // inside the path segment instead of being read as URL syntax.
+  const audioUrl = `/api/audio/${encodeURIComponent(result.audio_file)}`;
+
+  return (
+    <div className="min-h-screen bg-zinc-950 text-white">
+      <div className="max-w-7xl mx-auto px-6 py-6">
+        {/* Header */}
+        <header className="mb-6">
+          <Link href="/" className="text-zinc-400 hover:text-white mb-2 inline-block text-sm">
+            &larr; Back to results
+          </Link>
+          <div className="flex items-center gap-3">
+            <h1 className="text-2xl font-bold">{result.audio_file}</h1>
+            <span
+              className={`text-sm px-2 py-0.5 rounded ${
+                PROVIDER_COLORS[result.provider] || "bg-zinc-700 text-zinc-300"
+              }`}
+            >
+              {result.provider}
+            </span>
+          </div>
+          {result.transcript && (
+            <p className="text-zinc-400 mt-2 text-sm line-clamp-2">
+              {result.transcript}
+            </p>
+          )}
+        </header>
+
+        {/* Main content */}
+        <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
+          {/* Left column: Audio player and timeline */}
+          <div className="lg:col-span-2 space-y-4">
+            <AudioPlayer
+              audioUrl={audioUrl}
+              duration={result.duration}
+              currentTime={currentTime}
+              onTimeUpdate={handleTimeUpdate}
+              onPlayingChange={handlePlayingChange}
+            />
+
+            <EventTimeline
+              events={result.events}
+              duration={result.duration}
+              currentTime={currentTime}
+              onSeek={handleSeek}
+            />
+
+            {/* Transcript section */}
+            {result.transcript && (
+              <div className="bg-zinc-800 rounded-lg p-4">
+                <div className="text-sm text-zinc-400 font-medium mb-2">
+                  Final Transcript
+                </div>
+                <p className="text-zinc-300">{result.transcript}</p>
+              </div>
+            )}
+          </div>
+
+          {/* Right column: Event list */}
+          <div className="lg:col-span-1 h-[calc(100vh-12rem)]">
+            <EventList
+              events={result.events}
+              currentTime={currentTime}
+              onSeek={handleSeek}
+              provider={result.provider}
+            />
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/AudioPlayer.tsx
+++ b/evals/visualizer/src/components/AudioPlayer.tsx
@ -0,0 +1,145 @@
+"use client";
+
+import { useRef, useEffect, useState, useCallback } from "react";
+
+/** Props of the AudioPlayer component. */
+interface AudioPlayerProps {
+  /** URL used as the `src` of the underlying <audio> element. */
+  audioUrl: string;
+  /** Upper bound of the seek slider and the total time shown next to it. */
+  duration: number;
+  /**
+   * Position requested from outside. The player seeks the audio element to
+   * it when it differs from the element's position by more than 0.5.
+   */
+  currentTime: number;
+  /** Called with the audio position on "timeupdate" and on slider input. */
+  onTimeUpdate: (time: number) => void;
+  /** Called with true on "play" and with false on "pause" and "ended". */
+  onPlayingChange: (playing: boolean) => void;
+}
+
+/** Format a time given in seconds as "m:ss" (fractions are dropped). */
+function formatTime(seconds: number): string {
+  const minutePart = Math.floor(seconds / 60);
+  const secondPart = Math.floor(seconds % 60).toString().padStart(2, "0");
+  return [minutePart, secondPart].join(":");
+}
+
+/**
+ * Audio player with a play/pause button and a seek slider.
+ *
+ * The <audio> element is the source of truth for the position: its
+ * "timeupdate" events update the slider and are forwarded through
+ * `onTimeUpdate`. An externally changed `currentTime` seeks the element only
+ * when it is more than 0.5 away from the element's own position, so the
+ * values echoed back by `onTimeUpdate` do not trigger seeks.
+ */
+export default function AudioPlayer({
+  audioUrl,
+  duration,
+  currentTime,
+  onTimeUpdate,
+  onPlayingChange,
+}: AudioPlayerProps) {
+  const audioRef = useRef<HTMLAudioElement>(null);
+  const [isPlaying, setIsPlaying] = useState(false);
+  const [internalTime, setInternalTime] = useState(0);
+
+  // Mirror the media element's events into local state and the callbacks.
+  useEffect(() => {
+    const audio = audioRef.current;
+    if (!audio) return;
+
+    const handleTimeUpdate = () => {
+      setInternalTime(audio.currentTime);
+      onTimeUpdate(audio.currentTime);
+    };
+
+    const handlePlay = () => {
+      setIsPlaying(true);
+      onPlayingChange(true);
+    };
+
+    const handlePause = () => {
+      setIsPlaying(false);
+      onPlayingChange(false);
+    };
+
+    const handleEnded = () => {
+      setIsPlaying(false);
+      onPlayingChange(false);
+    };
+
+    audio.addEventListener("timeupdate", handleTimeUpdate);
+    audio.addEventListener("play", handlePlay);
+    audio.addEventListener("pause", handlePause);
+    audio.addEventListener("ended", handleEnded);
+
+    return () => {
+      audio.removeEventListener("timeupdate", handleTimeUpdate);
+      audio.removeEventListener("play", handlePlay);
+      audio.removeEventListener("pause", handlePause);
+      audio.removeEventListener("ended", handleEnded);
+    };
+  }, [onTimeUpdate, onPlayingChange]);
+
+  // Seek to currentTime when it changes externally
+  useEffect(() => {
+    const audio = audioRef.current;
+    if (!audio) return;
+
+    // Only seek if the difference is significant (user clicked timeline)
+    if (Math.abs(audio.currentTime - currentTime) > 0.5) {
+      audio.currentTime = currentTime;
+    }
+  }, [currentTime]);
+
+  const togglePlay = useCallback(() => {
+    const audio = audioRef.current;
+    if (!audio) return;
+
+    if (isPlaying) {
+      audio.pause();
+    } else {
+      // play() returns a promise that rejects when playback cannot start
+      // (for example when the source fails to load). Handle the rejection so
+      // it does not surface as an unhandled promise rejection.
+      audio.play().catch((err) => {
+        console.error("Audio playback failed:", err);
+      });
+    }
+  }, [isPlaying]);
+
+  const handleSeek = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
+    const audio = audioRef.current;
+    if (!audio) return;
+
+    const newTime = parseFloat(e.target.value);
+    audio.currentTime = newTime;
+    setInternalTime(newTime);
+    onTimeUpdate(newTime);
+  }, [onTimeUpdate]);
+
+  return (
+    <div className="bg-zinc-900 rounded-lg p-4 space-y-3">
+      <audio ref={audioRef} src={audioUrl} preload="metadata" />
+
+      <div className="flex items-center gap-4">
+        {/* The button shows only an icon, so it needs an accessible name. */}
+        <button
+          type="button"
+          onClick={togglePlay}
+          aria-label={isPlaying ? "Pause" : "Play"}
+          className="w-12 h-12 rounded-full bg-white text-black flex items-center justify-center hover:bg-zinc-200 transition-colors"
+        >
+          {isPlaying ? (
+            <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 20 20">
+              <path
+                fillRule="evenodd"
+                d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zM7 8a1 1 0 012 0v4a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v4a1 1 0 102 0V8a1 1 0 00-1-1z"
+                clipRule="evenodd"
+              />
+            </svg>
+          ) : (
+            <svg className="w-5 h-5 ml-1" fill="currentColor" viewBox="0 0 20 20">
+              <path
+                fillRule="evenodd"
+                d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z"
+                clipRule="evenodd"
+              />
+            </svg>
+          )}
+        </button>
+
+        <div className="flex-1 space-y-1">
+          <input
+            type="range"
+            aria-label="Seek"
+            min={0}
+            max={duration}
+            step={0.1}
+            value={internalTime}
+            onChange={handleSeek}
+            className="w-full h-2 bg-zinc-700 rounded-lg appearance-none cursor-pointer accent-white"
+          />
+          <div className="flex justify-between text-xs text-zinc-400">
+            <span>{formatTime(internalTime)}</span>
+            <span>{formatTime(duration)}</span>
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/EventList.tsx
+++ b/evals/visualizer/src/components/EventList.tsx
@ -0,0 +1,141 @@
+"use client";
+
+import { useEffect, useRef, useMemo, useState } from "react";
+import { CapturedEvent } from "@/types";
+import { DeepgramEventItem, FluxEventItem, SpeechmaticsEventItem } from "./events";
+
+/** Props of the EventList component. */
+interface EventListProps {
+  /** Events to list; one row is rendered per entry, in array order. */
+  events: CapturedEvent[];
+  /** Playback position; the last event at or before it is highlighted. */
+  currentTime: number;
+  /** Called with an event's timestamp when its row is clicked. */
+  onSeek: (time: number) => void;
+  /** Provider name; selects which event item component renders each row. */
+  provider: string;
+}
+
+/**
+ * Format a time given in seconds as "m:ss.hh", where "hh" is the truncated
+ * hundredths of a second.
+ */
+function formatTime(seconds: number): string {
+  const twoDigits = (value: number) => value.toString().padStart(2, "0");
+
+  const minutes = Math.floor(seconds / 60);
+  const wholeSeconds = Math.floor(seconds % 60);
+  const hundredths = Math.floor((seconds % 1) * 100);
+
+  return `${minutes}:${twoDigits(wholeSeconds)}.${twoDigits(hundredths)}`;
+}
+
+/**
+ * Pick the row component for a provider: "deepgram-flux" and "speechmatics"
+ * have dedicated components, every other value uses the Deepgram one.
+ */
+function getEventItemComponent(provider: string) {
+  switch (provider) {
+    case "deepgram-flux":
+      return FluxEventItem;
+    case "speechmatics":
+      return SpeechmaticsEventItem;
+    default:
+      // Default to Deepgram Nova
+      return DeepgramEventItem;
+  }
+}
+
+/**
+ * Scrollable list of captured events.
+ *
+ * The last event whose timestamp is at or before `currentTime` is marked as
+ * current and, while auto-scroll is enabled, scrolled into view. Clicking a
+ * row calls `onSeek` with that event's timestamp. Each row is rendered by
+ * the provider-specific item component, which receives the expand state.
+ */
+export default function EventList({
+  events,
+  currentTime,
+  onSeek,
+  provider,
+}: EventListProps) {
+  const containerRef = useRef<HTMLDivElement>(null);
+  const [expandedEvents, setExpandedEvents] = useState<Set<number>>(new Set());
+  const [autoScroll, setAutoScroll] = useState(true);
+
+  const EventItemComponent = getEventItemComponent(provider);
+
+  // Index of the last event at or before the playback position, -1 if none.
+  const currentEventIndex = useMemo(() => {
+    let candidate = events.length - 1;
+    while (candidate >= 0 && events[candidate].timestamp > currentTime) {
+      candidate--;
+    }
+    return candidate;
+  }, [events, currentTime]);
+
+  // Keep the current event centered in view while auto-scroll is enabled.
+  useEffect(() => {
+    if (!autoScroll || currentEventIndex < 0) return;
+
+    containerRef.current
+      ?.querySelector(`[data-index="${currentEventIndex}"]`)
+      ?.scrollIntoView({ behavior: "smooth", block: "center" });
+  }, [currentEventIndex, autoScroll]);
+
+  // Flip the expanded state of one row, always producing a new Set.
+  const toggleExpand = (index: number) => {
+    setExpandedEvents((prev) => {
+      const updated = new Set(prev);
+      // delete() reports whether the index was present; add it when it was not.
+      if (!updated.delete(index)) {
+        updated.add(index);
+      }
+      return updated;
+    });
+  };
+
+  return (
+    <div className="bg-zinc-800 rounded-lg flex flex-col h-full">
+      <div className="flex justify-between items-center px-4 py-2 border-b border-zinc-700">
+        <div className="text-sm text-zinc-400 font-medium">
+          Events ({events.length})
+        </div>
+        <label className="flex items-center gap-2 text-xs text-zinc-500 cursor-pointer">
+          <input
+            type="checkbox"
+            checked={autoScroll}
+            onChange={(e) => setAutoScroll(e.target.checked)}
+            className="rounded"
+          />
+          Auto-scroll
+        </label>
+      </div>
+
+      <div
+        ref={containerRef}
+        className="flex-1 overflow-y-auto divide-y divide-zinc-700/50"
+      >
+        {events.map((event, index) => {
+          const isCurrent = index === currentEventIndex;
+          const rowTone = isCurrent ? "bg-zinc-700/50" : "hover:bg-zinc-700/30";
+          const dotTone = isCurrent ? "bg-white" : "bg-zinc-600";
+
+          return (
+            <div
+              key={index}
+              data-index={index}
+              className={`p-3 cursor-pointer transition-colors ${rowTone}`}
+              onClick={() => onSeek(event.timestamp)}
+            >
+              <div className="flex items-start gap-2">
+                {/* Current indicator */}
+                <div className="pt-1">
+                  <div className={`w-2 h-2 rounded-full ${dotTone}`} />
+                </div>
+
+                {/* Timestamp */}
+                <span className="text-xs text-zinc-500 font-mono pt-0.5">
+                  {formatTime(event.timestamp)}
+                </span>
+
+                {/* Provider-specific event item */}
+                <EventItemComponent
+                  event={event}
+                  isExpanded={expandedEvents.has(index)}
+                  onToggleExpand={() => toggleExpand(index)}
+                />
+              </div>
+            </div>
+          );
+        })}
+      </div>
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/EventTimeline.tsx
+++ b/evals/visualizer/src/components/EventTimeline.tsx
@ -0,0 +1,119 @@
+"use client";
+
+import { useMemo } from "react";
+import { CapturedEvent } from "@/types";
+
+interface EventTimelineProps {
+  events: CapturedEvent[];
+  duration: number;
+  currentTime: number;
+  onSeek: (time: number) => void;
+}
+
+// Tailwind background class for each known `event_type`, used for both the
+// timeline markers and the legend. `default` is the fallback for unlisted
+// types and is filtered out of the legend. The legend renders only the first
+// six remaining entries, so the insertion order here decides what it shows.
+const EVENT_COLORS: Record<string, string> = {
+  Results: "bg-blue-500",
+  TurnInfo: "bg-green-500",
+  AddTranscript: "bg-purple-500",
+  Connected: "bg-yellow-500",
+  RecognitionStarted: "bg-yellow-500",
+  EndOfTranscript: "bg-red-500",
+  Metadata: "bg-gray-500",
+  Error: "bg-red-600",
+  default: "bg-zinc-400",
+};
+
+/**
+ * Formats a number of seconds as `m:ss`: whole minutes unpadded, whole
+ * seconds zero-padded to two digits. Fractions of a second are dropped.
+ */
+function formatTime(seconds: number): string {
+  const wholeMinutes = Math.floor(seconds / 60);
+  const paddedSeconds = String(Math.floor(seconds % 60)).padStart(2, "0");
+  return `${wholeMinutes}:${paddedSeconds}`;
+}
+
+/** Returns true when `duration` is a finite number greater than zero. */
+function isUsableDuration(duration: number): boolean {
+  return Number.isFinite(duration) && duration > 0;
+}
+
+/**
+ * Builds the tick values (in seconds) for the axis under the timeline: one
+ * tick every `ceil(duration / 6)` seconds starting at 0, plus
+ * `floor(duration)` when the last tick does not already land on it.
+ *
+ * Falls back to a single `0` tick for an unusable duration. Without this
+ * guard a duration of 0 gives a step of 0 and the loop never terminates.
+ */
+function buildTimeMarkers(duration: number): number[] {
+  if (!isUsableDuration(duration)) {
+    return [0];
+  }
+  const markers: number[] = [];
+  const interval = Math.ceil(duration / 6);
+  for (let i = 0; i <= duration; i += interval) {
+    markers.push(i);
+  }
+  const lastWholeSecond = Math.floor(duration);
+  if (markers[markers.length - 1] !== lastWholeSecond) {
+    markers.push(lastWholeSecond);
+  }
+  return markers;
+}
+
+/**
+ * Horizontal timeline of captured events.
+ *
+ * Renders a progress overlay and cursor for `currentTime`, one colored marker
+ * per event positioned by `event.timestamp / duration`, a row of time ticks,
+ * and a legend. Clicking the track calls `onSeek` with the time that
+ * corresponds to the click position, clamped to `[0, duration]`.
+ *
+ * When `duration` is zero, negative or not finite, all positions collapse to
+ * 0% and clicks are ignored instead of producing `NaN` styles or seek times.
+ */
+export default function EventTimeline({
+  events,
+  duration,
+  currentTime,
+  onSeek,
+}: EventTimelineProps) {
+  const durationOk = isUsableDuration(duration);
+
+  const timeMarkers = useMemo(() => buildTimeMarkers(duration), [duration]);
+
+  // Position of `time` along the track as a percentage, capped at 100.
+  const toPercent = (time: number): number =>
+    durationOk ? Math.min((time / duration) * 100, 100) : 0;
+
+  const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
+    if (!durationOk) {
+      // Nothing meaningful to seek to without a usable duration.
+      return;
+    }
+    const rect = e.currentTarget.getBoundingClientRect();
+    const x = e.clientX - rect.left;
+    const percent = x / rect.width;
+    const time = percent * duration;
+    onSeek(Math.max(0, Math.min(time, duration)));
+  };
+
+  const progressPercent = toPercent(currentTime);
+
+  return (
+    <div className="bg-zinc-800 rounded-lg p-4 space-y-2">
+      <div className="text-sm text-zinc-400 font-medium">Event Timeline</div>
+
+      <div
+        className="relative h-16 bg-zinc-900 rounded cursor-pointer overflow-hidden"
+        onClick={handleClick}
+      >
+        {/* Progress indicator */}
+        <div
+          className="absolute top-0 bottom-0 bg-zinc-700/50 pointer-events-none"
+          style={{ width: `${progressPercent}%` }}
+        />
+
+        {/* Current time indicator */}
+        <div
+          className="absolute top-0 bottom-0 w-0.5 bg-white z-10 pointer-events-none"
+          style={{ left: `${progressPercent}%` }}
+        />
+
+        {/* Event markers */}
+        <div className="absolute inset-0 flex items-center">
+          {events.map((event, index) => {
+            const leftPercent = toPercent(event.timestamp);
+            const colorClass =
+              EVENT_COLORS[event.event_type] || EVENT_COLORS.default;
+
+            return (
+              <div
+                key={index}
+                className={`absolute w-2 h-8 rounded-sm ${colorClass} opacity-80 hover:opacity-100 transition-opacity`}
+                style={{ left: `${leftPercent}%`, transform: "translateX(-50%)" }}
+                title={`${formatTime(event.timestamp)} - ${event.event_type}`}
+              />
+            );
+          })}
+        </div>
+      </div>
+
+      {/* Time markers */}
+      <div className="flex justify-between text-xs text-zinc-500">
+        {timeMarkers.map((time, index) => (
+          <span key={index}>{formatTime(time)}</span>
+        ))}
+      </div>
+
+      {/* Legend: first six named event types; the `default` fallback is omitted */}
+      <div className="flex flex-wrap gap-3 pt-2">
+        {Object.entries(EVENT_COLORS)
+          .filter(([key]) => key !== "default")
+          .slice(0, 6)
+          .map(([eventType, colorClass]) => (
+            <div key={eventType} className="flex items-center gap-1 text-xs text-zinc-400">
+              <div className={`w-2 h-2 rounded-sm ${colorClass}`} />
+              <span>{eventType}</span>
+            </div>
+          ))}
+      </div>
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/events/DeepgramEventItem.tsx
+++ b/evals/visualizer/src/components/events/DeepgramEventItem.tsx
@ -0,0 +1,98 @@
+"use client";
+
+import { CapturedEvent } from "@/types";
+
+interface DeepgramEventItemProps {
+  event: CapturedEvent;
+  isExpanded: boolean;
+  onToggleExpand: () => void;
+}
+
+// Tailwind text/background classes for the event-type badge, keyed by
+// `event_type`; `default` is used for any type not listed here.
+const EVENT_COLORS: Record<string, string> = {
+  Results: "text-blue-400 bg-blue-500/10",
+  SpeechStarted: "text-yellow-400 bg-yellow-500/10",
+  Metadata: "text-gray-400 bg-gray-500/10",
+  UtteranceEnd: "text-red-500 bg-red-600/10",
+  default: "text-zinc-400 bg-zinc-500/10",
+};
+
+/**
+ * Returns the transcript of the first entry in
+ * `event.data.channel.alternatives`, or an empty string when the channel,
+ * the alternatives, or the transcript itself is missing or empty.
+ */
+function getTranscript(event: CapturedEvent): string {
+  const channel = event.data.channel as Record<string, unknown> | undefined;
+  if (!channel) {
+    return "";
+  }
+  const alternatives = channel.alternatives as Array<{ transcript?: string }> | undefined;
+  return alternatives?.[0]?.transcript || "";
+}
+
+/**
+ * Row content for a Deepgram event inside the event list.
+ *
+ * Shows a badge with the event type, a Final/Partial badge when the payload
+ * carries `is_final`, a "Speech Final" badge when `speech_final` is truthy,
+ * the transcript, and a toggle that reveals the raw payload as JSON.
+ *
+ * Events without a transcript show a bracketed status label instead of an
+ * empty line, matching the Flux and Speechmatics event items.
+ *
+ * @param event          The captured event to render.
+ * @param isExpanded     Whether the raw JSON payload is currently shown.
+ * @param onToggleExpand Called when the details button is clicked.
+ */
+export default function DeepgramEventItem({
+  event,
+  isExpanded,
+  onToggleExpand,
+}: DeepgramEventItemProps) {
+  const colorClass = EVENT_COLORS[event.event_type] || EVENT_COLORS.default;
+  const data = event.data;
+
+  const transcript = getTranscript(event);
+  const isFinal = data.is_final as boolean | undefined;
+  const speechFinal = data.speech_final as boolean | undefined;
+
+  // Status label for events that carry no transcript text
+  const isConnection = event.event_type === "Connected";
+  const statusLabel = isConnection ? "[Connected]" : `[${event.event_type}]`;
+
+  return (
+    <div className="flex-1 min-w-0 space-y-1">
+      <div className="flex items-center gap-2 flex-wrap">
+        <span className={`text-xs px-2 py-0.5 rounded ${colorClass}`}>
+          {event.event_type}
+        </span>
+
+        {/* Final/Partial indicator for Results */}
+        {isFinal !== undefined && (
+          <span
+            className={`text-xs px-2 py-0.5 rounded ${
+              isFinal
+                ? "text-emerald-400 bg-emerald-500/10"
+                : "text-amber-400 bg-amber-500/10"
+            }`}
+          >
+            {isFinal ? "Final" : "Partial"}
+          </span>
+        )}
+
+        {/* Speech Final indicator */}
+        {speechFinal && (
+          <span className="text-xs px-2 py-0.5 rounded text-cyan-400 bg-cyan-500/10">
+            Speech Final
+          </span>
+        )}
+      </div>
+
+      {/* Transcript or status message */}
+      <div className="text-sm text-zinc-300 truncate">
+        {transcript || statusLabel}
+      </div>
+
+      {/* Expand/collapse button; stopPropagation keeps the click from reaching parent click handlers */}
+      <button
+        onClick={(e) => {
+          e.stopPropagation();
+          onToggleExpand();
+        }}
+        className="text-xs text-zinc-500 hover:text-zinc-300"
+      >
+        {isExpanded ? "Hide details" : "Show details"}
+      </button>
+
+      {/* Expanded JSON view */}
+      {isExpanded && (
+        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
+          {JSON.stringify(event.data, null, 2)}
+        </pre>
+      )}
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/events/FluxEventItem.tsx
+++ b/evals/visualizer/src/components/events/FluxEventItem.tsx
@ -0,0 +1,115 @@
+"use client";
+
+import { CapturedEvent } from "@/types";
+
+interface FluxEventItemProps {
+  event: CapturedEvent;
+  isExpanded: boolean;
+  onToggleExpand: () => void;
+}
+
+// Tailwind text/background classes for the event-type badge, keyed by
+// `event_type`; `default` is used for any type not listed here.
+const EVENT_COLORS: Record<string, string> = {
+  TurnInfo: "text-green-400 bg-green-500/10",
+  Connected: "text-yellow-400 bg-yellow-500/10",
+  Error: "text-red-500 bg-red-600/10",
+  default: "text-zinc-400 bg-zinc-500/10",
+};
+
+// Tailwind text/background classes for the sub-event badge, keyed by the
+// `event` field of the event payload; `default` covers unlisted values.
+const FLUX_EVENT_COLORS: Record<string, string> = {
+  Update: "text-amber-300 bg-amber-500/20",
+  EndOfTurn: "text-emerald-300 bg-emerald-500/20",
+  EagerEndOfTurn: "text-cyan-300 bg-cyan-500/20",
+  StartOfTurn: "text-blue-300 bg-blue-500/20",
+  TurnResumed: "text-purple-300 bg-purple-500/20",
+  default: "text-zinc-300 bg-zinc-500/20",
+};
+
+/**
+ * Row content for a Flux event inside the event list.
+ *
+ * Shows the event-type badge and, when the payload has an `event` field, a
+ * sub-event badge plus a Final/Partial badge ("Final" only for `EndOfTurn`).
+ * Turn index and end-of-turn confidence are shown when present. Below that
+ * comes the transcript (or a bracketed status label when there is none) and
+ * a toggle that reveals the raw payload as JSON.
+ */
+export default function FluxEventItem({
+  event,
+  isExpanded,
+  onToggleExpand,
+}: FluxEventItemProps) {
+  const payload = event.data;
+  const badgeClass = EVENT_COLORS[event.event_type] || EVENT_COLORS.default;
+
+  // Fields read from the payload; any of them may be absent.
+  const subEvent = payload.event as string | undefined;
+  const spokenText = payload.transcript as string | undefined;
+  const eotConfidence = payload.end_of_turn_confidence as number | undefined;
+  const turnNumber = payload.turn_index as number | undefined;
+
+  const turnEnded = subEvent === "EndOfTurn";
+  const subEventClass = subEvent
+    ? FLUX_EVENT_COLORS[subEvent] || FLUX_EVENT_COLORS.default
+    : "";
+  const finalityClass = turnEnded
+    ? "text-emerald-400 bg-emerald-500/10"
+    : "text-amber-400 bg-amber-500/10";
+
+  // Shown in place of the transcript when the event has no text.
+  const placeholder =
+    event.event_type === "Connected"
+      ? "[Connected]"
+      : `[${subEvent || event.event_type}]`;
+
+  const handleToggle = (e: React.MouseEvent<HTMLButtonElement>) => {
+    // Keep the click from bubbling to parent click handlers.
+    e.stopPropagation();
+    onToggleExpand();
+  };
+
+  return (
+    <div className="flex-1 min-w-0 space-y-1">
+      <div className="flex items-center gap-2 flex-wrap">
+        <span className={`text-xs px-2 py-0.5 rounded ${badgeClass}`}>
+          {event.event_type}
+        </span>
+
+        {/* Sub-event type followed by its Final/Partial indicator */}
+        {subEvent && (
+          <>
+            <span className={`text-xs px-2 py-0.5 rounded ${subEventClass}`}>
+              {subEvent}
+            </span>
+            <span className={`text-xs px-2 py-0.5 rounded ${finalityClass}`}>
+              {turnEnded ? "Final" : "Partial"}
+            </span>
+          </>
+        )}
+
+        {/* Turn index */}
+        {turnNumber !== undefined && (
+          <span className="text-xs text-zinc-500">
+            Turn {turnNumber}
+          </span>
+        )}
+
+        {/* EOT confidence */}
+        {eotConfidence !== undefined && (
+          <span className="text-xs text-zinc-500 font-mono">
+            EOT: {(eotConfidence * 100).toFixed(1)}%
+          </span>
+        )}
+      </div>
+
+      {/* Transcript or status message */}
+      <div className="text-sm text-zinc-300 truncate">
+        {spokenText || placeholder}
+      </div>
+
+      {/* Expand/collapse button */}
+      <button
+        onClick={handleToggle}
+        className="text-xs text-zinc-500 hover:text-zinc-300"
+      >
+        {isExpanded ? "Hide details" : "Show details"}
+      </button>
+
+      {/* Expanded JSON view */}
+      {isExpanded && (
+        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
+          {JSON.stringify(event.data, null, 2)}
+        </pre>
+      )}
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/events/SpeechmaticsEventItem.tsx
+++ b/evals/visualizer/src/components/events/SpeechmaticsEventItem.tsx
@ -0,0 +1,101 @@
+"use client";
+
+import { CapturedEvent } from "@/types";
+
+interface SpeechmaticsEventItemProps {
+  event: CapturedEvent;
+  isExpanded: boolean;
+  onToggleExpand: () => void;
+}
+
+// Tailwind text/background classes for the event-type badge, keyed by
+// `event_type`; `default` is used for any type not listed here.
+const EVENT_COLORS: Record<string, string> = {
+  AddTranscript: "text-purple-400 bg-purple-500/10",
+  RecognitionStarted: "text-yellow-400 bg-yellow-500/10",
+  EndOfTranscript: "text-red-400 bg-red-500/10",
+  Warning: "text-orange-400 bg-orange-500/10",
+  Error: "text-red-500 bg-red-600/10",
+  default: "text-zinc-400 bg-zinc-500/10",
+};
+
+/**
+ * Joins, with single spaces, the first alternative's `content` of every
+ * entry in `event.data.results` whose `type` is "word". Entries of other
+ * types, or without content, are skipped. Returns "" when the payload has
+ * no `results`.
+ */
+function getTranscript(event: CapturedEvent): string {
+  const results = event.data.results as Array<{
+    type?: string;
+    alternatives?: Array<{ content?: string }>;
+  }> | undefined;
+
+  if (!results) {
+    return "";
+  }
+
+  const words = results.reduce<string[]>((collected, result) => {
+    const content = result.alternatives?.[0]?.content;
+    if (result.type === "word" && content) {
+      collected.push(content);
+    }
+    return collected;
+  }, []);
+
+  return words.join(" ");
+}
+
+/**
+ * Row content for a Speechmatics event inside the event list.
+ *
+ * Shows the event-type badge (plus a "Final" badge for `AddTranscript`),
+ * then the transcript or, when there is none, a bracketed status label
+ * derived from the event type, and a toggle that reveals the raw payload
+ * as JSON.
+ */
+export default function SpeechmaticsEventItem({
+  event,
+  isExpanded,
+  onToggleExpand,
+}: SpeechmaticsEventItemProps) {
+  const eventType = event.event_type;
+  const badgeClass = EVENT_COLORS[eventType] || EVENT_COLORS.default;
+  const transcript = getTranscript(event);
+
+  // Label shown in place of the transcript for events without text.
+  const describeStatus = (): string => {
+    switch (eventType) {
+      case "RecognitionStarted":
+        return "[Recognition Started]";
+      case "EndOfTranscript":
+        return "[End of Transcript]";
+      case "Warning": {
+        const reason = event.data.reason as string | undefined;
+        return `[Warning: ${reason || "unknown"}]`;
+      }
+      default:
+        return `[${eventType}]`;
+    }
+  };
+
+  const handleToggle = (e: React.MouseEvent<HTMLButtonElement>) => {
+    // Keep the click from bubbling to parent click handlers.
+    e.stopPropagation();
+    onToggleExpand();
+  };
+
+  return (
+    <div className="flex-1 min-w-0 space-y-1">
+      <div className="flex items-center gap-2 flex-wrap">
+        <span className={`text-xs px-2 py-0.5 rounded ${badgeClass}`}>
+          {eventType}
+        </span>
+
+        {/* AddTranscript is always final in Speechmatics */}
+        {eventType === "AddTranscript" && (
+          <span className="text-xs px-2 py-0.5 rounded text-emerald-400 bg-emerald-500/10">
+            Final
+          </span>
+        )}
+      </div>
+
+      {/* Transcript or status message */}
+      <div className="text-sm text-zinc-300 truncate">
+        {transcript || describeStatus()}
+      </div>
+
+      {/* Expand/collapse button */}
+      <button
+        onClick={handleToggle}
+        className="text-xs text-zinc-500 hover:text-zinc-300"
+      >
+        {isExpanded ? "Hide details" : "Show details"}
+      </button>
+
+      {/* Expanded JSON view */}
+      {isExpanded && (
+        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
+          {JSON.stringify(event.data, null, 2)}
+        </pre>
+      )}
+    </div>
+  );
+}
--- a/evals/visualizer/src/components/events/index.ts
+++ b/evals/visualizer/src/components/events/index.ts
@ -0,0 +1,3 @@
+export { default as DeepgramEventItem } from "./DeepgramEventItem";
+export { default as FluxEventItem } from "./FluxEventItem";
+export { default as SpeechmaticsEventItem } from "./SpeechmaticsEventItem";
--- a/evals/visualizer/src/types/index.ts
+++ b/evals/visualizer/src/types/index.ts
@ -0,0 +1,24 @@
+/** One captured provider event, as consumed by the visualizer components. */
+export interface CapturedEvent {
+  // Time of the event; the visualizer formats it as seconds (m:ss) and
+  // positions it relative to the recording duration.
+  timestamp: number;
+  // Provider event name, e.g. "Results", "TurnInfo" or "AddTranscript".
+  event_type: string;
+  // Raw event payload; its fields depend on the provider and event type.
+  data: Record<string, unknown>;
+}
+
+/** A complete capture for one audio file: metadata, events and transcript. */
+export interface EventCaptureResult {
+  audio_file: string;
+  audio_path: string;
+  provider: string;
+  // NOTE(review): presumably seconds, matching how the timeline uses
+  // durations; confirm against the producer of these results.
+  duration: number;
+  // NOTE(review): timestamp string; the exact format is not visible here.
+  created_at: string;
+  events: CapturedEvent[];
+  transcript: string;
+}
+
+/**
+ * Summary of a capture result: the metadata fields of `EventCaptureResult`
+ * with an `id` and an `event_count` in place of the events themselves.
+ * NOTE(review): presumably used for result listings; confirm against callers.
+ */
+export interface ResultSummary {
+  id: string;
+  audio_file: string;
+  provider: string;
+  duration: number;
+  created_at: string;
+  event_count: number;
+}
--- a/evals/visualizer/tsconfig.json
+++ b/evals/visualizer/tsconfig.json
@ -0,0 +1,34 @@
+{
+  "compilerOptions": {
+    "target": "ES2017",
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "react-jsx",
+    "incremental": true,
+    "plugins": [
+      {
+        "name": "next"
+      }
+    ],
+    "paths": {
+      "@/*": ["./src/*"]
+    }
+  },
+  "include": [
+    "next-env.d.ts",
+    "**/*.ts",
+    "**/*.tsx",
+    ".next/types/**/*.ts",
+    ".next/dev/types/**/*.ts",
+    "**/*.mts"
+  ],
+  "exclude": ["node_modules"]
+}
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit a1d3062446240b6b27ebc787d28578e4561e7441
+Subproject commit f11fad8f3e90e06b1625b9dc49c13e26f3c9e716
--- a/release-please-config.json
+++ b/release-please-config.json
@ -16,6 +16,11 @@
            "type": "json",
            "path": "ui/package.json",
            "jsonpath": "$.version"
+          },
+          {
+            "type": "toml",
+            "path": "api/pyproject.toml",
+            "key": "project.version"
          }
        ]
      }
--- a/ui/src/app/after-sign-in/page.tsx
+++ b/ui/src/app/after-sign-in/page.tsx
@ -1,6 +1,6 @@
 import { redirect } from "next/navigation";

-import { getWorkflowsApiV1WorkflowFetchGet } from "@/client/sdk.gen";
+import { getWorkflowCountApiV1WorkflowCountGet } from "@/client/sdk.gen";
 import { getServerAccessToken,getServerAuthProvider, getServerUser } from "@/lib/auth/server";
 import logger from '@/lib/logger';
 import { getRedirectUrl } from "@/lib/utils";
@ -34,21 +34,18 @@ export default async function AfterSignInPage() {
    try {
        const accessToken = await getServerAccessToken();
        if (accessToken) {
-            const workflowsResponse = await getWorkflowsApiV1WorkflowFetchGet({
+            const countResponse = await getWorkflowCountApiV1WorkflowCountGet({
                headers: {
                    Authorization: `Bearer ${accessToken}`,
                },
            });

-            const workflows = workflowsResponse.data ? (Array.isArray(workflowsResponse.data) ? workflowsResponse.data : [workflowsResponse.data]) : [];
-            const activeWorkflows = workflows.filter(w => w.status === 'active');
-
            logger.debug('[AfterSignInPage] Found workflows:', {
-                total: workflows.length,
-                active: activeWorkflows.length
+                total: countResponse.data?.total,
+                active: countResponse.data?.active
            });

-            if (activeWorkflows.length > 0) {
+            if (countResponse.data && countResponse.data.active > 0) {
                logger.debug('[AfterSignInPage] Redirecting to /workflow - user has workflows');
                redirect('/workflow');
            } else {
--- a/ui/src/app/api/config/version/route.ts
+++ b/ui/src/app/api/config/version/route.ts
@ -0,0 +1,33 @@
+import { NextResponse } from "next/server";
+
+import { healthApiV1HealthGet } from "@/client/sdk.gen";
+import type { HealthResponse } from "@/client/types.gen";
+
+// Import version from package.json at build time
+import packageJson from "../../../../../package.json";
+
+/**
+ * GET handler that reports the UI version (from package.json, or "dev" when
+ * it has none) together with the backend version and API endpoint read from
+ * the backend health endpoint.
+ *
+ * `api` is "unknown" when the health call returns no data and "unavailable"
+ * when the call throws; `backendApiEndpoint` stays null in both cases.
+ */
+export async function GET() {
+  let api = "unknown";
+  let backendApiEndpoint: string | null = null;
+
+  try {
+    const { data } = await healthApiV1HealthGet();
+    if (data) {
+      const health = data as HealthResponse;
+      api = health.version;
+      backendApiEndpoint = health.backend_api_endpoint;
+    }
+  } catch {
+    // Backend might not be reachable during build or in some deployments
+    api = "unavailable";
+  }
+
+  return NextResponse.json({
+    ui: packageJson.version || "dev",
+    api,
+    backendApiEndpoint,
+  });
+}
--- a/ui/src/app/layout.tsx
+++ b/ui/src/app/layout.tsx
@ -9,6 +9,7 @@ import AppLayout from "@/components/layout/AppLayout";
 import PostHogIdentify from "@/components/PostHogIdentify";
 import SpinLoader from "@/components/SpinLoader";
 import { Toaster } from "@/components/ui/sonner";
+import { AppConfigProvider } from "@/context/AppConfigContext";
 import { OnboardingProvider } from "@/context/OnboardingContext";
 import { UserConfigProvider } from "@/context/UserConfigContext";
 import { AuthProvider } from "@/lib/auth";
@ -59,18 +60,20 @@ export default function RootLayout({
      <body
        className={`${geistSans.variable} ${geistMono.variable} antialiased`}>
        <AuthProvider>
-          <Suspense fallback={<SpinLoader />}>
-            <UserConfigProvider>
-              <OnboardingProvider>
-                <PostHogIdentify />
-                <AppLayout>
-                  {children}
-                </AppLayout>
-                <Toaster />
-                <ChatwootWidget />
-              </OnboardingProvider>
-            </UserConfigProvider>
-          </Suspense>
+          <AppConfigProvider>
+            <Suspense fallback={<SpinLoader />}>
+              <UserConfigProvider>
+                <OnboardingProvider>
+                  <PostHogIdentify />
+                  <AppLayout>
+                    {children}
+                  </AppLayout>
+                  <Toaster />
+                  <ChatwootWidget />
+                </OnboardingProvider>
+              </UserConfigProvider>
+            </Suspense>
+          </AppConfigProvider>
        </AuthProvider>
      </body>
    </html>
--- a/ui/src/app/page.tsx
+++ b/ui/src/app/page.tsx
@ -1,7 +1,7 @@
 import { isNextRouterError } from "next/dist/client/components/is-next-router-error";
 import { redirect } from "next/navigation";

-import { getWorkflowsApiV1WorkflowFetchGet } from "@/client/sdk.gen";
+import { getWorkflowCountApiV1WorkflowCountGet } from "@/client/sdk.gen";
 import SignInClient from "@/components/SignInClient";
 import { getServerAccessToken,getServerAuthProvider,getServerUser } from "@/lib/auth/server";
 import logger from '@/lib/logger';
@ -21,21 +21,18 @@ export default async function Home() {
    try {
      const accessToken = await getServerAccessToken();
      if (accessToken) {
-        const workflowsResponse = await getWorkflowsApiV1WorkflowFetchGet({
+        const countResponse = await getWorkflowCountApiV1WorkflowCountGet({
          headers: {
            Authorization: `Bearer ${accessToken}`,
          },
        });

-        const workflows = workflowsResponse.data ? (Array.isArray(workflowsResponse.data) ? workflowsResponse.data : [workflowsResponse.data]) : [];
-        const activeWorkflows = workflows.filter(w => w.status === 'active');
-
        logger.debug('[HomePage] Found workflows for local provider:', {
-          total: workflows.length,
-          active: activeWorkflows.length
+          total: countResponse.data?.total,
+          active: countResponse.data?.active
        });

-        if (activeWorkflows.length > 0) {
+        if (countResponse.data && countResponse.data.active > 0) {
          logger.debug('[HomePage] Redirecting to /workflow - user has workflows');
          redirect('/workflow');
        } else {
--- a/ui/src/app/usage/page.tsx
+++ b/ui/src/app/usage/page.tsx
@ -326,14 +326,64 @@ export default function UsagePage() {
                                    isDisabled={savingTimezone || userConfigLoading}
                                    placeholder={userConfigLoading ? "Loading..." : "Select timezone"}
                                    styles={{
-                                        control: (base) => ({
+                                        control: (base, state) => ({
                                            ...base,
                                            minHeight: '36px',
                                            fontSize: '14px',
+                                            backgroundColor: 'var(--background)',
+                                            borderColor: state.isFocused ? 'var(--ring)' : 'var(--border)',
+                                            boxShadow: state.isFocused ? '0 0 0 2px color-mix(in srgb, var(--ring) 20%, transparent)' : 'none',
+                                            '&:hover': {
+                                                borderColor: 'var(--border)',
+                                            },
                                        }),
                                        menu: (base) => ({
                                            ...base,
                                            zIndex: 9999,
+                                            backgroundColor: 'var(--popover)',
+                                            border: '1px solid var(--border)',
+                                            boxShadow: '0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
+                                        }),
+                                        menuList: (base) => ({
+                                            ...base,
+                                            backgroundColor: 'var(--popover)',
+                                            padding: 0,
+                                        }),
+                                        option: (base, state) => ({
+                                            ...base,
+                                            backgroundColor: state.isSelected
+                                                ? 'var(--accent)'
+                                                : state.isFocused
+                                                ? 'var(--accent)'
+                                                : 'var(--popover)',
+                                            color: 'var(--foreground)',
+                                            cursor: 'pointer',
+                                            '&:active': {
+                                                backgroundColor: 'var(--accent)',
+                                            },
+                                        }),
+                                        singleValue: (base) => ({
+                                            ...base,
+                                            color: 'var(--foreground)',
+                                        }),
+                                        input: (base) => ({
+                                            ...base,
+                                            color: 'var(--foreground)',
+                                        }),
+                                        placeholder: (base) => ({
+                                            ...base,
+                                            color: 'var(--muted-foreground)',
+                                        }),
+                                        indicatorSeparator: (base) => ({
+                                            ...base,
+                                            backgroundColor: 'var(--border)',
+                                        }),
+                                        dropdownIndicator: (base) => ({
+                                            ...base,
+                                            color: 'var(--muted-foreground)',
+                                            '&:hover': {
+                                                color: 'var(--foreground)',
+                                            },
                                        }),
                                    }}
                                />
--- a/ui/src/client/sdk.gen.ts
+++ b/ui/src/client/sdk.gen.ts
--- a/ui/src/client/types.gen.ts
+++ b/ui/src/client/types.gen.ts
@ -524,6 +524,12 @@ export type HttpValidationError = {
    detail?: Array<ValidationError>;
 };

+export type HealthResponse = {
+    status: string;
+    version: string;
+    backend_api_endpoint: string;
+};
+
 /**
 * Configuration for HTTP API tools.
 */
@ -1042,6 +1048,15 @@ export type VonageConfigurationResponse = {
 */
 export type WebhookCredentialType = 'none' | 'api_key' | 'bearer_token' | 'basic_auth' | 'custom_header';

+/**
+ * Response for workflow count endpoint.
+ */
+export type WorkflowCountResponse = {
+    total: number;
+    active: number;
+    archived: number;
+};
+
 export type WorkflowError = {
    kind: ItemKind;
    id: string | null;
@ -1049,6 +1064,17 @@ export type WorkflowError = {
    message: string;
 };

+/**
+ * Lightweight response for workflow listings (excludes large fields).
+ */
+export type WorkflowListResponse = {
+    id: number;
+    name: string;
+    status: string;
+    created_at: string;
+    total_runs: number;
+};
+
 export type WorkflowOption = {
    id: number;
    name: string;
@ -1391,6 +1417,7 @@ export type HandleInboundTelephonyApiV1TelephonyInboundWorkflowIdPostData = {
        'x-twilio-signature'?: string | null;
        'x-vobiz-signature'?: string | null;
        'x-vobiz-timestamp'?: string | null;
+        'x-cx-apikey'?: string | null;
    };
    path: {
        workflow_id: number;
@ -1655,6 +1682,39 @@ export type CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses =

 export type CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponse = CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses[keyof CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses];

+export type GetWorkflowCountApiV1WorkflowCountGetData = {
+    body?: never;
+    headers?: {
+        authorization?: string | null;
+        'X-API-Key'?: string | null;
+    };
+    path?: never;
+    query?: never;
+    url: '/api/v1/workflow/count';
+};
+
+export type GetWorkflowCountApiV1WorkflowCountGetErrors = {
+    /**
+     * Not found
+     */
+    404: unknown;
+    /**
+     * Validation Error
+     */
+    422: HttpValidationError;
+};
+
+export type GetWorkflowCountApiV1WorkflowCountGetError = GetWorkflowCountApiV1WorkflowCountGetErrors[keyof GetWorkflowCountApiV1WorkflowCountGetErrors];
+
+export type GetWorkflowCountApiV1WorkflowCountGetResponses = {
+    /**
+     * Successful Response
+     */
+    200: WorkflowCountResponse;
+};
+
+export type GetWorkflowCountApiV1WorkflowCountGetResponse = GetWorkflowCountApiV1WorkflowCountGetResponses[keyof GetWorkflowCountApiV1WorkflowCountGetResponses];
+
 export type GetWorkflowsApiV1WorkflowFetchGetData = {
    body?: never;
    headers?: {
@ -1688,7 +1748,7 @@ export type GetWorkflowsApiV1WorkflowFetchGetResponses = {
    /**
     * Successful Response
     */
-    200: Array<WorkflowResponse>;
+    200: Array<WorkflowListResponse>;
 };

 export type GetWorkflowsApiV1WorkflowFetchGetResponse = GetWorkflowsApiV1WorkflowFetchGetResponses[keyof GetWorkflowsApiV1WorkflowFetchGetResponses];
@ -4168,6 +4228,41 @@ export type InitiateCallApiV1PublicAgentUuidPostResponses = {

 export type InitiateCallApiV1PublicAgentUuidPostResponse = InitiateCallApiV1PublicAgentUuidPostResponses[keyof InitiateCallApiV1PublicAgentUuidPostResponses];

+export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetData = {
+    body?: never;
+    path: {
+        token: string;
+        artifact_type: 'recording' | 'transcript';
+    };
+    query?: {
+        /**
+         * Display inline in browser instead of download
+         */
+        inline?: boolean;
+    };
+    url: '/api/v1/public/download/workflow/{token}/{artifact_type}';
+};
+
+export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors = {
+    /**
+     * Not found
+     */
+    404: unknown;
+    /**
+     * Validation Error
+     */
+    422: HttpValidationError;
+};
+
+export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetError = DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors[keyof DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors];
+
+export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetResponses = {
+    /**
+     * Successful Response
+     */
+    200: unknown;
+};
+
 export type DeactivateEmbedTokenApiV1WorkflowWorkflowIdEmbedTokenDeleteData = {
    body?: never;
    headers?: {
@ -4500,9 +4595,11 @@ export type HealthApiV1HealthGetResponses = {
    /**
     * Successful Response
     */
-    200: unknown;
+    200: HealthResponse;
 };

+export type HealthApiV1HealthGetResponse = HealthApiV1HealthGetResponses[keyof HealthApiV1HealthGetResponses];
+
 export type ClientOptions = {
    baseUrl: 'http://127.0.0.1:8000' | (string & {});
 };
--- a/ui/src/components/MediaPreviewDialog.tsx
+++ b/ui/src/components/MediaPreviewDialog.tsx
@ -22,6 +22,7 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
    const [isOpen, setIsOpen] = useState(false);
    const [mediaType, setMediaType] = useState<'audio' | 'transcript' | null>(null);
    const [mediaSignedUrl, setMediaSignedUrl] = useState<string | null>(null);
+    const [transcriptContent, setTranscriptContent] = useState<string | null>(null);
    const [selectedRunId, setSelectedRunId] = useState<number | null>(null);
    const [mediaDownloadKey, setMediaDownloadKey] = useState<string | null>(null);
    const [mediaLoading, setMediaLoading] = useState(false);
@ -47,6 +48,7 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
        async (fileKey: string | null, runId: number) => {
            if (!fileKey || !accessToken) return;
            setMediaLoading(true);
+            setTranscriptContent(null);
            const signed = await getSignedUrl(fileKey, accessToken, true);
            if (signed) {
                setMediaType('transcript');
@ -54,6 +56,14 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
                setMediaDownloadKey(fileKey);
                setSelectedRunId(runId);
                setIsOpen(true);
+                // Fetch transcript content with proper UTF-8 encoding
+                try {
+                    const response = await fetch(signed);
+                    const text = await response.text();
+                    setTranscriptContent(text);
+                } catch (error) {
+                    console.error('Error fetching transcript:', error);
+                }
            }
            setMediaLoading(false);
        },
@ -84,12 +94,10 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
                        <audio src={mediaSignedUrl} controls autoPlay className="w-full mt-4" />
                    )}

-                    {!mediaLoading && mediaType === 'transcript' && mediaSignedUrl && (
-                        <iframe
-                            src={mediaSignedUrl}
-                            title="Transcript"
-                            className="w-full h-[60vh] border rounded-md mt-4"
-                        />
+                    {!mediaLoading && mediaType === 'transcript' && transcriptContent && (
+                        <pre className="w-full h-[60vh] overflow-auto border rounded-md mt-4 p-4 bg-muted text-sm whitespace-pre-wrap font-mono">
+                            {transcriptContent}
+                        </pre>
                    )}

                    <DialogFooter className="pt-4">
--- a/ui/src/components/ServiceConfiguration.tsx
+++ b/ui/src/components/ServiceConfiguration.tsx
@ -321,9 +321,20 @@ export default function ServiceConfiguration() {
        if (!providerSchema) return [];

        // Find all config fields (not provider, not api_key)
-        return Object.keys(providerSchema.properties).filter(
+        const fields = Object.keys(providerSchema.properties).filter(
            field => field !== "provider" && field !== "api_key"
        );
+
+        // For Deepgram STT, hide language field when flux-general-en model is selected
+        // Flux model is English-only and doesn't support language selection
+        if (service === "stt" && currentProvider === "deepgram") {
+            const currentModel = watch("stt_model") as string;
+            if (currentModel === "flux-general-en") {
+                return fields.filter(field => field !== "language");
+            }
+        }
+
+        return fields;
    };

    const renderServiceFields = (service: ServiceSegment) => {
--- a/ui/src/components/flow/nodes/EndCall.tsx
+++ b/ui/src/components/flow/nodes/EndCall.tsx
@ -35,7 +35,7 @@ interface EndCallNodeProps extends NodeProps {
 }

 export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
-    const { open, setOpen, handleSaveNodeData } = useNodeHandlers({
+    const { open, setOpen, handleSaveNodeData, handleDeleteNode } = useNodeHandlers({
        id,
        additionalData: { is_end: true }
    });
@ -122,9 +122,14 @@ export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
            </NodeContent>

            <NodeToolbar isVisible={selected} position={Position.Right}>
-                <Button onClick={() => setOpen(true)} variant="outline" size="icon">
-                    <Edit />
-                </Button>
+                <div className="flex flex-col gap-1">
+                    <Button onClick={() => setOpen(true)} variant="outline" size="icon">
+                        <Edit />
+                    </Button>
+                    <Button onClick={handleDeleteNode} variant="outline" size="icon">
+                        <Trash2Icon />
+                    </Button>
+                </div>
            </NodeToolbar>

            <NodeEditDialog
--- a/Show more
+++ b/Show more
				`@ -0,0 +1 @@`
				`<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>`
				`@ -0,0 +1 @@`
				<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
				`@ -0,0 +1 @@`
				<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
				`@ -0,0 +1 @@`
				`<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>`