mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
fix: changes to update pipecat version to 0.0.100 (#122)
* feat: add stt evals * add smart turn as provider * chore: remove deprecations * chore: format files * fix: remove deprecated UserIdleProcessor * fix: remove deprecated TranscriptProcessor * chore: update pipecat submodule * feat: add evals visualisation * fix: trigger llm generation on client connected and pipeline started * chore: update pipecat * chore: update pipecat submodule * Add tests * fix: slow loading of workflow page * chore: update pipecat submodule * Show version after release * Fixes #99 * fix: provider check for websocket connection * Fixes #107 * Fix #96 * chore: fix documentation * fix: cloudonix campaign call error --------- Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
parent
a4367bd83b
commit
911c5ed416
104 changed files with 16919 additions and 597 deletions
|
|
@ -1 +1,2 @@
|
|||
api/.env
|
||||
api/.env
|
||||
evals/
|
||||
|
|
|
|||
47
.github/workflows/docker-image.yml
vendored
47
.github/workflows/docker-image.yml
vendored
|
|
@ -4,7 +4,7 @@ on:
|
|||
release:
|
||||
types: [published]
|
||||
|
||||
# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push
|
||||
# Ensure only one workflow run per branch at a time; cancel any in-progress runs on new push
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
|
@ -13,11 +13,11 @@ jobs:
|
|||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }} # Used to tag images with short commit SHA
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
service:
|
||||
service:
|
||||
- "dograh-api|api/Dockerfile|."
|
||||
- "dograh-ui|ui/Dockerfile|."
|
||||
|
||||
|
|
@ -25,14 +25,12 @@ jobs:
|
|||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true # Only for version check, not used in build
|
||||
submodules: true
|
||||
|
||||
# Pipecat version check removed - now using local submodule
|
||||
|
||||
- name: Set up QEMU # Enables cross-platform builds (e.g., arm64)
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx # Enables multi-arch and advanced Docker builds
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to DockerHub
|
||||
|
|
@ -51,48 +49,50 @@ jobs:
|
|||
- name: Set build variables
|
||||
id: build-vars
|
||||
run: |
|
||||
# Parse matrix entry and set variables early (before build)
|
||||
SERVICE="${{ matrix.service }}"
|
||||
IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
|
||||
SHORT_SHA=${COMMIT_SHA::8}
|
||||
|
||||
# Export for use in subsequent steps
|
||||
|
||||
# Get version from release tag (removes 'dograh-' and 'v' prefixes if present)
|
||||
VERSION="${{ github.event.release.tag_name }}"
|
||||
VERSION="${VERSION#dograh-}"
|
||||
VERSION="${VERSION#v}"
|
||||
|
||||
echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
|
||||
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "service=${SERVICE}" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "version=${VERSION}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Build and Push ${{ matrix.service }}
|
||||
id: docker-build
|
||||
run: |
|
||||
# Parse matrix entry into individual variables
|
||||
SERVICE="${{ matrix.service }}"
|
||||
IMAGE_NAME=$(echo "$SERVICE" | cut -d '|' -f1)
|
||||
DOCKERFILE=$(echo "$SERVICE" | cut -d '|' -f2)
|
||||
CONTEXT=$(echo "$SERVICE" | cut -d '|' -f3)
|
||||
SHORT_SHA=${COMMIT_SHA::8}
|
||||
VERSION="${{ steps.build-vars.outputs.version }}"
|
||||
|
||||
echo "Building and pushing image: $IMAGE_NAME"
|
||||
echo "Dockerfile: $DOCKERFILE"
|
||||
echo "Context: $CONTEXT"
|
||||
echo "Commit SHA: $SHORT_SHA"
|
||||
|
||||
# Export tags for Slack notification
|
||||
echo "Version: $VERSION"
|
||||
|
||||
echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
|
||||
echo "dockerhub_tag=${{ secrets.DOCKERHUB_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "ghcr_tag=ghcr.io/${{ secrets.GHCR_USERNAME }}/${IMAGE_NAME}:${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Build and push multi-arch Docker image to DockerHub and GHCR
|
||||
docker buildx build \
|
||||
-f "$DOCKERFILE" \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
--tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$VERSION \
|
||||
--tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
|
||||
--tag ${{ secrets.DOCKERHUB_USERNAME }}/$IMAGE_NAME:latest \
|
||||
--tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$VERSION \
|
||||
--tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:$SHORT_SHA \
|
||||
--tag ghcr.io/${{ secrets.GHCR_USERNAME }}/$IMAGE_NAME:latest \
|
||||
--push "$CONTEXT"
|
||||
|
||||
# Success notification
|
||||
|
||||
- name: Send Slack notification - Success
|
||||
if: success()
|
||||
uses: slackapi/slack-github-action@v1.26.0
|
||||
|
|
@ -101,10 +101,9 @@ jobs:
|
|||
with:
|
||||
payload: |
|
||||
{
|
||||
"text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.short_sha }}) on ${{ github.ref_name }} by ${{ github.actor }}"
|
||||
"text": "✅ Docker Build Successful - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }}"
|
||||
}
|
||||
|
||||
# Failure notification
|
||||
|
||||
- name: Send Slack notification - Failure
|
||||
if: failure()
|
||||
uses: slackapi/slack-github-action@v1.26.0
|
||||
|
|
@ -113,5 +112,5 @@ jobs:
|
|||
with:
|
||||
payload: |
|
||||
{
|
||||
"text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.short_sha }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
|
||||
"text": "❌ Docker Build Failed - ${{ steps.build-vars.outputs.image_name }} (${{ steps.build-vars.outputs.version }}) on ${{ github.ref_name }} by ${{ github.actor }} - <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
|
||||
}
|
||||
|
|
|
|||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,6 +1,7 @@
|
|||
__pycache__
|
||||
.DS_Store
|
||||
.env
|
||||
.env.prod
|
||||
.env.test
|
||||
|
||||
# logs and run directory on production
|
||||
|
|
|
|||
72
api/alembic/versions/181475b2a1a1_add_public_access_token.py
Normal file
72
api/alembic/versions/181475b2a1a1_add_public_access_token.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""add public_access_token
|
||||
|
||||
Revision ID: 181475b2a1a1
|
||||
Revises: dc33eef8dabe
|
||||
Create Date: 2026-01-23 17:37:54.449308
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "181475b2a1a1"
|
||||
down_revision: Union[str, None] = "dc33eef8dabe"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Apply revision 181475b2a1a1.

    Steps, in order:
      1. Replace the ``ix_api_keys_key_hash`` index with a non-unique index
         of the same name.
      2. Create an ivfflat (cosine ops, 100 lists) index on
         ``knowledge_base_chunks.embedding``.
      3. Create a plain index on ``knowledge_base_chunks.embedding_model``.
      4. Add the nullable ``workflow_runs.public_access_token`` column and a
         partial unique index covering only non-NULL tokens.
    """
    kb_chunks = "knowledge_base_chunks"
    workflow_runs = "workflow_runs"

    # NOTE(review): steps 1-3 were auto-generated alongside the token column
    # and look like schema drift picked up by autogenerate -- confirm that
    # relaxing uniqueness on api_keys.key_hash is intended.
    op.drop_index(op.f("ix_api_keys_key_hash"), table_name="api_keys")
    op.create_index("ix_api_keys_key_hash", "api_keys", ["key_hash"], unique=False)

    op.create_index(
        "ix_kb_chunks_embedding_ivfflat",
        kb_chunks,
        ["embedding"],
        unique=False,
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    op.create_index(
        "ix_kb_chunks_embedding_model",
        kb_chunks,
        ["embedding_model"],
        unique=False,
    )

    # 36 characters is the length of a canonical hyphenated UUID string.
    token_column = sa.Column("public_access_token", sa.String(length=36), nullable=True)
    op.add_column(workflow_runs, token_column)

    # Partial index: uniqueness is enforced only for rows that have a token.
    op.create_index(
        "idx_workflow_runs_public_access_token",
        workflow_runs,
        ["public_access_token"],
        unique=True,
        postgresql_where=sa.text("public_access_token IS NOT NULL"),
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert revision 181475b2a1a1 by undoing ``upgrade`` in reverse order.

    Drops the token index and column, drops the two knowledge-base indexes,
    then restores ``ix_api_keys_key_hash`` as a unique index.
    """
    kb_chunks = "knowledge_base_chunks"
    workflow_runs = "workflow_runs"

    # The index must go before the column it covers.
    op.drop_index(
        "idx_workflow_runs_public_access_token",
        table_name=workflow_runs,
        postgresql_where=sa.text("public_access_token IS NOT NULL"),
    )
    op.drop_column(workflow_runs, "public_access_token")

    op.drop_index("ix_kb_chunks_embedding_model", table_name=kb_chunks)
    op.drop_index(
        "ix_kb_chunks_embedding_ivfflat",
        table_name=kb_chunks,
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )

    # Restore the unique variant of the api_keys.key_hash index.
    op.drop_index("ix_api_keys_key_hash", table_name="api_keys")
    op.create_index(op.f("ix_api_keys_key_hash"), "api_keys", ["key_hash"], unique=True)
|
||||
|
|
@ -14,7 +14,6 @@ FILLER_SOUND_PROBABILITY = 0.0
|
|||
VOICEMAIL_RECORDING_DURATION = 5.0
|
||||
|
||||
# Configuration constants
|
||||
ENABLE_SMART_TURN = os.getenv("ENABLE_SMART_TURN", "false").lower() == "true"
|
||||
ENABLE_TRACING = os.getenv("ENABLE_TRACING", "false").lower() == "true"
|
||||
ENABLE_RNNOISE = os.getenv("ENABLE_RNNOISE", "false").lower() == "true"
|
||||
|
||||
|
|
@ -52,6 +51,23 @@ ENABLE_ARI_STASIS = os.getenv("ENABLE_ARI_STASIS", "false").lower() == "true"
|
|||
SERIALIZE_LOG_OUTPUT = os.getenv("SERIALIZE_LOG_OUTPUT", "false").lower() == "true"
|
||||
ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false").lower() == "true"
|
||||
|
||||
|
||||
def _get_version() -> str:
    """Read the application version from ``pyproject.toml``.

    Looks up ``[project].version`` in the ``pyproject.toml`` located directly
    under ``APP_ROOT_DIR``.

    Returns:
        The version string, or ``"dev"`` when the file cannot be read or
        parsed, the key is missing, or the value is not a non-empty string.
    """
    fallback = "dev"
    try:
        # Imported lazily so a missing tomllib is handled by the fallback too.
        import tomllib

        pyproject_path = APP_ROOT_DIR / "pyproject.toml"
        with open(pyproject_path, "rb") as f:
            pyproject = tomllib.load(f)
        version = pyproject.get("project", {}).get("version", fallback)
    except Exception:
        # Best effort by design: version lookup must never break startup.
        return fallback

    # TOML allows any value type here (e.g. ``version = 1.10`` parses as a
    # float); honour the ``-> str`` contract instead of leaking it to callers.
    if isinstance(version, str) and version:
        return version
    return fallback
|
||||
|
||||
|
||||
# Application version (read from pyproject.toml)
|
||||
APP_VERSION = _get_version()
|
||||
|
||||
# Country code mapping: ISO country code -> international dialing prefix
|
||||
COUNTRY_CODES = {
|
||||
"US": "1", # United States
|
||||
|
|
|
|||
|
|
@ -360,6 +360,17 @@ class WorkflowRunModel(Base):
|
|||
campaign = relationship("CampaignModel")
|
||||
queued_run_id = Column(Integer, ForeignKey("queued_runs.id"), nullable=True)
|
||||
queued_run = relationship("QueuedRunModel", foreign_keys=[queued_run_id])
|
||||
public_access_token = Column(String(36), nullable=True)
|
||||
|
||||
# Indexes
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"idx_workflow_runs_public_access_token",
|
||||
"public_access_token",
|
||||
unique=True,
|
||||
postgresql_where=text("public_access_token IS NOT NULL"),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# LoopTalk Testing Models
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from typing import Optional
|
|||
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
from sqlalchemy.orm import load_only, selectinload
|
||||
|
||||
from api.db.base_client import BaseDBClient
|
||||
from api.db.models import WorkflowDefinitionModel, WorkflowModel, WorkflowRunModel
|
||||
|
|
@ -111,6 +111,70 @@ class WorkflowClient(BaseDBClient):
|
|||
result = await session.execute(query)
|
||||
return result.scalars().all()
|
||||
|
||||
async def get_all_workflows_for_listing(
    self, organization_id: int = None, status: str = None
) -> list[WorkflowModel]:
    """Fetch workflows with only the columns a listing view needs.

    The query restricts loading to ``id``, ``name``, ``status`` and
    ``created_at`` via ``load_only``; other columns are not loaded up front.

    Args:
        organization_id: When truthy, only workflows of this organization.
        status: When truthy, only workflows with this status value.

    Returns:
        The matching ``WorkflowModel`` rows.
    """
    listing_columns = load_only(
        WorkflowModel.id,
        WorkflowModel.name,
        WorkflowModel.status,
        WorkflowModel.created_at,
    )
    stmt = select(WorkflowModel).options(listing_columns)

    # Both filters are optional and skipped for falsy values.
    if organization_id:
        stmt = stmt.where(WorkflowModel.organization_id == organization_id)
    if status:
        stmt = stmt.where(WorkflowModel.status == status)

    async with self.async_session() as session:
        rows = await session.execute(stmt)
        return rows.scalars().all()
|
||||
|
||||
async def get_workflow_counts(self, organization_id: int = None) -> dict[str, int]:
    """Count workflows grouped by status.

    Args:
        organization_id: When truthy, only workflows of this organization.

    Returns:
        Dict that always has ``total``, ``active`` and ``archived`` keys;
        any other status value found in the table gets its own key.
    """
    stmt = select(
        WorkflowModel.status,
        func.count(WorkflowModel.id).label("count"),
    )
    if organization_id:
        stmt = stmt.where(WorkflowModel.organization_id == organization_id)
    stmt = stmt.group_by(WorkflowModel.status)

    async with self.async_session() as session:
        per_status = (await session.execute(stmt)).all()

        # Pre-seed the known statuses so callers can index them directly.
        tally = {"total": 0, "active": 0, "archived": 0}
        for status_name, n in per_status:
            tally[status_name] = n
            tally["total"] += n

        return tally
|
||||
|
||||
async def get_workflow(
|
||||
self, workflow_id: int, user_id: int = None, organization_id: int = None
|
||||
) -> WorkflowModel | None:
|
||||
|
|
@ -310,3 +374,33 @@ class WorkflowClient(BaseDBClient):
|
|||
)
|
||||
)
|
||||
return result.scalar() or 0
|
||||
|
||||
async def get_workflow_run_counts(self, workflow_ids: list[int]) -> dict[int, int]:
    """Count runs for several workflows with one grouped query.

    Args:
        workflow_ids: IDs of the workflows to count runs for.

    Returns:
        Dict mapping every requested workflow ID to its run count; IDs
        without any runs map to 0. An empty input yields an empty dict
        without touching the database.
    """
    if not workflow_ids:
        return {}

    stmt = (
        select(
            WorkflowRunModel.workflow_id,
            func.count(WorkflowRunModel.id).label("run_count"),
        )
        .where(WorkflowRunModel.workflow_id.in_(workflow_ids))
        .group_by(WorkflowRunModel.workflow_id)
    )

    async with self.async_session() as session:
        grouped = (await session.execute(stmt)).all()

        # Workflows with no runs produce no group row, so start everyone at 0.
        totals = dict.fromkeys(workflow_ids, 0)
        totals.update({wf_id: n for wf_id, n in grouped})

        return totals
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
|
@ -414,3 +415,56 @@ class WorkflowRunClient(BaseDBClient):
|
|||
|
||||
organization_id = workflow_run.workflow.user.selected_organization_id
|
||||
return workflow_run, organization_id
|
||||
|
||||
async def ensure_public_access_token(self, workflow_run_id: int) -> Optional[str]:
    """Return the run's public access token, creating one if it has none.

    The row is read with ``SELECT ... FOR UPDATE`` so that concurrent callers
    for the same run serialize on the row lock: the first one writes a token
    and every later one sees and returns that same token. Without the lock
    two callers could each generate a token and the later commit would
    overwrite one that had already been handed out.

    Args:
        workflow_run_id: The ID of the workflow run.

    Returns:
        The public access token string, or None if the workflow run does
        not exist.

    Raises:
        Exception: Whatever the commit raised, re-raised unchanged after the
            session has been rolled back.
    """
    async with self.async_session() as session:
        result = await session.execute(
            select(WorkflowRunModel)
            .where(WorkflowRunModel.id == workflow_run_id)
            # Lock only the workflow_runs row, not any joined tables.
            .with_for_update(of=WorkflowRunModel)
        )
        run = result.scalars().first()
        if run is None:
            return None

        # Return existing token if present; closing the session releases the lock.
        if run.public_access_token:
            return run.public_access_token

        # Generate and persist new token
        token = str(uuid.uuid4())
        run.public_access_token = token

        try:
            await session.commit()
        except Exception:
            await session.rollback()
            # Bare raise keeps the original traceback intact.
            raise

        # The committed value is exactly the local token; no re-read needed.
        return token
|
||||
|
||||
async def get_workflow_run_by_public_token(
    self, token: str
) -> Optional[WorkflowRunModel]:
    """Find the workflow run that owns a public access token.

    Args:
        token: The public access token to look up.

    Returns:
        The first ``WorkflowRunModel`` whose ``public_access_token`` equals
        ``token``, or None when no run matches.
    """
    by_token = select(WorkflowRunModel).where(
        WorkflowRunModel.public_access_token == token
    )
    async with self.async_session() as session:
        matches = await session.execute(by_token)
        return matches.scalars().first()
|
||||
|
|
|
|||
5
api/pyproject.toml
Normal file
5
api/pyproject.toml
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
[project]
|
||||
name = "dograh-api"
|
||||
version = "1.10.0"
|
||||
description = "Backend API for Dograh voice AI platform"
|
||||
requires-python = ">=3.12"
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
from fastapi import APIRouter
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from api.routes.campaign import router as campaign_router
|
||||
from api.routes.credentials import router as credentials_router
|
||||
|
|
@ -9,6 +10,7 @@ from api.routes.looptalk import router as looptalk_router
|
|||
from api.routes.organization import router as organization_router
|
||||
from api.routes.organization_usage import router as organization_usage_router
|
||||
from api.routes.public_agent import router as public_agent_router
|
||||
from api.routes.public_download import router as public_download_router
|
||||
from api.routes.public_embed import router as public_embed_router
|
||||
from api.routes.reports import router as reports_router
|
||||
from api.routes.s3_signed_url import router as s3_router
|
||||
|
|
@ -43,11 +45,24 @@ router.include_router(reports_router)
|
|||
router.include_router(webrtc_signaling_router)
|
||||
router.include_router(public_embed_router)
|
||||
router.include_router(public_agent_router)
|
||||
router.include_router(public_download_router)
|
||||
router.include_router(workflow_embed_router)
|
||||
router.include_router(knowledge_base_router)
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def health():
|
||||
class HealthResponse(BaseModel):
|
||||
status: str
|
||||
version: str
|
||||
backend_api_endpoint: str
|
||||
|
||||
|
||||
@router.get("/health", response_model=HealthResponse)
|
||||
async def health() -> HealthResponse:
|
||||
from api.constants import APP_VERSION, BACKEND_API_ENDPOINT
|
||||
|
||||
logger.debug("Health endpoint called")
|
||||
return {"message": "OK"}
|
||||
return HealthResponse(
|
||||
status="ok",
|
||||
version=APP_VERSION,
|
||||
backend_api_endpoint=BACKEND_API_ENDPOINT,
|
||||
)
|
||||
|
|
|
|||
95
api/routes/public_download.py
Normal file
95
api/routes/public_download.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
"""Public download endpoints for workflow recordings and transcripts.
|
||||
|
||||
These endpoints provide secure, token-based public access to workflow artifacts
|
||||
without requiring authentication. Tokens are generated on-demand when webhooks
|
||||
are executed and included in the webhook payload.
|
||||
"""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
|
||||
from api.db import db_client
|
||||
from api.services.storage import get_storage_for_backend
|
||||
|
||||
router = APIRouter(prefix="/public/download")
|
||||
|
||||
|
||||
@router.get("/workflow/{token}/{artifact_type}")
async def download_workflow_artifact(
    token: str,
    artifact_type: Literal["recording", "transcript"],
    inline: bool = Query(
        default=False, description="Display inline in browser instead of download"
    ),
):
    """Download a workflow recording or transcript via public access token.

    This endpoint:
    1. Looks up the workflow run that owns the public access token
    2. Picks the stored path of the requested artifact
    3. Resolves the storage backend recorded on the run
    4. Generates a signed URL valid for one hour
    5. Redirects to the signed URL

    Args:
        token: The public access token
        artifact_type: Type of artifact - "recording" or "transcript"
        inline: Passed to the storage layer as ``force_inline``

    Returns:
        RedirectResponse to the signed URL (302 redirect)

    Raises:
        HTTPException 404: If no run matches the token or the run has no
            artifact of the requested type
        HTTPException 500: If the storage backend is invalid or a signed URL
            cannot be generated
    """
    # 1. Lookup workflow run by token
    workflow_run = await db_client.get_workflow_run_by_public_token(token)
    if not workflow_run:
        # Only the token prefix is logged so full tokens never reach the logs.
        logger.warning(f"Invalid public access token: {token[:8]}...")
        raise HTTPException(status_code=404, detail="Invalid or expired token")

    # 2. Get file path based on artifact type
    if artifact_type == "recording":
        file_path = workflow_run.recording_url
    else:  # transcript
        file_path = workflow_run.transcript_url

    if not file_path:
        logger.warning(
            f"Artifact not found: type={artifact_type}, workflow_run_id={workflow_run.id}"
        )
        raise HTTPException(
            status_code=404,
            detail=f"No {artifact_type} available for this workflow run",
        )

    # 3. Get storage backend for this workflow run
    try:
        storage = get_storage_for_backend(workflow_run.storage_backend)
    except ValueError as e:
        logger.error(f"Invalid storage backend: {workflow_run.storage_backend}: {e}")
        # Chain the cause so the original error survives in tracebacks.
        raise HTTPException(
            status_code=500, detail="Storage configuration error"
        ) from e

    # 4. Generate signed URL (1 hour expiration)
    try:
        signed_url = await storage.aget_signed_url(
            file_path=file_path,
            expiration=3600,  # 1 hour
            force_inline=inline,
        )
    except Exception as e:
        logger.error(f"Failed to generate signed URL: {e}")
        raise HTTPException(
            status_code=500, detail="Failed to generate download URL"
        ) from e

    if not signed_url:
        logger.error(f"Storage returned None for signed URL: {file_path}")
        raise HTTPException(status_code=500, detail="Failed to generate download URL")

    logger.info(
        f"Generated signed URL for {artifact_type}: workflow_run_id={workflow_run.id}, token={token[:8]}..."
    )

    # 5. Redirect to signed URL
    return RedirectResponse(url=signed_url, status_code=302)
|
||||
|
|
@ -97,6 +97,24 @@ class WorkflowResponse(BaseModel):
|
|||
workflow_configurations: dict | None = None
|
||||
|
||||
|
||||
class WorkflowListResponse(BaseModel):
    """One row of the workflow listing.

    Carries only identifying fields and the run count; the full workflow
    payload is served by ``WorkflowResponse``.
    """

    # Identity and display fields.
    id: int
    name: str
    status: str
    created_at: datetime
    # Number of runs recorded for this workflow.
    total_runs: int
|
||||
|
||||
|
||||
class WorkflowCountResponse(BaseModel):
    """Workflow totals returned by the count endpoint."""

    # Sum over every status.
    total: int
    # Per-status counts.
    active: int
    archived: int
|
||||
|
||||
|
||||
class WorkflowTemplateResponse(BaseModel):
|
||||
id: int
|
||||
template_name: str
|
||||
|
|
@ -359,6 +377,26 @@ class WorkflowSummaryResponse(BaseModel):
|
|||
name: str
|
||||
|
||||
|
||||
@router.get("/count")
async def get_workflow_count(
    user: UserModel = Depends(get_user),
) -> WorkflowCountResponse:
    """Return workflow totals for the caller's selected organization.

    Only aggregate counts are fetched, so this is cheap enough to use for
    checks such as "does this user have any workflows yet" without loading
    workflow rows.
    """
    org_id = user.selected_organization_id
    counts = await db_client.get_workflow_counts(organization_id=org_id)

    total, active, archived = counts["total"], counts["active"], counts["archived"]
    return WorkflowCountResponse(total=total, active=active, archived=archived)
|
||||
|
||||
|
||||
@router.get("/fetch")
|
||||
async def get_workflows(
|
||||
user: UserModel = Depends(get_user),
|
||||
|
|
@ -366,45 +404,43 @@ async def get_workflows(
|
|||
None,
|
||||
description="Filter by status - can be single value (active/archived) or comma-separated (active,archived)",
|
||||
),
|
||||
) -> List[WorkflowResponse]:
|
||||
"""Get all workflows for the authenticated user's organization"""
|
||||
) -> List[WorkflowListResponse]:
|
||||
"""Get all workflows for the authenticated user's organization.
|
||||
|
||||
Returns a lightweight response with only essential fields for listing.
|
||||
Use GET /workflow/fetch/{workflow_id} to get full workflow details.
|
||||
"""
|
||||
# Handle comma-separated status values
|
||||
if status and "," in status:
|
||||
# Split comma-separated values and fetch workflows for each status
|
||||
status_list = [s.strip() for s in status.split(",")]
|
||||
all_workflows = []
|
||||
for status_value in status_list:
|
||||
workflows = await db_client.get_all_workflows(
|
||||
workflows = await db_client.get_all_workflows_for_listing(
|
||||
organization_id=user.selected_organization_id, status=status_value
|
||||
)
|
||||
all_workflows.extend(workflows)
|
||||
workflows = all_workflows
|
||||
else:
|
||||
# Single status or no status filter
|
||||
workflows = await db_client.get_all_workflows(
|
||||
workflows = await db_client.get_all_workflows_for_listing(
|
||||
organization_id=user.selected_organization_id, status=status
|
||||
)
|
||||
|
||||
# Get run counts for each workflow
|
||||
workflow_responses = []
|
||||
for workflow in workflows:
|
||||
run_count = await db_client.get_workflow_run_count(workflow.id)
|
||||
workflow_responses.append(
|
||||
{
|
||||
"id": workflow.id,
|
||||
"name": workflow.name,
|
||||
"status": workflow.status,
|
||||
"created_at": workflow.created_at,
|
||||
"workflow_definition": workflow.workflow_definition_with_fallback,
|
||||
"current_definition_id": workflow.current_definition_id,
|
||||
"template_context_variables": workflow.template_context_variables,
|
||||
"call_disposition_codes": workflow.call_disposition_codes,
|
||||
"workflow_configurations": workflow.workflow_configurations,
|
||||
"total_runs": run_count,
|
||||
}
|
||||
)
|
||||
# Get run counts for all workflows in a single query
|
||||
workflow_ids = [workflow.id for workflow in workflows]
|
||||
run_counts = await db_client.get_workflow_run_counts(workflow_ids)
|
||||
|
||||
return workflow_responses
|
||||
return [
|
||||
WorkflowListResponse(
|
||||
id=workflow.id,
|
||||
name=workflow.name,
|
||||
status=workflow.status,
|
||||
created_at=workflow.created_at,
|
||||
total_runs=run_counts.get(workflow.id, 0),
|
||||
)
|
||||
for workflow in workflows
|
||||
]
|
||||
|
||||
|
||||
@router.get("/fetch/{workflow_id}")
|
||||
|
|
|
|||
|
|
@ -170,13 +170,6 @@ class CampaignCallDispatcher:
|
|||
)
|
||||
raise ValueError(f"Workflow {campaign.workflow_id} not found")
|
||||
|
||||
# Merge context variables (queued_run context already includes retry info if applicable)
|
||||
initial_context = {
|
||||
**workflow.template_context_variables,
|
||||
**queued_run.context_variables,
|
||||
"campaign_id": campaign.id,
|
||||
}
|
||||
|
||||
# Extract phone number
|
||||
phone_number = queued_run.context_variables.get("phone_number")
|
||||
if not phone_number:
|
||||
|
|
@ -186,13 +179,25 @@ class CampaignCallDispatcher:
|
|||
)
|
||||
raise ValueError(f"No phone number in queued run {queued_run.id}")
|
||||
|
||||
# Create workflow run with queued_run_id tracking
|
||||
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
|
||||
|
||||
# Get provider first to determine the mode
|
||||
provider = await self.get_telephony_provider(campaign.organization_id)
|
||||
workflow_run_mode = provider.PROVIDER_NAME
|
||||
|
||||
logger.info(f"Provider name: {provider.PROVIDER_NAME}")
|
||||
logger.info(f"Queued run context: {queued_run.context_variables}")
|
||||
|
||||
# Merge context variables (queued_run context already includes retry info if applicable)
|
||||
initial_context = {
|
||||
**workflow.template_context_variables,
|
||||
**queued_run.context_variables,
|
||||
"campaign_id": campaign.id,
|
||||
"provider": provider.PROVIDER_NAME,
|
||||
}
|
||||
|
||||
logger.info(f"Final initial_context: {initial_context}")
|
||||
|
||||
# Create workflow run with queued_run_id tracking
|
||||
workflow_run_name = f"WR-CAMPAIGN-{campaign.id}-{queued_run.id}"
|
||||
try:
|
||||
workflow_run = await db_client.create_workflow_run(
|
||||
name=workflow_run_name,
|
||||
|
|
@ -243,6 +248,8 @@ class CampaignCallDispatcher:
|
|||
to_number=phone_number,
|
||||
webhook_url=webhook_url,
|
||||
workflow_run_id=workflow_run.id,
|
||||
workflow_id=campaign.workflow_id,
|
||||
user_id=campaign.created_by,
|
||||
)
|
||||
|
||||
# Store provider type and metadata in gathered_context
|
||||
|
|
|
|||
|
|
@ -300,7 +300,7 @@ TTSConfig = Annotated[
|
|||
###################################################### STT ########################################################################
|
||||
|
||||
|
||||
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general"]
|
||||
DEEPGRAM_STT_MODELS = ["nova-2", "nova-3-general", "flux-general-en"]
|
||||
DEEPGRAM_LANGUAGES = [
|
||||
"multi",
|
||||
"en",
|
||||
|
|
|
|||
|
|
@ -103,7 +103,6 @@ class LoopTalkPipelineBuilder:
|
|||
|
||||
# Set the context and audio_buffer after creation
|
||||
engine.set_context(context)
|
||||
engine.set_audio_buffer(audio_buffer)
|
||||
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
|
|
|
|||
|
|
@ -12,9 +12,8 @@ from pipecat.frames.frames import (
|
|||
Frame,
|
||||
InputAudioRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
StartFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
from pipecat.serializers.base_serializer import FrameSerializer
|
||||
|
||||
|
||||
class InternalFrameSerializer(FrameSerializer):
|
||||
|
|
@ -24,15 +23,6 @@ class InternalFrameSerializer(FrameSerializer):
|
|||
preventing control frames from creating infinite loops.
|
||||
"""
|
||||
|
||||
@property
|
||||
def type(self) -> FrameSerializerType:
|
||||
"""Internal transport uses binary frames."""
|
||||
return FrameSerializerType.BINARY
|
||||
|
||||
async def setup(self, frame: StartFrame):
|
||||
"""No setup required for internal transport."""
|
||||
pass
|
||||
|
||||
async def serialize(self, frame: Frame) -> bytes | None:
|
||||
"""Only serialize audio frames for transmission between agents."""
|
||||
# Only pass audio frames between agents
|
||||
|
|
|
|||
|
|
@ -22,16 +22,21 @@ from pipecat.pipeline.task import PipelineTask
|
|||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
|
||||
|
||||
def register_transport_event_handlers(
|
||||
def register_event_handlers(
|
||||
task: PipelineTask,
|
||||
transport,
|
||||
workflow_run_id,
|
||||
workflow_run_id: int,
|
||||
engine: PipecatEngine,
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
in_memory_logs_buffer: InMemoryLogsBuffer,
|
||||
pipeline_metrics_aggregator: PipelineMetricsAggregator,
|
||||
audio_config=AudioConfig,
|
||||
):
|
||||
"""Register event handlers for transport events"""
|
||||
"""Register all event handlers for transport and task events.
|
||||
|
||||
Returns:
|
||||
Tuple of (in_memory_audio_buffer, in_memory_transcript_buffer) for use by other handlers.
|
||||
"""
|
||||
# Initialize in-memory buffers with proper audio configuration
|
||||
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
|
||||
num_channels = 1 # Pipeline audio is always mono
|
||||
|
|
@ -48,13 +53,35 @@ def register_transport_event_handlers(
|
|||
)
|
||||
in_memory_transcript_buffer = InMemoryTranscriptBuffer(workflow_run_id)
|
||||
|
||||
# Track both events to ensure LLM is only triggered after both occur
|
||||
ready_state = {
|
||||
"pipeline_started": False,
|
||||
"client_connected": False,
|
||||
"llm_triggered": False,
|
||||
}
|
||||
|
||||
async def maybe_trigger_llm():
|
||||
"""Trigger LLM only after both pipeline_started and client_connected events."""
|
||||
if (
|
||||
ready_state["pipeline_started"]
|
||||
and ready_state["client_connected"]
|
||||
and not ready_state["llm_triggered"]
|
||||
):
|
||||
ready_state["llm_triggered"] = True
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - triggering initial LLM generation"
|
||||
)
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, participant):
|
||||
logger.debug("In on_client_connected callback handler - initializing workflow")
|
||||
async def on_client_connected(_transport, _participant):
|
||||
logger.debug("In on_client_connected callback handler")
|
||||
await audio_buffer.start_recording()
|
||||
ready_state["client_connected"] = True
|
||||
await maybe_trigger_llm()
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, participant):
|
||||
async def on_client_disconnected(_transport, _participant):
|
||||
call_disposed = engine.is_call_disposed()
|
||||
|
||||
logger.debug(
|
||||
|
|
@ -69,33 +96,16 @@ def register_transport_event_handlers(
|
|||
if not call_disposed:
|
||||
await task.cancel()
|
||||
|
||||
# Return the buffers so they can be passed to other handlers
|
||||
return in_memory_audio_buffer, in_memory_transcript_buffer
|
||||
|
||||
|
||||
def register_task_event_handler(
|
||||
workflow_run_id: int,
|
||||
engine: PipecatEngine,
|
||||
task: PipelineTask,
|
||||
transport,
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
in_memory_audio_buffer: InMemoryAudioBuffer,
|
||||
in_memory_transcript_buffer: InMemoryTranscriptBuffer,
|
||||
in_memory_logs_buffer: InMemoryLogsBuffer,
|
||||
pipeline_metrics_aggregator: PipelineMetricsAggregator,
|
||||
):
|
||||
@task.event_handler("on_pipeline_started")
|
||||
async def on_pipeline_started(task: PipelineTask, frame: Frame):
|
||||
logger.debug(
|
||||
"In on_pipeline_started callback handler - triggering initial LLM generation"
|
||||
)
|
||||
# Trigger initial LLM generation after pipeline has started
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
|
||||
logger.debug("In on_pipeline_started callback handler")
|
||||
ready_state["pipeline_started"] = True
|
||||
await maybe_trigger_llm()
|
||||
|
||||
@task.event_handler("on_pipeline_finished")
|
||||
async def on_pipeline_finished(
|
||||
task: PipelineTask,
|
||||
frame: Frame,
|
||||
_frame: Frame,
|
||||
):
|
||||
logger.debug(f"In on_pipeline_finished callback handler")
|
||||
|
||||
|
|
@ -207,14 +217,13 @@ def register_task_event_handler(
|
|||
if workflow_run and workflow_run.campaign_id:
|
||||
await campaign_call_dispatcher.release_call_slot(workflow_run_id)
|
||||
|
||||
# Write buffers to temp files and enqueue S3 upload
|
||||
# Write buffers to temp files and enqueue combined processing task
|
||||
audio_temp_path = None
|
||||
transcript_temp_path = None
|
||||
|
||||
try:
|
||||
# Only upload if buffers have content
|
||||
if not in_memory_audio_buffer.is_empty:
|
||||
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
|
||||
await enqueue_job(
|
||||
FunctionNames.UPLOAD_AUDIO_TO_S3, workflow_run_id, audio_temp_path
|
||||
)
|
||||
else:
|
||||
logger.debug("Audio buffer is empty, skipping upload")
|
||||
|
||||
|
|
@ -222,11 +231,6 @@ def register_task_event_handler(
|
|||
transcript_temp_path = (
|
||||
await in_memory_transcript_buffer.write_to_temp_file()
|
||||
)
|
||||
await enqueue_job(
|
||||
FunctionNames.UPLOAD_TRANSCRIPT_TO_S3,
|
||||
workflow_run_id,
|
||||
transcript_temp_path,
|
||||
)
|
||||
else:
|
||||
logger.debug("Transcript buffer is empty, skipping upload")
|
||||
|
||||
|
|
@ -234,10 +238,18 @@ def register_task_event_handler(
|
|||
logger.error(f"Error preparing buffers for S3 upload: {e}", exc_info=True)
|
||||
|
||||
await enqueue_job(FunctionNames.CALCULATE_WORKFLOW_RUN_COST, workflow_run_id)
|
||||
|
||||
# Combined task: uploads artifacts then runs integrations sequentially
|
||||
await enqueue_job(
|
||||
FunctionNames.RUN_INTEGRATIONS_POST_WORKFLOW_RUN, workflow_run_id
|
||||
FunctionNames.PROCESS_WORKFLOW_COMPLETION,
|
||||
workflow_run_id,
|
||||
audio_temp_path,
|
||||
transcript_temp_path,
|
||||
)
|
||||
|
||||
# Return the buffers so they can be passed to other handlers
|
||||
return in_memory_audio_buffer, in_memory_transcript_buffer
|
||||
|
||||
|
||||
def register_audio_data_handler(
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
|
|
@ -260,18 +272,26 @@ def register_audio_data_handler(
|
|||
# Could implement overflow to disk here if needed
|
||||
|
||||
|
||||
def register_transcript_handler(
|
||||
transcript, workflow_run_id, in_memory_buffer: InMemoryTranscriptBuffer
|
||||
def register_transcript_handlers(
|
||||
user_aggregator,
|
||||
assistant_aggregator,
|
||||
workflow_run_id,
|
||||
in_memory_buffer: InMemoryTranscriptBuffer,
|
||||
):
|
||||
"""Register event handler for transcript updates"""
|
||||
"""Register event handlers for transcript updates on context aggregators.
|
||||
|
||||
@transcript.event_handler("on_transcript_update")
|
||||
async def on_transcript_update(processor, frame):
|
||||
transcript_text = ""
|
||||
for msg in frame.messages:
|
||||
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
|
||||
line = f"{timestamp}{msg.role}: {msg.content}\n"
|
||||
transcript_text += line
|
||||
Uses the on_user_turn_stopped and on_assistant_turn_stopped events to capture
|
||||
transcripts as turns complete, following the event-based pattern.
|
||||
"""
|
||||
|
||||
# Use in-memory buffer
|
||||
await in_memory_buffer.append(transcript_text)
|
||||
@user_aggregator.event_handler("on_user_turn_stopped")
|
||||
async def on_user_turn_stopped(aggregator, strategy, message):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}user: {message.content}\n"
|
||||
await in_memory_buffer.append(line)
|
||||
|
||||
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
|
||||
async def on_assistant_turn_stopped(aggregator, message):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}assistant: {message.content}\n"
|
||||
await in_memory_buffer.append(line)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
|
@ -11,14 +10,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.processors.transcript_processor import TranscriptProcessor
|
||||
from pipecat.utils.context import turn_var
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
|
||||
|
||||
def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine"):
|
||||
def create_pipeline_components(audio_config: AudioConfig):
|
||||
"""Create and return the main pipeline components with proper audio configuration"""
|
||||
logger.info(f"Creating pipeline components with audio config: {audio_config}")
|
||||
|
||||
|
|
@ -28,28 +23,21 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
|
|||
buffer_size=audio_config.buffer_size_bytes,
|
||||
)
|
||||
|
||||
transcript = TranscriptProcessor(
|
||||
assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
|
||||
)
|
||||
|
||||
context = LLMContext()
|
||||
|
||||
return audio_buffer, transcript, context
|
||||
return audio_buffer, context
|
||||
|
||||
|
||||
def build_pipeline(
|
||||
transport,
|
||||
stt,
|
||||
transcript,
|
||||
audio_buffer,
|
||||
llm,
|
||||
tts,
|
||||
user_context_aggregator,
|
||||
assistant_context_aggregator,
|
||||
pipeline_engine_callback_processor,
|
||||
stt_mute_filter,
|
||||
pipeline_metrics_aggregator,
|
||||
user_idle_disconnect,
|
||||
voicemail_detector=None,
|
||||
):
|
||||
"""Build the main pipeline with all components.
|
||||
|
|
@ -63,7 +51,7 @@ def build_pipeline(
|
|||
# Build processors list with optional voicemail detection
|
||||
processors = [
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT (audio_passthrough=True by default, passes InputAudioRawFrame)
|
||||
stt,
|
||||
]
|
||||
|
||||
# Insert voicemail detector after STT if enabled
|
||||
|
|
@ -76,16 +64,12 @@ def build_pipeline(
|
|||
# Continue with the rest of the pipeline
|
||||
processors.extend(
|
||||
[
|
||||
stt_mute_filter, # STTMuteFilters don't let VAD related events pass through if muted
|
||||
user_idle_disconnect,
|
||||
transcript.user(),
|
||||
user_context_aggregator,
|
||||
llm, # LLM
|
||||
pipeline_engine_callback_processor,
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
audio_buffer, # AudioBufferProcessor - records both input and output audio
|
||||
transcript.assistant(),
|
||||
assistant_context_aggregator, # Assistant spoken responses
|
||||
pipeline_metrics_aggregator,
|
||||
]
|
||||
|
|
@ -98,7 +82,6 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
|
|||
"""Create a pipeline task with appropriate parameters"""
|
||||
# Set up pipeline params with audio configuration if provided
|
||||
pipeline_params = PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
send_initial_empty_metrics=False,
|
||||
|
|
@ -119,6 +102,7 @@ def create_pipeline_task(pipeline, workflow_run_id, audio_config: AudioConfig =
|
|||
pipeline,
|
||||
params=pipeline_params,
|
||||
enable_tracing=ENABLE_TRACING,
|
||||
enable_rtvi=False,
|
||||
conversation_id=f"{workflow_run_id}",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,12 +7,12 @@ from loguru import logger
|
|||
from api.db import db_client
|
||||
from api.db.models import WorkflowModel
|
||||
from api.enums import WorkflowRunMode
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
|
||||
from api.services.pipecat.event_handlers import (
|
||||
register_audio_data_handler,
|
||||
register_task_event_handler,
|
||||
register_transcript_handler,
|
||||
register_transport_event_handlers,
|
||||
register_event_handlers,
|
||||
register_transcript_handlers,
|
||||
)
|
||||
from api.services.pipecat.in_memory_buffers import InMemoryLogsBuffer
|
||||
from api.services.pipecat.pipeline_builder import (
|
||||
|
|
@ -46,20 +46,25 @@ from api.services.workflow.pipecat_engine import PipecatEngine
|
|||
from api.services.workflow.workflow import WorkflowGraph
|
||||
from pipecat.extensions.voicemail.voicemail_detector import VoicemailDetector
|
||||
from pipecat.pipeline.base_task import PipelineTaskParams
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMAssistantAggregatorParams,
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.processors.filters.stt_mute_filter import (
|
||||
STTMuteConfig,
|
||||
STTMuteFilter,
|
||||
STTMuteStrategy,
|
||||
)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
|
||||
from pipecat.turns.user_mute import MuteUntilFirstBotCompleteUserMuteStrategy
|
||||
from pipecat.turns.user_start import (
|
||||
ExternalUserTurnStartStrategy,
|
||||
TranscriptionUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_start.vad_user_turn_start_strategy import (
|
||||
VADUserTurnStartStrategy,
|
||||
)
|
||||
from pipecat.turns.user_stop import (
|
||||
ExternalUserTurnStopStrategy,
|
||||
TranscriptionUserTurnStopStrategy,
|
||||
)
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
from pipecat.utils.tracing.context_registry import ContextProviderRegistry
|
||||
|
|
@ -517,12 +522,11 @@ async def _run_pipeline(
|
|||
embeddings_model=embeddings_model,
|
||||
)
|
||||
|
||||
# Create pipeline components with audio configuration and engine
|
||||
audio_buffer, transcript, context = create_pipeline_components(audio_config, engine)
|
||||
# Create pipeline components with audio configuration
|
||||
audio_buffer, context = create_pipeline_components(audio_config)
|
||||
|
||||
# Set the context and audio_buffer after creation
|
||||
engine.set_context(context)
|
||||
engine.set_audio_buffer(audio_buffer)
|
||||
|
||||
# Set Stasis connection for immediate transfers (if available)
|
||||
if stasis_connection:
|
||||
|
|
@ -532,7 +536,31 @@ async def _run_pipeline(
|
|||
expect_stripped_words=True,
|
||||
correct_aggregation_callback=engine.create_aggregation_correction_callback(),
|
||||
)
|
||||
user_params = LLMUserAggregatorParams(enable_emulated_vad_interruptions=True)
|
||||
|
||||
# Configure turn strategies based on STT provider and model
|
||||
# Deepgram Flux uses external turn detection (VAD + External start/stop)
|
||||
# Other models use transcription-based turn detection with smart turn analyzer
|
||||
is_deepgram_flux = (
|
||||
user_config.stt.provider == ServiceProviders.DEEPGRAM.value
|
||||
and user_config.stt.model == "flux-general-en"
|
||||
)
|
||||
|
||||
if is_deepgram_flux:
|
||||
user_turn_strategies = UserTurnStrategies(
|
||||
start=[VADUserTurnStartStrategy(), ExternalUserTurnStartStrategy()],
|
||||
stop=[ExternalUserTurnStopStrategy()],
|
||||
)
|
||||
else:
|
||||
user_turn_strategies = UserTurnStrategies(
|
||||
start=[VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()],
|
||||
stop=[TranscriptionUserTurnStopStrategy()],
|
||||
)
|
||||
|
||||
user_params = LLMUserAggregatorParams(
|
||||
user_turn_strategies=user_turn_strategies,
|
||||
user_mute_strategies=[MuteUntilFirstBotCompleteUserMuteStrategy()],
|
||||
user_idle_timeout=max_user_idle_timeout,
|
||||
)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context, assistant_params=assistant_params, user_params=user_params
|
||||
)
|
||||
|
|
@ -547,25 +575,20 @@ async def _run_pipeline(
|
|||
|
||||
pipeline_metrics_aggregator = PipelineMetricsAggregator()
|
||||
|
||||
# Create STT mute filter using the selected strategies and the engine's callback
|
||||
stt_mute_filter = STTMuteFilter(
|
||||
config=STTMuteConfig(
|
||||
strategies={
|
||||
STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE,
|
||||
STTMuteStrategy.CUSTOM,
|
||||
},
|
||||
should_mute_callback=engine.create_should_mute_callback(),
|
||||
)
|
||||
)
|
||||
|
||||
# Use engine's user idle callback with configured timeout
|
||||
user_idle_disconnect = UserIdleProcessor(
|
||||
callback=engine.create_user_idle_callback(), timeout=max_user_idle_timeout
|
||||
)
|
||||
|
||||
user_context_aggregator = context_aggregator.user()
|
||||
assistant_context_aggregator = context_aggregator.assistant()
|
||||
|
||||
# Register user idle event handlers
|
||||
user_idle_handler = engine.create_user_idle_handler()
|
||||
|
||||
@user_context_aggregator.event_handler("on_user_turn_idle")
|
||||
async def on_user_turn_idle(aggregator):
|
||||
await user_idle_handler.handle_idle(aggregator)
|
||||
|
||||
@user_context_aggregator.event_handler("on_user_turn_started")
|
||||
async def on_user_turn_started(aggregator, strategy):
|
||||
user_idle_handler.reset()
|
||||
|
||||
# Create voicemail detector if enabled in the workflow's start node
|
||||
voicemail_detector = None
|
||||
start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
|
||||
|
|
@ -592,16 +615,13 @@ async def _run_pipeline(
|
|||
pipeline = build_pipeline(
|
||||
transport,
|
||||
stt,
|
||||
transcript,
|
||||
audio_buffer,
|
||||
llm,
|
||||
tts,
|
||||
user_context_aggregator,
|
||||
assistant_context_aggregator,
|
||||
pipeline_engine_callback_processor,
|
||||
stt_mute_filter,
|
||||
pipeline_metrics_aggregator,
|
||||
user_idle_disconnect,
|
||||
voicemail_detector=voicemail_detector,
|
||||
)
|
||||
|
||||
|
|
@ -614,18 +634,6 @@ async def _run_pipeline(
|
|||
# Initialize the engine to set the initial context
|
||||
await engine.initialize()
|
||||
|
||||
# Register event handlers
|
||||
in_memory_audio_buffer, in_memory_transcript_buffer = (
|
||||
register_transport_event_handlers(
|
||||
task,
|
||||
transport,
|
||||
workflow_run_id,
|
||||
engine=engine,
|
||||
audio_buffer=audio_buffer,
|
||||
audio_config=audio_config,
|
||||
)
|
||||
)
|
||||
|
||||
# Add real-time feedback observer if WebSocket sender is available
|
||||
# Note: ws_sender was already fetched earlier for node_transition_callback
|
||||
if ws_sender:
|
||||
|
|
@ -635,21 +643,24 @@ async def _run_pipeline(
|
|||
)
|
||||
task.add_observer(feedback_observer)
|
||||
|
||||
register_task_event_handler(
|
||||
workflow_run_id,
|
||||
engine,
|
||||
# Register event handlers
|
||||
in_memory_audio_buffer, in_memory_transcript_buffer = register_event_handlers(
|
||||
task,
|
||||
transport,
|
||||
audio_buffer,
|
||||
in_memory_audio_buffer,
|
||||
in_memory_transcript_buffer,
|
||||
in_memory_logs_buffer,
|
||||
pipeline_metrics_aggregator,
|
||||
workflow_run_id,
|
||||
engine=engine,
|
||||
audio_buffer=audio_buffer,
|
||||
in_memory_logs_buffer=in_memory_logs_buffer,
|
||||
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
|
||||
audio_config=audio_config,
|
||||
)
|
||||
|
||||
register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)
|
||||
register_transcript_handler(
|
||||
transcript, workflow_run_id, in_memory_transcript_buffer
|
||||
register_transcript_handlers(
|
||||
user_context_aggregator,
|
||||
assistant_context_aggregator,
|
||||
workflow_run_id,
|
||||
in_memory_transcript_buffer,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from api.constants import MPS_API_URL
|
|||
from api.services.configuration.registry import ServiceProviders
|
||||
from pipecat.services.azure.llm import AzureLLMService
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService, LiveOptions
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.dograh.llm import DograhLLMService
|
||||
|
|
@ -34,6 +35,20 @@ def create_stt_service(user_config):
|
|||
f"Creating STT service: provider={user_config.stt.provider}, model={user_config.stt.model}"
|
||||
)
|
||||
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
|
||||
# Check if using Flux model (English-only, no language selection)
|
||||
if user_config.stt.model == "flux-general-en":
|
||||
logger.debug("Using DeepGram Flux Model")
|
||||
return DeepgramFluxSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
model=user_config.stt.model,
|
||||
params=DeepgramFluxSTTService.InputParams(
|
||||
eot_timeout_ms=3000,
|
||||
eot_threshold=0.7,
|
||||
),
|
||||
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
|
||||
)
|
||||
|
||||
# Other models than flux
|
||||
# Use language from user config, defaulting to "multi" for multilingual support
|
||||
language = getattr(user_config.stt, "language", None) or "multi"
|
||||
live_options = LiveOptions(
|
||||
|
|
@ -44,7 +59,9 @@ def create_stt_service(user_config):
|
|||
)
|
||||
logger.debug(f"Using DeepGram Model - {user_config.stt.model}")
|
||||
return DeepgramSTTService(
|
||||
live_options=live_options, api_key=user_config.stt.api_key
|
||||
live_options=live_options,
|
||||
api_key=user_config.stt.api_key,
|
||||
should_interrupt=False, # Let UserAggregator take care of sending InterruptionFrame
|
||||
)
|
||||
elif user_config.stt.provider == ServiceProviders.OPENAI.value:
|
||||
return OpenAISTTService(
|
||||
|
|
|
|||
|
|
@ -2,10 +2,9 @@ import os
|
|||
|
||||
from fastapi import WebSocket
|
||||
|
||||
from api.constants import APP_ROOT_DIR, ENABLE_RNNOISE, ENABLE_SMART_TURN
|
||||
from api.constants import APP_ROOT_DIR
|
||||
from api.db import db_client
|
||||
from api.enums import OrganizationConfigurationKey
|
||||
from api.services.looptalk.internal_transport import InternalTransport
|
||||
from api.services.pipecat.audio_config import AudioConfig
|
||||
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
|
||||
from api.services.telephony.stasis_rtp_serializer import StasisRTPFrameSerializer
|
||||
|
|
@ -13,11 +12,8 @@ from api.services.telephony.stasis_rtp_transport import (
|
|||
StasisRTPTransport,
|
||||
StasisRTPTransportParams,
|
||||
)
|
||||
from pipecat.audio.filters.rnnoise_filter import RNNoiseFilter
|
||||
from pipecat.audio.mixers.silence_mixer import SilenceAudioMixer
|
||||
from pipecat.audio.mixers.soundfile_mixer import SoundfileMixer
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer, VADParams
|
||||
from pipecat.serializers.twilio import TwilioFrameSerializer
|
||||
from pipecat.serializers.vobiz import VobizFrameSerializer
|
||||
|
|
@ -35,19 +31,6 @@ librnnoise_path = os.path.normpath(
|
|||
)
|
||||
|
||||
|
||||
def create_turn_analyzer(workflow_run_id: int, audio_config: AudioConfig):
|
||||
"""Create a turn analyzer backed by the local Smart Turn HTTP service.
|
||||
|
||||
Args:
|
||||
workflow_run_id: ID of the workflow run for turn analyzer context
|
||||
audio_config: Audio configuration containing pipeline sample rate
|
||||
"""
|
||||
if ENABLE_SMART_TURN:
|
||||
return LocalSmartTurnAnalyzerV3(params=SmartTurnParams())
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def create_twilio_transport(
|
||||
websocket_client: WebSocket,
|
||||
stream_sid: str,
|
||||
|
|
@ -78,8 +61,6 @@ async def create_twilio_transport(
|
|||
f"Incomplete Twilio configuration for organization {organization_id}"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
serializer = TwilioFrameSerializer(
|
||||
stream_sid=stream_sid,
|
||||
call_sid=call_sid,
|
||||
|
|
@ -119,11 +100,7 @@ async def create_twilio_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -158,8 +135,6 @@ async def create_cloudonix_transport(
|
|||
f"Required: bearer_token, domain_id"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
from pipecat.serializers.cloudonix import CloudonixFrameSerializer
|
||||
|
||||
serializer = CloudonixFrameSerializer(
|
||||
|
|
@ -202,11 +177,7 @@ async def create_cloudonix_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -238,8 +209,6 @@ async def create_vonage_transport(
|
|||
f"Incomplete Vonage configuration for organization {organization_id}"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
serializer = VonageFrameSerializer(
|
||||
call_uuid=call_uuid,
|
||||
application_id=application_id,
|
||||
|
|
@ -283,11 +252,7 @@ async def create_vonage_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -337,8 +302,6 @@ async def create_vobiz_transport(
|
|||
f"from_numbers={len(config.get('from_numbers', []))} numbers"
|
||||
)
|
||||
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
# Use VobizFrameSerializer for Vobiz WebSocket protocol
|
||||
serializer = VobizFrameSerializer(
|
||||
stream_id=stream_id,
|
||||
|
|
@ -389,11 +352,7 @@ async def create_vobiz_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -411,7 +370,6 @@ def create_webrtc_transport(
|
|||
ambient_noise_config: dict | None = None,
|
||||
):
|
||||
"""Create a transport for WebRTC connections"""
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
return SmallWebRTCTransport(
|
||||
webrtc_connection=webrtc_connection,
|
||||
|
|
@ -445,10 +403,6 @@ def create_webrtc_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -461,7 +415,6 @@ def create_stasis_transport(
|
|||
ambient_noise_config: dict | None = None,
|
||||
):
|
||||
"""Create a transport for ARI connections"""
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
|
||||
serializer = StasisRTPFrameSerializer(
|
||||
StasisRTPFrameSerializer.InputParams(
|
||||
|
|
@ -502,11 +455,7 @@ def create_stasis_transport(
|
|||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
serializer=serializer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
)
|
||||
|
||||
|
|
@ -528,46 +477,44 @@ def create_internal_transport(
|
|||
Returns:
|
||||
InternalTransport instance configured with turn analyzer
|
||||
"""
|
||||
turn_analyzer = create_turn_analyzer(workflow_run_id, audio_config)
|
||||
pass
|
||||
# Commented out because looptalk coming in the regular import flow
|
||||
# was causing issue. May be move this to looptalk/orchestrator.py
|
||||
|
||||
# Create and return the internal transport with latency
|
||||
return InternalTransport(
|
||||
params=TransportParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_sample_rate=audio_config.transport_out_sample_rate,
|
||||
audio_out_channels=1,
|
||||
audio_in_enabled=True,
|
||||
audio_in_sample_rate=audio_config.transport_in_sample_rate,
|
||||
audio_in_channels=1,
|
||||
vad_analyzer=(
|
||||
SileroVADAnalyzer(
|
||||
params=VADParams(
|
||||
confidence=vad_config.get("confidence", 0.7),
|
||||
start_secs=vad_config.get("start_seconds", 0.4),
|
||||
stop_secs=vad_config.get("stop_seconds", 0.8),
|
||||
min_volume=vad_config.get("minimum_volume", 0.6),
|
||||
)
|
||||
)
|
||||
if vad_config
|
||||
else SileroVADAnalyzer()
|
||||
),
|
||||
audio_out_mixer=(
|
||||
SoundfileMixer(
|
||||
sound_files={
|
||||
"office": APP_ROOT_DIR
|
||||
/ "assets"
|
||||
/ f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
|
||||
},
|
||||
default_sound="office",
|
||||
volume=ambient_noise_config.get("volume", 0.3),
|
||||
)
|
||||
if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
else SilenceAudioMixer()
|
||||
),
|
||||
turn_analyzer=turn_analyzer,
|
||||
audio_in_filter=RNNoiseFilter(library_path=librnnoise_path)
|
||||
if ENABLE_RNNOISE
|
||||
else None,
|
||||
),
|
||||
latency_seconds=latency_seconds,
|
||||
)
|
||||
# return InternalTransport(
|
||||
# params=TransportParams(
|
||||
# audio_out_enabled=True,
|
||||
# audio_out_sample_rate=audio_config.transport_out_sample_rate,
|
||||
# audio_out_channels=1,
|
||||
# audio_in_enabled=True,
|
||||
# audio_in_sample_rate=audio_config.transport_in_sample_rate,
|
||||
# audio_in_channels=1,
|
||||
# vad_analyzer=(
|
||||
# SileroVADAnalyzer(
|
||||
# params=VADParams(
|
||||
# confidence=vad_config.get("confidence", 0.7),
|
||||
# start_secs=vad_config.get("start_seconds", 0.4),
|
||||
# stop_secs=vad_config.get("stop_seconds", 0.8),
|
||||
# min_volume=vad_config.get("minimum_volume", 0.6),
|
||||
# )
|
||||
# )
|
||||
# if vad_config
|
||||
# else SileroVADAnalyzer()
|
||||
# ),
|
||||
# audio_out_mixer=(
|
||||
# SoundfileMixer(
|
||||
# sound_files={
|
||||
# "office": APP_ROOT_DIR
|
||||
# / "assets"
|
||||
# / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
|
||||
# },
|
||||
# default_sound="office",
|
||||
# volume=ambient_noise_config.get("volume", 0.3),
|
||||
# )
|
||||
# if ambient_noise_config and ambient_noise_config.get("enabled", False)
|
||||
# else SilenceAudioMixer()
|
||||
# ),
|
||||
# ),
|
||||
# latency_seconds=latency_seconds,
|
||||
# )
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ The serializer:
|
|||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
|
|
@ -22,8 +24,7 @@ from pipecat.frames.frames import (
|
|||
InputAudioRawFrame,
|
||||
StartFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
from pydantic import BaseModel
|
||||
from pipecat.serializers.base_serializer import FrameSerializer
|
||||
|
||||
|
||||
class StasisRTPFrameSerializer(FrameSerializer):
|
||||
|
|
@ -59,11 +60,6 @@ class StasisRTPFrameSerializer(FrameSerializer):
|
|||
# Resampler shared between encode / decode paths
|
||||
self._resampler = create_default_resampler()
|
||||
|
||||
@property
|
||||
def type(self) -> FrameSerializerType:
|
||||
"""Stasis uses raw bytes → BINARY."""
|
||||
return FrameSerializerType.BINARY
|
||||
|
||||
async def setup(self, frame: StartFrame):
|
||||
"""Remember pipeline configuration."""
|
||||
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ from pipecat.utils.enums import EndTaskReason
|
|||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.telephony.stasis_rtp_connection import StasisRTPConnection
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
|
|
@ -64,7 +63,6 @@ class PipecatEngine:
|
|||
transport: Optional[BaseTransport] = None,
|
||||
workflow: WorkflowGraph,
|
||||
call_context_vars: dict,
|
||||
audio_buffer: Optional["AudioBuffer"] = None,
|
||||
workflow_run_id: Optional[int] = None,
|
||||
node_transition_callback: Optional[
|
||||
Callable[[str, Optional[str]], Awaitable[None]]
|
||||
|
|
@ -78,7 +76,6 @@ class PipecatEngine:
|
|||
self.transport = transport
|
||||
self.workflow = workflow
|
||||
self._call_context_vars = call_context_vars
|
||||
self._audio_buffer = audio_buffer
|
||||
self._workflow_run_id = workflow_run_id
|
||||
self._node_transition_callback = node_transition_callback
|
||||
self._initialized = False
|
||||
|
|
@ -204,6 +201,7 @@ class PipecatEngine:
|
|||
logger.info(f"Arguments: {function_call_params.arguments}")
|
||||
await self.set_node(transition_to_node)
|
||||
try:
|
||||
|
||||
async def on_context_updated() -> None:
|
||||
"""
|
||||
pipecat framework will run this function after the function call result has been updated in the context.
|
||||
|
|
@ -215,6 +213,12 @@ class PipecatEngine:
|
|||
self._current_node
|
||||
)
|
||||
|
||||
# Queue EndFrame if we just transitioned to EndNode
|
||||
if self._current_node.is_end:
|
||||
await self.send_end_task_frame(
|
||||
EndTaskReason.USER_QUALIFIED.value
|
||||
)
|
||||
|
||||
result = {"status": "done"}
|
||||
|
||||
properties = FunctionCallResultProperties(
|
||||
|
|
@ -478,8 +482,6 @@ class PipecatEngine:
|
|||
if node.extraction_enabled and node.extraction_variables:
|
||||
await self._perform_variable_extraction_if_needed(node)
|
||||
|
||||
await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
|
||||
|
||||
async def _handle_agent_node(self, node: Node) -> None:
|
||||
"""Handle agent node execution."""
|
||||
if node.is_static:
|
||||
|
|
@ -680,12 +682,12 @@ class PipecatEngine:
|
|||
"""
|
||||
return engine_callbacks.create_should_mute_callback(self)
|
||||
|
||||
def create_user_idle_callback(self):
|
||||
def create_user_idle_handler(self):
|
||||
"""
|
||||
This callback is called when the user is idle for a certain duration.
|
||||
We use this to either play the static text or end the call
|
||||
Returns a UserIdleHandler that manages user-idle timeouts with state.
|
||||
The handler tracks retry count and handles escalating prompts.
|
||||
"""
|
||||
return engine_callbacks.create_user_idle_callback(self)
|
||||
return engine_callbacks.create_user_idle_handler(self)
|
||||
|
||||
def create_max_duration_callback(self):
|
||||
"""
|
||||
|
|
@ -721,14 +723,6 @@ class PipecatEngine:
|
|||
"""
|
||||
self.task = task
|
||||
|
||||
def set_audio_buffer(self, audio_buffer: "AudioBuffer") -> None:
|
||||
"""Set the audio buffer.
|
||||
|
||||
This allows setting the audio buffer after the engine has been created,
|
||||
which is useful when the audio buffer needs to be created after the engine.
|
||||
"""
|
||||
self._audio_buffer = audio_buffer
|
||||
|
||||
def set_stasis_connection(
|
||||
self, connection: Optional["StasisRTPConnection"]
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ from pipecat.utils.enums import EndTaskReason
|
|||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -57,33 +56,43 @@ def create_should_mute_callback(
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def create_user_idle_callback(engine: "PipecatEngine"):
|
||||
"""Return a callback that handles user-idle timeouts."""
|
||||
class UserIdleHandler:
|
||||
"""Helper class to manage user idle retry logic with state."""
|
||||
|
||||
async def handle_user_idle(
|
||||
user_idle: "UserIdleProcessor", retry_count: int
|
||||
) -> bool:
|
||||
logger.debug(f"Handling user_idle, attempt: {retry_count}")
|
||||
def __init__(self, engine: "PipecatEngine"):
|
||||
self._engine = engine
|
||||
self._retry_count = 0
|
||||
|
||||
if retry_count == 1:
|
||||
def reset(self):
|
||||
"""Reset the retry count when user becomes active."""
|
||||
self._retry_count = 0
|
||||
|
||||
async def handle_idle(self, aggregator):
|
||||
"""Handle user idle event with escalating prompts."""
|
||||
self._retry_count += 1
|
||||
logger.debug(f"Handling user_idle, attempt: {self._retry_count}")
|
||||
|
||||
if self._retry_count == 1:
|
||||
message = {
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. Politely and briefly ask if they're still there in the language that the user has been speaking so far.",
|
||||
}
|
||||
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
return True
|
||||
await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
return
|
||||
|
||||
message = {
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. We will be disconnecting the call now. Wish them a good day in the language that the user has been speaking so far.",
|
||||
}
|
||||
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
await engine.send_end_task_frame(
|
||||
await aggregator.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
await self._engine.send_end_task_frame(
|
||||
EndTaskReason.USER_IDLE_MAX_DURATION_EXCEEDED.value
|
||||
)
|
||||
return False
|
||||
|
||||
return handle_user_idle
|
||||
|
||||
def create_user_idle_handler(engine: "PipecatEngine") -> UserIdleHandler:
|
||||
"""Return a UserIdleHandler that manages user-idle timeouts with state."""
|
||||
return UserIdleHandler(engine)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -49,8 +49,7 @@ from api.tasks.campaign_tasks import (
|
|||
from api.tasks.knowledge_base_processing import process_knowledge_base_document
|
||||
from api.tasks.run_integrations import run_integrations_post_workflow_run
|
||||
from api.tasks.s3_upload import (
|
||||
upload_audio_to_s3,
|
||||
upload_transcript_to_s3,
|
||||
process_workflow_completion,
|
||||
upload_voicemail_audio_to_s3,
|
||||
)
|
||||
|
||||
|
|
@ -59,9 +58,8 @@ class WorkerSettings:
|
|||
functions = [
|
||||
calculate_workflow_run_cost,
|
||||
run_integrations_post_workflow_run,
|
||||
upload_audio_to_s3,
|
||||
upload_transcript_to_s3,
|
||||
upload_voicemail_audio_to_s3,
|
||||
process_workflow_completion,
|
||||
sync_campaign_source,
|
||||
process_campaign_batch,
|
||||
monitor_campaign_progress,
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
class FunctionNames:
|
||||
CALCULATE_WORKFLOW_RUN_COST = "calculate_workflow_run_cost"
|
||||
RUN_INTEGRATIONS_POST_WORKFLOW_RUN = "run_integrations_post_workflow_run"
|
||||
UPLOAD_AUDIO_TO_S3 = "upload_audio_to_s3"
|
||||
UPLOAD_TRANSCRIPT_TO_S3 = "upload_transcript_to_s3"
|
||||
PROCESS_WORKFLOW_COMPLETION = "process_workflow_completion"
|
||||
UPLOAD_VOICEMAIL_AUDIO_TO_S3 = "upload_voicemail_audio_to_s3"
|
||||
SYNC_CAMPAIGN_SOURCE = "sync_campaign_source"
|
||||
PROCESS_CAMPAIGN_BATCH = "process_campaign_batch"
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
"""Execute webhook integrations after workflow run completion."""
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from api.constants import BACKEND_API_ENDPOINT
|
||||
from api.db import db_client
|
||||
from api.db.models import WorkflowRunModel
|
||||
from api.utils.credential_auth import build_auth_header
|
||||
|
|
@ -54,10 +55,13 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
|
|||
|
||||
logger.info(f"Found {len(webhook_nodes)} webhook nodes to execute")
|
||||
|
||||
# Step 4: Build render context
|
||||
render_context = _build_render_context(workflow_run)
|
||||
# Step 4: Generate public access token (on-demand, only when webhooks exist)
|
||||
public_token = await db_client.ensure_public_access_token(workflow_run_id)
|
||||
|
||||
# Step 5: Execute each webhook node
|
||||
# Step 5: Build render context
|
||||
render_context = _build_render_context(workflow_run, public_token)
|
||||
|
||||
# Step 6: Execute each webhook node
|
||||
for node in webhook_nodes:
|
||||
webhook_data = node.get("data", {})
|
||||
try:
|
||||
|
|
@ -77,9 +81,19 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
|
|||
raise
|
||||
|
||||
|
||||
def _build_render_context(workflow_run: WorkflowRunModel) -> Dict[str, Any]:
|
||||
"""Build the context dict for template rendering."""
|
||||
return {
|
||||
def _build_render_context(
|
||||
workflow_run: WorkflowRunModel, public_token: Optional[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Build the context dict for template rendering.
|
||||
|
||||
Args:
|
||||
workflow_run: The workflow run model
|
||||
public_token: Optional public access token for download URLs
|
||||
|
||||
Returns:
|
||||
Dict containing all fields available for template rendering
|
||||
"""
|
||||
context = {
|
||||
# Top-level fields
|
||||
"workflow_run_id": workflow_run.id,
|
||||
"workflow_run_name": workflow_run.name,
|
||||
|
|
@ -89,10 +103,25 @@ def _build_render_context(workflow_run: WorkflowRunModel) -> Dict[str, Any]:
|
|||
"initial_context": workflow_run.initial_context or {},
|
||||
"gathered_context": workflow_run.gathered_context or {},
|
||||
"cost_info": workflow_run.usage_info or {},
|
||||
"recording_url": getattr(workflow_run, "recording_url", None),
|
||||
"transcript_url": getattr(workflow_run, "transcript_url", None),
|
||||
}
|
||||
|
||||
# Add public download URLs if token is available
|
||||
if public_token:
|
||||
base_url = (
|
||||
f"{BACKEND_API_ENDPOINT}/api/v1/public/download/workflow/{public_token}"
|
||||
)
|
||||
context["recording_url"] = (
|
||||
f"{base_url}/recording" if workflow_run.recording_url else None
|
||||
)
|
||||
context["transcript_url"] = (
|
||||
f"{base_url}/transcript" if workflow_run.transcript_url else None
|
||||
)
|
||||
else:
|
||||
context["recording_url"] = workflow_run.recording_url
|
||||
context["transcript_url"] = workflow_run.transcript_url
|
||||
|
||||
return context
|
||||
|
||||
|
||||
async def _execute_webhook_node(
|
||||
webhook_data: Dict[str, Any],
|
||||
|
|
|
|||
|
|
@ -1,129 +1,27 @@
|
|||
import os
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
|
||||
from api.db import db_client
|
||||
from api.services.storage import get_current_storage_backend, storage_fs
|
||||
|
||||
|
||||
async def upload_audio_to_s3(ctx, workflow_run_id: int, temp_file_path: str):
|
||||
"""Upload audio file from temp path to S3."""
|
||||
run_id = str(workflow_run_id)
|
||||
set_current_run_id(run_id)
|
||||
|
||||
logger.info(f"Starting audio upload to S3 from {temp_file_path}")
|
||||
|
||||
try:
|
||||
# Verify temp file exists
|
||||
if not os.path.exists(temp_file_path):
|
||||
logger.error(f"Temp audio file not found: {temp_file_path}")
|
||||
raise FileNotFoundError(f"Temp audio file not found: {temp_file_path}")
|
||||
|
||||
file_size = os.path.getsize(temp_file_path)
|
||||
logger.debug(f"Audio file size: {file_size} bytes")
|
||||
|
||||
recording_url = f"recordings/{workflow_run_id}.wav"
|
||||
storage_backend = get_current_storage_backend()
|
||||
|
||||
logger.info(
|
||||
f"UPLOAD: Using {storage_backend.name} (value: {storage_backend.value}) for audio upload - workflow_run_id: {workflow_run_id}"
|
||||
)
|
||||
|
||||
await storage_fs.aupload_file(temp_file_path, recording_url)
|
||||
|
||||
# Update DB with recording URL and storage backend
|
||||
await db_client.update_workflow_run(
|
||||
run_id=workflow_run_id,
|
||||
recording_url=recording_url,
|
||||
storage_backend=storage_backend.value,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Successfully uploaded audio to {storage_backend.name}: {recording_url} (stored backend: {storage_backend.name})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error uploading audio to S3 for workflow {workflow_run_id}: {e}")
|
||||
raise
|
||||
finally:
|
||||
# Clean up temp file
|
||||
if os.path.exists(temp_file_path):
|
||||
try:
|
||||
os.remove(temp_file_path)
|
||||
logger.debug(f"Cleaned up temp audio file: {temp_file_path}")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to clean up temp audio file {temp_file_path}: {e}"
|
||||
)
|
||||
|
||||
|
||||
async def upload_transcript_to_s3(ctx, workflow_run_id: int, temp_file_path: str):
|
||||
"""Upload transcript file from temp path to S3."""
|
||||
run_id = str(workflow_run_id)
|
||||
set_current_run_id(run_id)
|
||||
|
||||
logger.info(f"Starting transcript upload to S3 from {temp_file_path}")
|
||||
|
||||
try:
|
||||
# Verify temp file exists
|
||||
if not os.path.exists(temp_file_path):
|
||||
logger.error(f"Temp transcript file not found: {temp_file_path}")
|
||||
raise FileNotFoundError(f"Temp transcript file not found: {temp_file_path}")
|
||||
|
||||
file_size = os.path.getsize(temp_file_path)
|
||||
logger.debug(f"Transcript file size: {file_size} bytes")
|
||||
|
||||
transcript_url = f"transcripts/{workflow_run_id}.txt"
|
||||
storage_backend = get_current_storage_backend()
|
||||
|
||||
logger.info(
|
||||
f"UPLOAD: Using {storage_backend.name} (value: {storage_backend.value}) for transcript upload - workflow_run_id: {workflow_run_id}"
|
||||
)
|
||||
|
||||
await storage_fs.aupload_file(temp_file_path, transcript_url)
|
||||
|
||||
# Update DB with transcript URL and storage backend
|
||||
await db_client.update_workflow_run(
|
||||
run_id=workflow_run_id,
|
||||
transcript_url=transcript_url,
|
||||
storage_backend=storage_backend.value,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Successfully uploaded transcript to {storage_backend.name}: {transcript_url} (stored backend: {storage_backend.name})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error uploading transcript to S3 for workflow {workflow_run_id}: {e}"
|
||||
)
|
||||
raise
|
||||
finally:
|
||||
# Clean up temp file
|
||||
if os.path.exists(temp_file_path):
|
||||
try:
|
||||
os.remove(temp_file_path)
|
||||
logger.debug(f"Cleaned up temp transcript file: {temp_file_path}")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to clean up temp transcript file {temp_file_path}: {e}"
|
||||
)
|
||||
from api.tasks.run_integrations import run_integrations_post_workflow_run
|
||||
from pipecat.utils.context import set_current_run_id
|
||||
|
||||
|
||||
async def upload_voicemail_audio_to_s3(
|
||||
ctx,
|
||||
_ctx,
|
||||
workflow_run_id: int,
|
||||
temp_file_path: str,
|
||||
s3_key: str,
|
||||
):
|
||||
"""Upload voicemail detection audio from temp file to S3.
|
||||
|
||||
This function is similar to upload_audio_to_s3 but handles voicemail-specific
|
||||
paths and doesn't update the workflow run's recording_url field.
|
||||
Handles voicemail-specific paths and doesn't update the workflow run's
|
||||
recording_url field.
|
||||
|
||||
Args:
|
||||
ctx: ARQ context
|
||||
_ctx: ARQ context (unused)
|
||||
workflow_run_id: The workflow run ID
|
||||
temp_file_path: Path to the temporary WAV file
|
||||
s3_key: The S3 key where the file should be uploaded
|
||||
|
|
@ -161,7 +59,7 @@ async def upload_voicemail_audio_to_s3(
|
|||
)
|
||||
raise
|
||||
finally:
|
||||
# Clean up temp file (same pattern as upload_audio_to_s3)
|
||||
# Clean up temp file
|
||||
if os.path.exists(temp_file_path):
|
||||
try:
|
||||
os.remove(temp_file_path)
|
||||
|
|
@ -170,3 +68,104 @@ async def upload_voicemail_audio_to_s3(
|
|||
logger.warning(
|
||||
f"Failed to clean up temp voicemail audio file {temp_file_path}: {e}"
|
||||
)
|
||||
|
||||
|
||||
async def process_workflow_completion(
|
||||
_ctx,
|
||||
workflow_run_id: int,
|
||||
audio_temp_path: Optional[str] = None,
|
||||
transcript_temp_path: Optional[str] = None,
|
||||
):
|
||||
"""Process workflow completion: upload artifacts and run integrations.
|
||||
|
||||
This task combines audio upload, transcript upload, and webhook integrations
|
||||
into a single sequential task to ensure integrations run after uploads complete.
|
||||
|
||||
Args:
|
||||
_ctx: ARQ context (unused)
|
||||
workflow_run_id: The workflow run ID
|
||||
audio_temp_path: Optional path to temp audio file
|
||||
transcript_temp_path: Optional path to temp transcript file
|
||||
"""
|
||||
run_id = str(workflow_run_id)
|
||||
set_current_run_id(run_id)
|
||||
|
||||
logger.info(f"Processing workflow completion for run {workflow_run_id}")
|
||||
|
||||
storage_backend = get_current_storage_backend()
|
||||
|
||||
# Step 1: Upload audio if provided
|
||||
if audio_temp_path:
|
||||
try:
|
||||
if os.path.exists(audio_temp_path):
|
||||
file_size = os.path.getsize(audio_temp_path)
|
||||
logger.debug(f"Audio file size: {file_size} bytes")
|
||||
|
||||
recording_url = f"recordings/{workflow_run_id}.wav"
|
||||
logger.info(
|
||||
f"Uploading audio to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
|
||||
)
|
||||
|
||||
await storage_fs.aupload_file(audio_temp_path, recording_url)
|
||||
await db_client.update_workflow_run(
|
||||
run_id=workflow_run_id,
|
||||
recording_url=recording_url,
|
||||
storage_backend=storage_backend.value,
|
||||
)
|
||||
logger.info(f"Successfully uploaded audio: {recording_url}")
|
||||
else:
|
||||
logger.warning(f"Audio temp file not found: {audio_temp_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error uploading audio for workflow {workflow_run_id}: {e}")
|
||||
finally:
|
||||
if audio_temp_path and os.path.exists(audio_temp_path):
|
||||
try:
|
||||
os.remove(audio_temp_path)
|
||||
logger.debug(f"Cleaned up temp audio file: {audio_temp_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clean up temp audio file: {e}")
|
||||
|
||||
# Step 2: Upload transcript if provided
|
||||
if transcript_temp_path:
|
||||
try:
|
||||
if os.path.exists(transcript_temp_path):
|
||||
file_size = os.path.getsize(transcript_temp_path)
|
||||
logger.debug(f"Transcript file size: {file_size} bytes")
|
||||
|
||||
transcript_url = f"transcripts/{workflow_run_id}.txt"
|
||||
logger.info(
|
||||
f"Uploading transcript to {storage_backend.name} - workflow_run_id: {workflow_run_id}"
|
||||
)
|
||||
|
||||
await storage_fs.aupload_file(transcript_temp_path, transcript_url)
|
||||
await db_client.update_workflow_run(
|
||||
run_id=workflow_run_id,
|
||||
transcript_url=transcript_url,
|
||||
storage_backend=storage_backend.value,
|
||||
)
|
||||
logger.info(f"Successfully uploaded transcript: {transcript_url}")
|
||||
else:
|
||||
logger.warning(
|
||||
f"Transcript temp file not found: {transcript_temp_path}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error uploading transcript for workflow {workflow_run_id}: {e}"
|
||||
)
|
||||
finally:
|
||||
if transcript_temp_path and os.path.exists(transcript_temp_path):
|
||||
try:
|
||||
os.remove(transcript_temp_path)
|
||||
logger.debug(
|
||||
f"Cleaned up temp transcript file: {transcript_temp_path}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clean up temp transcript file: {e}")
|
||||
|
||||
# Step 3: Run webhook integrations (after uploads are complete)
|
||||
try:
|
||||
await run_integrations_post_workflow_run(_ctx, workflow_run_id)
|
||||
except Exception as e:
|
||||
logger.error(f"Error running integrations for workflow {workflow_run_id}: {e}")
|
||||
|
||||
logger.info(f"Completed workflow completion processing for run {workflow_run_id}")
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
|
@ -28,6 +28,87 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|||
START_CALL_SYSTEM_PROMPT = "start_call_system_prompt"
|
||||
END_CALL_SYSTEM_PROMPT = "end_call_system_prompt"
|
||||
|
||||
# Default workflow definition for mocking database WorkflowModel
|
||||
DEFAULT_WORKFLOW_DEFINITION = {
|
||||
"nodes": [
|
||||
{
|
||||
"id": "1",
|
||||
"type": "startCall",
|
||||
"position": {"x": 0, "y": 0},
|
||||
"data": {
|
||||
"name": "Start",
|
||||
"prompt": START_CALL_SYSTEM_PROMPT,
|
||||
"is_start": True,
|
||||
"allow_interrupt": False,
|
||||
"add_global_prompt": False,
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"type": "endCall",
|
||||
"position": {"x": 0, "y": 200},
|
||||
"data": {
|
||||
"name": "End",
|
||||
"prompt": END_CALL_SYSTEM_PROMPT,
|
||||
"is_end": True,
|
||||
"allow_interrupt": False,
|
||||
"add_global_prompt": False,
|
||||
},
|
||||
},
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"id": "1-2",
|
||||
"source": "1",
|
||||
"target": "2",
|
||||
"data": {"label": "End", "condition": "End the call"},
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class MockWorkflowModel:
|
||||
"""Mock database WorkflowModel for testing.
|
||||
|
||||
This mimics the structure of the database WorkflowModel, not the parsed WorkflowGraph.
|
||||
Use this when mocking db_client.get_workflow() responses.
|
||||
"""
|
||||
|
||||
workflow_id: int = 1
|
||||
organization_id: int = 1
|
||||
workflow_configurations: Dict[str, Any] = field(default_factory=dict)
|
||||
workflow_definition_with_fallback: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self):
|
||||
if not self.workflow_definition_with_fallback:
|
||||
self.workflow_definition_with_fallback = DEFAULT_WORKFLOW_DEFINITION.copy()
|
||||
|
||||
|
||||
@dataclass
|
||||
class MockWorkflowRun:
|
||||
"""Mock database WorkflowRun for testing.
|
||||
|
||||
Use this when mocking db_client.get_workflow_run() responses.
|
||||
"""
|
||||
|
||||
is_completed: bool = False
|
||||
initial_context: Dict[str, Any] = field(default_factory=dict)
|
||||
gathered_context: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
class MockUserConfig:
|
||||
"""Mock user configuration for testing.
|
||||
|
||||
Use this when mocking db_client.get_user_configurations() responses.
|
||||
"""
|
||||
|
||||
stt: Optional[Any] = None
|
||||
tts: Optional[Any] = None
|
||||
llm: Optional[Any] = None
|
||||
embeddings: Optional[Any] = None
|
||||
|
||||
|
||||
class MockTransportProcessor(FrameProcessor):
|
||||
"""
|
||||
|
|
@ -41,7 +122,7 @@ class MockTransportProcessor(FrameProcessor):
|
|||
|
||||
Args:
|
||||
emit_bot_speaking: If True, also emits BotSpeakingFrame on TTSAudioRawFrame
|
||||
which is needed for UserIdleProcessor to start conversation tracking. Default True.
|
||||
which is needed for user idle tracking to start conversation tracking. Default True.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
|
@ -63,7 +144,7 @@ class MockTransportProcessor(FrameProcessor):
|
|||
BotStartedSpeakingFrame(), direction=FrameDirection.UPSTREAM
|
||||
)
|
||||
elif isinstance(frame, TTSAudioRawFrame):
|
||||
# Emit BotSpeakingFrame - this is what triggers the UserIdleProcessor
|
||||
# Emit BotSpeakingFrame - this is what triggers user idle tracking
|
||||
# to start conversation tracking
|
||||
if self._emit_bot_speaking:
|
||||
await self.push_frame(BotSpeakingFrame())
|
||||
|
|
@ -101,6 +182,24 @@ def mock_engine():
|
|||
return engine
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_workflow_model():
|
||||
"""Create a mock WorkflowModel for testing database responses."""
|
||||
return MockWorkflowModel()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_workflow_run():
|
||||
"""Create a mock WorkflowRun for testing database responses."""
|
||||
return MockWorkflowRun()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_user_config():
|
||||
"""Create a mock user configuration for testing."""
|
||||
return MockUserConfig()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_tools():
|
||||
"""Create sample mock tools for testing."""
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
|||
)
|
||||
from pipecat.tests import MockLLMService, MockTTSService
|
||||
|
||||
|
||||
# Define prompts for test nodes
|
||||
START_NODE_PROMPT = "Start Node System Prompt"
|
||||
AGENT_NODE_PROMPT = "Agent Node System Prompt"
|
||||
|
|
@ -143,14 +142,20 @@ class ContextCapturingMockLLM(MockLLMService):
|
|||
msg_copy = dict(msg)
|
||||
# Copy content to avoid reference issues
|
||||
if "content" in msg_copy:
|
||||
msg_copy["content"] = str(msg_copy["content"]) if msg_copy["content"] else None
|
||||
msg_copy["content"] = (
|
||||
str(msg_copy["content"]) if msg_copy["content"] else None
|
||||
)
|
||||
messages_snapshot.append(msg_copy)
|
||||
|
||||
self.captured_contexts.append({
|
||||
"step": self._current_step,
|
||||
"messages": messages_snapshot,
|
||||
"system_prompt": messages_snapshot[0]["content"] if messages_snapshot else None,
|
||||
})
|
||||
self.captured_contexts.append(
|
||||
{
|
||||
"step": self._current_step,
|
||||
"messages": messages_snapshot,
|
||||
"system_prompt": messages_snapshot[0]["content"]
|
||||
if messages_snapshot
|
||||
else None,
|
||||
}
|
||||
)
|
||||
|
||||
# Call parent implementation to stream the mock chunks
|
||||
return await super()._stream_chat_completions_universal_context(context)
|
||||
|
|
@ -306,14 +311,26 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
transition completes. The test verifies the context is still correctly updated.
|
||||
"""
|
||||
# Step 0 (Start node): call collect_info to transition to agent
|
||||
step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
|
||||
{"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
|
||||
])
|
||||
step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
|
||||
[
|
||||
{
|
||||
"name": "collect_info",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition_1",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Step 1 (Agent node): call end_call to transition to end
|
||||
step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
|
||||
{"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
|
||||
])
|
||||
step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
|
||||
[
|
||||
{
|
||||
"name": "end_call",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition_2",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Step 2 (End node): text response (end node has no outgoing edges)
|
||||
step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
|
||||
|
|
@ -327,7 +344,7 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
)
|
||||
|
||||
# Should have been called 3 times: start node, agent node, end node
|
||||
assert llm.get_current_step() == 2, (
|
||||
assert llm.get_current_step() == 3, (
|
||||
f"Expected 3 LLM generations (start, agent, end), got {llm.get_current_step()}"
|
||||
)
|
||||
|
||||
|
|
@ -376,14 +393,26 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
is handled correctly.
|
||||
"""
|
||||
# Step 0 (Start node): call collect_info to transition to agent
|
||||
step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
|
||||
{"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
|
||||
])
|
||||
step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
|
||||
[
|
||||
{
|
||||
"name": "collect_info",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition_1",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Step 1 (Agent node): call end_call to transition to end
|
||||
step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
|
||||
{"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
|
||||
])
|
||||
step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
|
||||
[
|
||||
{
|
||||
"name": "end_call",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition_2",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Step 2 (End node): text response
|
||||
step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
|
||||
|
|
@ -397,7 +426,7 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
)
|
||||
|
||||
# Verify all three nodes were executed
|
||||
assert llm.get_current_step() == 2, (
|
||||
assert llm.get_current_step() == 3, (
|
||||
f"Expected 3 steps, got {llm.get_current_step()}"
|
||||
)
|
||||
|
||||
|
|
@ -408,8 +437,7 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
assert AGENT_NODE_PROMPT in llm.get_system_prompt_at_step(1)
|
||||
|
||||
# Step 2: End node - should have end prompt
|
||||
# FIXME - EndFrame is getting processed before LLMContextFrame
|
||||
# assert END_NODE_PROMPT in llm.get_system_prompt_at_step(2)
|
||||
assert END_NODE_PROMPT in llm.get_system_prompt_at_step(2)
|
||||
|
||||
# Verify each subsequent step has the previous tool results
|
||||
step_1_ctx = llm.get_context_at_step(1)
|
||||
|
|
@ -423,14 +451,14 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
assert step_1_has_tool, "Agent node should see collect_info tool result"
|
||||
|
||||
# Step 2 should have tool results from both transitions
|
||||
# FIXME - EndFrame is getting processed before LLMContextFrame
|
||||
# step_2_tool_messages = [
|
||||
# msg for msg in step_2_ctx["messages"]
|
||||
# if msg.get("role") == "tool" or msg.get("tool_call_id")
|
||||
# ]
|
||||
# assert len(step_2_tool_messages) >= 2, (
|
||||
# f"End node should see at least 2 tool results, got {len(step_2_tool_messages)}"
|
||||
# )
|
||||
step_2_tool_messages = [
|
||||
msg
|
||||
for msg in step_2_ctx["messages"]
|
||||
if msg.get("role") == "tool" or msg.get("tool_call_id")
|
||||
]
|
||||
assert len(step_2_tool_messages) >= 2, (
|
||||
f"End node should see at least 2 tool results, got {len(step_2_tool_messages)}"
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_context_messages_preserve_conversation_history(
|
||||
|
|
@ -444,14 +472,26 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
- Tool call messages and results
|
||||
"""
|
||||
# Step 0 (Start node): call collect_info to transition to agent
|
||||
step_0_chunks = MockLLMService.create_multiple_function_call_chunks([
|
||||
{"name": "collect_info", "arguments": {}, "tool_call_id": "call_transition_1"},
|
||||
])
|
||||
step_0_chunks = MockLLMService.create_multiple_function_call_chunks(
|
||||
[
|
||||
{
|
||||
"name": "collect_info",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition_1",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Step 1 (Agent node): call end_call to transition to end
|
||||
step_1_chunks = MockLLMService.create_multiple_function_call_chunks([
|
||||
{"name": "end_call", "arguments": {}, "tool_call_id": "call_transition_2"},
|
||||
])
|
||||
step_1_chunks = MockLLMService.create_multiple_function_call_chunks(
|
||||
[
|
||||
{
|
||||
"name": "end_call",
|
||||
"arguments": {},
|
||||
"tool_call_id": "call_transition_2",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Step 2 (End node): text response
|
||||
step_2_chunks = MockLLMService.create_text_chunks("Goodbye!")
|
||||
|
|
@ -472,18 +512,15 @@ class TestContextUpdateBeforeNextCompletion:
|
|||
assert len(ctx_1["messages"]) > len(ctx_0["messages"]), (
|
||||
"Context at step 1 should have more messages than step 0"
|
||||
)
|
||||
|
||||
# FIXME
|
||||
# assert len(ctx_2["messages"]) > len(ctx_1["messages"]), (
|
||||
# "Context at step 2 should have more messages than step 1"
|
||||
# )
|
||||
|
||||
assert len(ctx_2["messages"]) > len(ctx_1["messages"]), (
|
||||
"Context at step 2 should have more messages than step 1"
|
||||
)
|
||||
|
||||
# Verify assistant messages are accumulated
|
||||
# FIXME
|
||||
# assistant_messages_at_step_2 = [
|
||||
# msg for msg in ctx_2["messages"]
|
||||
# if msg.get("role") == "assistant"
|
||||
# ]
|
||||
# assert len(assistant_messages_at_step_2) >= 2, (
|
||||
# "Should have at least 2 assistant messages by step 2"
|
||||
# )
|
||||
assistant_messages_at_step_2 = [
|
||||
msg for msg in ctx_2["messages"] if msg.get("role") == "assistant"
|
||||
]
|
||||
assert len(assistant_messages_at_step_2) >= 2, (
|
||||
"Should have at least 2 assistant messages by step 2"
|
||||
)
|
||||
|
|
|
|||
100
api/tests/test_pipeline_cancellation.py
Normal file
100
api/tests/test_pipeline_cancellation.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
import asyncio
|
||||
|
||||
import pytest
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
EndTaskFrame,
|
||||
Frame,
|
||||
InterruptionTaskFrame,
|
||||
LLMRunFrame,
|
||||
)
|
||||
from pipecat.pipeline.base_task import PipelineTaskParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class MockTransport(FrameProcessor):
    """Minimal stand-in for a transport: forwards every frame unchanged.

    The test below queues task frames on this processor, so it only needs
    to behave like a transparent pipeline element.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Let the base class see the frame, then push it on in the same direction."""
        await super().process_frame(frame, direction)
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class BusyWaitProcessor(FrameProcessor):
    """Processor that stalls on ``LLMRunFrame`` to imitate a slow LLM call.

    Args:
        wait_time: Seconds to sleep when an ``LLMRunFrame`` arrives.
        **kwargs: Forwarded to ``FrameProcessor``.
    """

    def __init__(self, wait_time=5.0, **kwargs):
        super().__init__(**kwargs)
        self._wait_time = wait_time

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Sleep for ``wait_time`` on ``LLMRunFrame``; forward every frame afterwards.

        Raises:
            asyncio.CancelledError: Re-raised after logging when the sleep is
                cancelled, so cancellation keeps propagating to the caller.
        """
        await super().process_frame(frame, direction)
        if isinstance(frame, LLMRunFrame):
            # Simulate a delay, which can happen sometimes due to slow LLM
            # inferencing or other reasons.
            try:
                logger.debug(f"{self} sleeping with frame: {frame}")
                # Honour the configured delay; the constructor argument was
                # previously stored but ignored in favour of a literal 5.
                await asyncio.sleep(self._wait_time)
                logger.debug(f"{self} woke up with frame: {frame}")
            except asyncio.CancelledError:
                logger.debug(f"{self} was cancelled")
                raise
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_interruption_with_blocked_end_frame():
    """The pipeline must finish within 1s even though a processor sleeps for 5s.

    Sequence: an ``LLMRunFrame`` makes ``BusyWaitProcessor`` sleep, then an
    ``EndTaskFrame`` and an ``InterruptionTaskFrame`` are queued upstream on
    the transport 0.1s apart. The test fails if either the pipeline task or
    the queuing task is still pending after the 1 second timeout.
    NOTE(review): presumably the interruption is what is expected to cancel
    the sleeping processor so the end of the task can proceed - confirm
    against pipecat's interruption handling.
    """
    busy_wait_processor = BusyWaitProcessor(wait_time=5)
    transport = MockTransport()
    pipeline = Pipeline([transport, busy_wait_processor])

    task = PipelineTask(pipeline)

    async def run_pipeline():
        # Run the pipeline task on the loop the test itself is running on.
        loop = asyncio.get_running_loop()
        params = PipelineTaskParams(loop=loop)
        await task.run(params=params)

    async def queue_frame():
        # This frame is what puts BusyWaitProcessor to sleep.
        await task.queue_frames([LLMRunFrame()])

        # Send EndTaskFrame to simulate EndFrame
        await asyncio.sleep(0.1)
        await transport.queue_frame(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

        # Simulate an Interruption, which can happen if the user
        # has started to speak
        await asyncio.sleep(0.1)
        await transport.queue_frame(
            InterruptionTaskFrame(), direction=FrameDirection.UPSTREAM
        )

    # Create tasks explicitly for better control
    pipeline_task = asyncio.create_task(run_pipeline())
    queue_task = asyncio.create_task(queue_frame())

    # Wait with timeout
    done, pending = await asyncio.wait(
        [pipeline_task, queue_task],
        timeout=1.0,
        return_when=asyncio.ALL_COMPLETED,
    )

    # If there are pending tasks, we timed out
    if pending:
        # Cancel all pending tasks
        for t in pending:
            t.cancel()

        # Give limited time for cleanup, then move on regardless
        try:
            await asyncio.wait_for(
                asyncio.gather(*pending, return_exceptions=True),
                timeout=1.0,
            )
        except asyncio.TimeoutError:
            pass  # Cleanup took too long, continue anyway

        pytest.fail("Test timed out after 1 second")
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
"""
|
||||
Simulates a user idle condition and tests the behaviour
|
||||
of the user idle processor.
|
||||
of the user idle handler.
|
||||
|
||||
This module tests the behavior when the user becomes idle during a conversation,
|
||||
ensuring the UserIdleProcessor properly triggers the callback and the engine
|
||||
handles it correctly.
|
||||
ensuring the user_idle_timeout in LLMUserAggregatorParams properly triggers
|
||||
the on_user_turn_idle event and the engine handles it correctly.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
|
@ -23,8 +23,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
|
|||
from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.tests import MockLLMService, MockTTSService
|
||||
|
||||
|
||||
|
|
@ -32,8 +32,8 @@ async def run_pipeline_with_user_idle(
|
|||
workflow: WorkflowGraph,
|
||||
user_idle_timeout: float = 0.2,
|
||||
mock_steps: list | None = None,
|
||||
) -> tuple[MockLLMService, LLMContext, UserIdleProcessor]:
|
||||
"""Run a pipeline with UserIdleProcessor and simulate user idle condition.
|
||||
) -> tuple[MockLLMService, LLMContext]:
|
||||
"""Run a pipeline with user_idle_timeout and simulate user idle condition.
|
||||
|
||||
Args:
|
||||
workflow: The workflow graph to use.
|
||||
|
|
@ -42,7 +42,7 @@ async def run_pipeline_with_user_idle(
|
|||
defaults to a simple greeting followed by text responses.
|
||||
|
||||
Returns:
|
||||
Tuple of (MockLLMService, LLMContext, UserIdleProcessor) for assertions.
|
||||
Tuple of (MockLLMService, LLMContext) for assertions.
|
||||
"""
|
||||
# Create mock responses - bot will speak first, then respond to idle prompts
|
||||
# Step 1: Initial greeting
|
||||
|
|
@ -64,10 +64,11 @@ async def run_pipeline_with_user_idle(
|
|||
# Create LLM context
|
||||
context = LLMContext()
|
||||
|
||||
# Create context aggregator with both user and assistant aggregators
|
||||
# Create context aggregator with user_idle_timeout in user_params
|
||||
assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
|
||||
user_params = LLMUserAggregatorParams(user_idle_timeout=user_idle_timeout)
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context, assistant_params=assistant_params
|
||||
context, assistant_params=assistant_params, user_params=user_params
|
||||
)
|
||||
user_context_aggregator = context_aggregator.user()
|
||||
assistant_context_aggregator = context_aggregator.assistant()
|
||||
|
|
@ -81,18 +82,20 @@ async def run_pipeline_with_user_idle(
|
|||
workflow_run_id=1,
|
||||
)
|
||||
|
||||
# Create UserIdleProcessor with engine's callback and a short timeout
|
||||
user_idle_processor = UserIdleProcessor(
|
||||
callback=engine.create_user_idle_callback(),
|
||||
timeout=user_idle_timeout,
|
||||
)
|
||||
# Register user idle event handlers
|
||||
user_idle_handler = engine.create_user_idle_handler()
|
||||
|
||||
# Build the pipeline:
|
||||
# llm -> mock_transport -> user_idle_processor -> assistant_context_aggregator
|
||||
# The user_context_aggregator would normally be at the start for user input
|
||||
@user_context_aggregator.event_handler("on_user_turn_idle")
|
||||
async def on_user_turn_idle(aggregator):
|
||||
await user_idle_handler.handle_idle(aggregator)
|
||||
|
||||
@user_context_aggregator.event_handler("on_user_turn_started")
|
||||
async def on_user_turn_started(aggregator, strategy):
|
||||
user_idle_handler.reset()
|
||||
|
||||
# Build the pipeline
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
user_idle_processor,
|
||||
user_context_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
|
|
@ -154,11 +157,11 @@ async def run_pipeline_with_user_idle(
|
|||
return_exceptions=True,
|
||||
)
|
||||
|
||||
return llm, context, user_idle_processor
|
||||
return llm, context
|
||||
|
||||
|
||||
class TestUserIdleHandler:
|
||||
"""Test user idle handling through PipecatEngine and UserIdleProcessor."""
|
||||
"""Test user idle handling through PipecatEngine and UserIdleHandler."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_user_idle_triggers_callback(self, simple_workflow: WorkflowGraph):
|
||||
|
|
@ -167,13 +170,13 @@ class TestUserIdleHandler:
|
|||
This test verifies that when:
|
||||
1. The bot starts speaking (triggers conversation tracking)
|
||||
2. No user input is received for the timeout period
|
||||
3. The UserIdleProcessor triggers the idle callback
|
||||
3. The on_user_turn_idle event triggers the idle handler
|
||||
|
||||
The engine's user idle callback should:
|
||||
The engine's user idle handler should:
|
||||
- First retry: Send a message asking if user is still there
|
||||
- Second retry: Send goodbye message and end the call
|
||||
"""
|
||||
llm, context, user_idle_processor = await run_pipeline_with_user_idle(
|
||||
llm, context = await run_pipeline_with_user_idle(
|
||||
workflow=simple_workflow,
|
||||
user_idle_timeout=0.2, # Short timeout for faster test
|
||||
)
|
||||
|
|
@ -220,7 +223,7 @@ class TestUserIdleHandler:
|
|||
MockLLMService.create_text_chunks("Response 3"),
|
||||
]
|
||||
|
||||
llm, context, user_idle_processor = await run_pipeline_with_user_idle(
|
||||
llm, context = await run_pipeline_with_user_idle(
|
||||
workflow=three_node_workflow,
|
||||
user_idle_timeout=0.2,
|
||||
mock_steps=mock_steps,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
postgres:
|
||||
image: postgres:17
|
||||
image: pgvector/pgvector:pg17
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
|
|
@ -83,6 +83,10 @@ services:
|
|||
ENVIRONMENT: "local"
|
||||
LOG_LEVEL: "INFO"
|
||||
|
||||
# Replace this environment variable if you are using a custom
|
||||
# domain to host the stack
|
||||
BACKEND_API_ENDPOINT: "http://localhost:8000"
|
||||
|
||||
# Database configuration (using containerized postgres)
|
||||
DATABASE_URL: "postgresql+asyncpg://postgres:postgres@postgres:5432/postgres"
|
||||
|
||||
|
|
|
|||
|
|
@ -162,6 +162,10 @@ server {
|
|||
}
|
||||
```
|
||||
|
||||
### Add environment variable
|
||||
|
||||
In `docker-compose.yaml`, set the `BACKEND_API_ENDPOINT` environment variable to your custom domain, including the scheme (for example, `https://your-domain.com`).
|
||||
|
||||
### Start Dograh Services
|
||||
|
||||
Start Dograh with the updated configuration:
|
||||
|
|
|
|||
135
evals/stt/README.md
Normal file
135
evals/stt/README.md
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
# STT Evaluation Benchmark
|
||||
|
||||
Benchmark for comparing Speech-to-Text providers using **WebSocket streaming** with focus on:
|
||||
- **Speaker diarization** - identifying who said what
|
||||
- **Keyterm boosting** - improving recognition of specific terms (Deepgram)
|
||||
|
||||
## Providers
|
||||
|
||||
| Provider | Diarization | Keyterm Boost | Streaming |
|
||||
|----------|-------------|---------------|-----------|
|
||||
| Deepgram | Yes | Yes | WebSocket (v1/v2) |
|
||||
| Speechmatics | Yes | Additional vocab | WebSocket RT |
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
pip install websockets
|
||||
|
||||
# Set API keys
|
||||
export DEEPGRAM_API_KEY="your-key"
|
||||
export SPEECHMATICS_API_KEY="your-key"
|
||||
```
|
||||
|
||||
**Note:** Requires `ffmpeg` installed for audio conversion to PCM16.
|
||||
|
||||
## Usage
|
||||
|
||||
Run from the project root directory:
|
||||
|
||||
```bash
|
||||
# Test both providers with diarization
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
|
||||
|
||||
# Test only Deepgram
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
|
||||
|
||||
# Test with keyterm boosting (Deepgram)
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --keyterms "Dograh" "Pipecat"
|
||||
|
||||
# Use different sample rate (default: 8000 Hz)
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --sample-rate 16000
|
||||
|
||||
# Show word-level timings
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --show-words
|
||||
|
||||
# Save results to JSON
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --save
|
||||
```
|
||||
|
||||
## CLI Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `audio_file` | Path to audio file (relative to evals/stt/ or absolute) |
|
||||
| `--providers` | Providers to test: `deepgram`, `deepgram-flux`, `speechmatics`, `local-smart-turn` (default: `deepgram` and `speechmatics`) |
|
||||
| `--diarize` | Enable speaker diarization |
|
||||
| `--keyterms` | Keywords to boost (Deepgram) / additional vocab (Speechmatics) |
|
||||
| `--language` | Language code (default: en) |
|
||||
| `--sample-rate` | Audio sample rate for streaming (default: 8000) |
|
||||
| `--show-words` | Show individual word timings |
|
||||
| `--save` | Save results to JSON in `results/` |
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
evals/stt/
|
||||
├── audio/ # Audio test files
|
||||
│ └── multi_speaker.m4a
|
||||
├── results/ # Saved benchmark results (JSON)
|
||||
├── providers/ # STT provider implementations
|
||||
│ ├── base.py # Base classes
|
||||
│ ├── deepgram_provider.py # WebSocket streaming
|
||||
│ └── speechmatics_provider.py # WebSocket streaming
|
||||
├── audio_streamer.py # PCM16 audio file streamer
|
||||
├── benchmark.py # Main runner script
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Audio Conversion**: The `AudioStreamer` converts any audio file to raw PCM16 using ffmpeg
|
||||
2. **WebSocket Connection**: Providers connect to their respective WebSocket APIs
|
||||
3. **Streaming**: Audio is sent in chunks (configurable sample rate, default 8kHz)
|
||||
4. **Result Collection**: Transcripts and speaker info are collected from WebSocket responses
|
||||
5. **Comparison**: Results are parsed into a common format for comparison
|
||||
|
||||
## Output Example
|
||||
|
||||
```
|
||||
Audio file: /path/to/audio/multi_speaker.m4a
|
||||
Providers: ['deepgram', 'speechmatics']
|
||||
Diarization: True
|
||||
Sample rate: 8000 Hz
|
||||
|
||||
============================================================
|
||||
Provider: DEEPGRAM
|
||||
============================================================
|
||||
|
||||
Duration: 45.32s
|
||||
Speakers detected: 2 - ['0', '1']
|
||||
|
||||
Transcript:
|
||||
Hello, welcome to the demo...
|
||||
|
||||
--- Speaker Segments ---
|
||||
[0.0s] Speaker 0: Hello, welcome to the demo.
|
||||
[2.5s] Speaker 1: Thanks for having me.
|
||||
...
|
||||
|
||||
============================================================
|
||||
COMPARISON SUMMARY
|
||||
============================================================
|
||||
|
||||
Provider Duration Speakers Words
|
||||
---------------------------------------------
|
||||
deepgram 45.32 2 312
|
||||
speechmatics 45.32 2 308
|
||||
```
|
||||
|
||||
## Adding New Providers
|
||||
|
||||
1. Create a new file in `providers/` (e.g., `whisper_provider.py`)
|
||||
2. Implement the `STTProvider` abstract class with WebSocket streaming
|
||||
3. Use `AudioStreamer` for PCM16 conversion
|
||||
4. Add to `providers/__init__.py`
|
||||
5. Add to `benchmark.py` provider choices
|
||||
|
||||
## API Documentation
|
||||
|
||||
- Deepgram Streaming: https://developers.deepgram.com/docs/live-streaming-audio
|
||||
- Deepgram Diarization: https://developers.deepgram.com/docs/diarization
|
||||
- Deepgram Keyterms: https://developers.deepgram.com/docs/keyterm
|
||||
- Speechmatics RT API: https://docs.speechmatics.com/rt-api-ref
|
||||
- Speechmatics Diarization: https://docs.speechmatics.com/features/diarization
|
||||
1
evals/stt/__init__.py
Normal file
1
evals/stt/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
# STT Evaluation Benchmark
|
||||
BIN
evals/stt/audio/multi_speaker.m4a
Normal file
BIN
evals/stt/audio/multi_speaker.m4a
Normal file
Binary file not shown.
BIN
evals/stt/audio/nope.m4a
Normal file
BIN
evals/stt/audio/nope.m4a
Normal file
Binary file not shown.
BIN
evals/stt/audio/not_so_sure.m4a
Normal file
BIN
evals/stt/audio/not_so_sure.m4a
Normal file
Binary file not shown.
BIN
evals/stt/audio/vad.m4a
Normal file
BIN
evals/stt/audio/vad.m4a
Normal file
Binary file not shown.
BIN
evals/stt/audio/yes.m4a
Normal file
BIN
evals/stt/audio/yes.m4a
Normal file
Binary file not shown.
140
evals/stt/audio_streamer.py
Normal file
140
evals/stt/audio_streamer.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Audio file streamer - converts audio files to PCM16 streams."""
|
||||
|
||||
import asyncio
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import AsyncIterator
|
||||
|
||||
|
||||
@dataclass
class AudioConfig:
    """Parameters describing the PCM stream sent to a provider."""

    sample_rate: int = 8000
    channels: int = 1
    sample_width: int = 2  # bytes per sample (16-bit audio)
    chunk_duration_ms: int = 80  # length of audio carried by one chunk

    @property
    def chunk_size(self) -> int:
        """Number of bytes in one chunk of ``chunk_duration_ms`` milliseconds."""
        bytes_per_frame = self.channels * self.sample_width
        frames_per_chunk = int(self.sample_rate * self.chunk_duration_ms / 1000)
        return frames_per_chunk * bytes_per_frame
|
||||
|
||||
|
||||
class AudioStreamer:
    """Streams audio files as PCM16 chunks.

    Converts any audio format to raw PCM16 using ffmpeg and streams
    in real-time chunks to simulate live audio.
    """

    def __init__(self, config: AudioConfig | None = None):
        self.config = config or AudioConfig()

    def convert_to_pcm16(self, audio_path: Path) -> bytes:
        """Convert audio file to raw PCM16 bytes using ffmpeg.

        Args:
            audio_path: Path to input audio file

        Returns:
            Raw PCM16 audio bytes

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        """
        cmd = [
            "ffmpeg",
            "-i",
            str(audio_path),
            "-f",
            "s16le",  # signed 16-bit little-endian
            "-acodec",
            "pcm_s16le",
            "-ar",
            str(self.config.sample_rate),
            "-ac",
            str(self.config.channels),
            "-",  # output to stdout
        ]

        result = subprocess.run(
            cmd,
            # Do not let the child process inherit (and consume) our stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            check=True,
        )
        return result.stdout

    async def stream_file(
        self,
        audio_path: Path,
        realtime: bool = True,
        trailing_silence_seconds: float = 0.0,
    ) -> AsyncIterator[bytes]:
        """Stream audio file as PCM16 chunks.

        Args:
            audio_path: Path to audio file
            realtime: If True, add delays to simulate real-time streaming
            trailing_silence_seconds: Seconds of silence to append after audio ends.
                Useful for capturing pending end-of-turn events from STT providers.

        Yields:
            PCM16 audio chunks; the last audio chunk may be shorter than
            ``config.chunk_size``, silence chunks are always full-sized.

        Raises:
            ValueError: If the configured chunk size is not positive.
        """
        chunk_size = self.config.chunk_size
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")

        # The conversion is a blocking subprocess call; run it in a worker
        # thread so other tasks on the event loop keep running meanwhile.
        pcm_data = await asyncio.to_thread(self.convert_to_pcm16, audio_path)

        chunk_ms = self.config.chunk_duration_ms
        delay = chunk_ms / 1000.0 if realtime else 0

        # Stream audio chunks
        for offset in range(0, len(pcm_data), chunk_size):
            yield pcm_data[offset : offset + chunk_size]
            if delay > 0:
                await asyncio.sleep(delay)

        # Stream trailing silence if requested
        if trailing_silence_seconds > 0:
            silence_chunk = bytes(chunk_size)  # Zero-filled bytes = silence
            # Whole chunks that fit in the requested duration. The epsilon
            # absorbs float error so an exact multiple is not rounded down
            # (0.3 / 0.1 evaluates to 2.9999999999999996).
            num_silence_chunks = int(trailing_silence_seconds * 1000 / chunk_ms + 1e-9)

            for _ in range(num_silence_chunks):
                yield silence_chunk
                if delay > 0:
                    await asyncio.sleep(delay)

    async def stream_file_fast(self, audio_path: Path) -> AsyncIterator[bytes]:
        """Stream audio file as fast as possible (no real-time delay).

        Args:
            audio_path: Path to audio file

        Yields:
            PCM16 audio chunks
        """
        async for chunk in self.stream_file(audio_path, realtime=False):
            yield chunk

    def get_duration(self, audio_path: Path) -> float:
        """Get audio file duration in seconds.

        Args:
            audio_path: Path to audio file

        Returns:
            Duration in seconds

        Raises:
            subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
            ValueError: If ffprobe's output cannot be parsed as a float.
        """
        cmd = [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(audio_path),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return float(result.stdout.strip())
|
||||
247
evals/stt/benchmark.py
Normal file
247
evals/stt/benchmark.py
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
#!/usr/bin/env python3
|
||||
"""STT Benchmark Runner.
|
||||
|
||||
Compare speech-to-text transcription across providers with focus on:
|
||||
- Speaker diarization accuracy
|
||||
- Keyword/keyterm recognition
|
||||
- Transcription quality
|
||||
|
||||
Usage:
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
|
||||
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --keyterms "Dograh" "Pipecat"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from evals.stt.providers import (
|
||||
DeepgramProvider,
|
||||
DeepgramFluxProvider,
|
||||
SpeechmaticsProvider,
|
||||
LocalSmartTurnProvider,
|
||||
STTProvider,
|
||||
TranscriptionResult,
|
||||
)
|
||||
|
||||
|
||||
def get_provider(name: str) -> STTProvider:
    """Instantiate the benchmark provider registered under ``name``.

    Raises:
        ValueError: If ``name`` is not one of the registered provider keys.
    """
    registry = {
        "deepgram": DeepgramProvider,
        "deepgram-flux": DeepgramFluxProvider,
        "speechmatics": SpeechmaticsProvider,
        "local-smart-turn": LocalSmartTurnProvider,
    }
    provider_cls = registry.get(name)
    if provider_cls is None:
        raise ValueError(f"Unknown provider: {name}. Available: {list(registry)}")
    return provider_cls()
|
||||
|
||||
|
||||
async def run_transcription(
    provider: STTProvider,
    audio_path: Path,
    diarize: bool = False,
    keyterms: list[str] | None = None,
    **kwargs: Any,
) -> TranscriptionResult:
    """Print a banner for ``provider`` and return its transcription of ``audio_path``.

    ``diarize``, ``keyterms`` and any extra keyword arguments are passed
    straight to ``provider.transcribe``. Any exception is reported on stdout
    and then re-raised unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"Provider: {provider.name.upper()}")
    print(rule)

    try:
        return await provider.transcribe(
            audio_path,
            diarize=diarize,
            keyterms=keyterms,
            **kwargs,
        )
    except Exception as exc:
        print(f"Error with {provider.name}: {exc}")
        raise
|
||||
|
||||
|
||||
def print_result(result: TranscriptionResult, show_words: bool = False) -> None:
    """Write a human-readable report for one transcription to stdout.

    Always prints duration, detected speakers and the transcript. Speaker
    segments are listed when any speaker was detected; with ``show_words``
    the first 50 words are listed with start time, speaker and confidence.
    """
    speakers = result.speakers
    print(f"\nDuration: {result.duration:.2f}s")
    print(f"Speakers detected: {len(speakers)} - {speakers}")
    print(f"\nTranscript:\n{result.transcript}")

    if speakers:
        print("\n--- Speaker Segments ---")
        for seg in result.get_speaker_segments():
            # Segments without an attributed speaker are shown as "?".
            label, text, start = seg["speaker"] or "?", seg["text"], seg["start"]
            print(f"[{start:.1f}s] Speaker {label}: {text}")

    if not show_words:
        return

    word_limit = 50
    print("\n--- Words ---")
    for entry in result.words[:word_limit]:
        tag = f" (S{entry.speaker})" if entry.speaker else ""
        print(f" {entry.start:.2f}s: {entry.word}{tag} [{entry.confidence:.2f}]")
    hidden = len(result.words) - word_limit
    if hidden > 0:
        print(f" ... and {hidden} more words")
|
||||
|
||||
|
||||
def save_results(
    results: list[TranscriptionResult],
    output_dir: Path,
    audio_name: str,
) -> Path:
    """Write all results to a timestamped JSON file and return its path.

    ``output_dir`` is created if missing. The file is named
    ``<audio_name>_<YYYYmmdd_HHMMSS>.json`` and holds the timestamp, the
    audio name and each result's ``to_dict()`` output.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = output_dir / f"{audio_name}_{stamp}.json"

    payload = {
        "timestamp": stamp,
        "audio_file": audio_name,
        "results": [item.to_dict() for item in results],
    }
    destination.write_text(json.dumps(payload, indent=2))

    print(f"\nResults saved to: {destination}")
    return destination
|
||||
|
||||
|
||||
def compare_results(results: list[TranscriptionResult]) -> None:
    """Print a side-by-side summary table when two or more results exist.

    One row per provider (duration, speaker count, word count), followed by
    a note when the providers disagree on the number of speakers.
    """
    if len(results) < 2:
        return

    rule = "=" * 60
    print(f"\n{rule}")
    print("COMPARISON SUMMARY")
    print(rule)

    print(f"\n{'Provider':<15} {'Duration':<10} {'Speakers':<10} {'Words':<10}")
    print("-" * 45)

    speaker_counts = {}
    for entry in results:
        n_speakers = len(entry.speakers)
        print(f"{entry.provider:<15} {entry.duration:<10.2f} {n_speakers:<10} {len(entry.words):<10}")
        speaker_counts[entry.provider] = n_speakers

    # Flag any disagreement on how many speakers were detected.
    if len(set(speaker_counts.values())) > 1:
        print(f"\nNote: Providers detected different speaker counts: {speaker_counts}")
|
||||
|
||||
|
||||
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the benchmark runner."""
    parser = argparse.ArgumentParser(
        description="STT Benchmark - Compare transcription providers",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize
python -m evals.stt.benchmark audio/multi_speaker.m4a --diarize --providers deepgram
python -m evals.stt.benchmark audio/multi_speaker.m4a --keyterms "Dograh" "API"
""",
    )
    parser.add_argument(
        "audio_file",
        type=str,
        help="Path to audio file (relative to evals/stt/ or absolute)",
    )
    parser.add_argument(
        "--providers",
        nargs="+",
        default=["deepgram", "speechmatics"],
        choices=["deepgram", "deepgram-flux", "speechmatics", "local-smart-turn"],
        # The default is a subset of the choices, so do not claim "all".
        help="Providers to test (default: deepgram speechmatics)",
    )
    parser.add_argument(
        "--diarize",
        action="store_true",
        help="Enable speaker diarization",
    )
    parser.add_argument(
        "--keyterms",
        nargs="+",
        help="Keywords to boost (Deepgram only)",
    )
    parser.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)",
    )
    parser.add_argument(
        "--sample-rate",
        type=int,
        default=8000,
        help="Audio sample rate for streaming (default: 8000)",
    )
    parser.add_argument(
        "--show-words",
        action="store_true",
        help="Show individual word timings",
    )
    parser.add_argument(
        "--save",
        action="store_true",
        help="Save results to JSON file",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="results",
        help="Output directory for results (default: results)",
    )
    return parser


async def main() -> int:
    """Run the benchmark from the command line.

    Parses ``sys.argv``, runs each requested provider in turn on the audio
    file, prints every result, prints a comparison when more than one
    provider succeeded and optionally saves the results as JSON.

    Returns:
        Process exit status: 0 when at least one provider produced a result,
        1 when the audio file is missing or every provider failed.
    """
    args = _build_arg_parser().parse_args()

    # Resolve audio path: relative paths are taken relative to this script.
    script_dir = Path(__file__).parent
    audio_path = Path(args.audio_file)
    if not audio_path.is_absolute():
        audio_path = script_dir / audio_path

    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        return 1

    print(f"Audio file: {audio_path}")
    print(f"Providers: {args.providers}")
    print(f"Diarization: {args.diarize}")
    print(f"Sample rate: {args.sample_rate} Hz")
    if args.keyterms:
        print(f"Keyterms: {args.keyterms}")

    results: list[TranscriptionResult] = []

    for provider_name in args.providers:
        # Best effort: one failing provider must not stop the others.
        try:
            provider = get_provider(provider_name)
            result = await run_transcription(
                provider,
                audio_path,
                diarize=args.diarize,
                keyterms=args.keyterms,
                language=args.language,
                sample_rate=args.sample_rate,
            )
            print_result(result, show_words=args.show_words)
            results.append(result)
        except Exception as e:
            print(f"\nFailed to run {provider_name}: {e}")
            continue

    if not results:
        # Every provider failed; report that through the exit status
        # instead of exiting 0 as if the benchmark had succeeded.
        print("\nError: no provider produced a result")
        return 1

    if len(results) > 1:
        compare_results(results)

    if args.save:
        output_dir = script_dir / args.output_dir
        save_results(results, output_dir, audio_path.stem)

    return 0


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
|
||||
251
evals/stt/event_capture.py
Normal file
251
evals/stt/event_capture.py
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
#!/usr/bin/env python3
|
||||
"""STT Event Capture Runner.
|
||||
|
||||
Streams audio to STT providers and captures raw WebSocket events with timestamps
|
||||
for visualization in the web UI.
|
||||
|
||||
Usage:
|
||||
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
|
||||
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider speechmatics
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
from evals.stt.audio_streamer import AudioStreamer
|
||||
from evals.stt.providers import (
|
||||
DeepgramFluxProvider,
|
||||
DeepgramProvider,
|
||||
SpeechmaticsProvider,
|
||||
STTProvider,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class CapturedEvent:
    """One provider event together with the time at which it was captured."""

    timestamp: float  # seconds elapsed since the stream started
    event_type: str  # provider event name, e.g. "Results", "TurnInfo", "AddTranscript"
    data: dict[str, Any]  # the raw event payload

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict; ``data`` is referenced, not copied."""
        return dict(
            timestamp=self.timestamp,
            event_type=self.event_type,
            data=self.data,
        )
|
||||
|
||||
|
||||
@dataclass
class EventCaptureResult:
    """Everything recorded during one event-capture run."""

    audio_file: str
    audio_path: str  # path to the audio file relative to the results directory
    provider: str
    duration: float
    created_at: str
    events: list[CapturedEvent] = field(default_factory=list)
    transcript: str = ""  # final transcript, kept for reference

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form with every event converted via ``to_dict()``."""
        serialized_events = [event.to_dict() for event in self.events]
        return dict(
            audio_file=self.audio_file,
            audio_path=self.audio_path,
            provider=self.provider,
            duration=self.duration,
            created_at=self.created_at,
            events=serialized_events,
            transcript=self.transcript,
        )
|
||||
|
||||
|
||||
# Type of an event hook: called with (event_type, raw event payload) and
# returning nothing.
EventCallback = Callable[[str, dict[str, Any]], None]
|
||||
|
||||
|
||||
def get_provider(name: str) -> STTProvider:
    """Instantiate the STT provider registered under ``name``.

    Args:
        name: Provider key ("deepgram", "deepgram-flux" or "speechmatics").

    Returns:
        A freshly constructed provider instance.

    Raises:
        ValueError: If ``name`` is not a registered provider key.
    """
    providers = {
        "deepgram": DeepgramProvider,
        "deepgram-flux": DeepgramFluxProvider,
        "speechmatics": SpeechmaticsProvider,
    }
    provider_cls = providers.get(name)
    if provider_cls is None:
        raise ValueError(f"Unknown provider: {name}. Available: {list(providers.keys())}")
    return provider_cls()
|
||||
|
||||
|
||||
async def capture_events(
    provider: STTProvider,
    audio_path: Path,
    sample_rate: int = 8000,
    **kwargs: Any,
) -> EventCaptureResult:
    """Run a transcription and record every event the provider reports.

    Args:
        provider: The STT provider to drive.
        audio_path: Path to the audio file to transcribe.
        sample_rate: Sample rate forwarded to the provider.
        **kwargs: Extra keyword arguments forwarded to ``provider.transcribe``.

    Returns:
        EventCaptureResult holding the captured events and the final transcript.
    """
    # The duration is read up front, before any transcription starts.
    duration = AudioStreamer().get_duration(audio_path)

    captured: list[CapturedEvent] = []
    start_time: float | None = None

    def on_event(event_type: str, data: dict[str, Any]) -> None:
        """Record one event; the clock starts when the first event arrives."""
        nonlocal start_time
        loop = asyncio.get_event_loop()
        if start_time is None:
            start_time = loop.time()
        elapsed = loop.time() - start_time
        captured.append(CapturedEvent(timestamp=elapsed, event_type=event_type, data=data))

    transcription = await provider.transcribe(
        audio_path,
        sample_rate=sample_rate,
        on_event=on_event,
        **kwargs,
    )

    file_name = audio_path.name
    return EventCaptureResult(
        audio_file=file_name,
        audio_path=f"../audio/{file_name}",
        provider=provider.name,
        duration=duration,
        created_at=datetime.now().isoformat(),
        events=captured,
        transcript=transcription.transcript,
    )
|
||||
|
||||
|
||||
def save_result(result: EventCaptureResult, output_dir: Path) -> Path:
    """Write a capture result to ``<audio stem>-<provider>.json`` in ``output_dir``.

    Args:
        result: The capture result to serialise.
        output_dir: Destination directory; created (with parents) if missing.

    Returns:
        Path of the JSON file that was written.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    stem = Path(result.audio_file).stem
    destination = output_dir / f"{stem}-{result.provider}.json"

    with open(destination, "w") as handle:
        json.dump(result.to_dict(), handle, indent=2)

    return destination
|
||||
|
||||
|
||||
async def main() -> int:
    """Command-line entry point: capture provider events for one audio file.

    Returns:
        0 when the capture was saved, 1 when the audio file is missing or the
        capture raised an exception.
    """
    import traceback

    parser = argparse.ArgumentParser(
        description="STT Event Capture - Capture WebSocket events for visualization",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
python -m evals.stt.event_capture audio/multi_speaker.m4a --provider speechmatics --diarize
""",
    )
    parser.add_argument(
        "audio_file",
        type=str,
        help="Path to audio file (relative to evals/stt/ or absolute)",
    )
    parser.add_argument(
        "--provider",
        required=True,
        choices=["deepgram", "deepgram-flux", "speechmatics"],
        help="STT provider to use",
    )
    parser.add_argument(
        "--sample-rate",
        type=int,
        default=8000,
        help="Audio sample rate for streaming (default: 8000)",
    )
    parser.add_argument(
        "--diarize",
        action="store_true",
        help="Enable speaker diarization",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="results",
        help="Output directory for results (default: results)",
    )
    args = parser.parse_args()

    # Relative audio paths are resolved against this script's directory.
    script_dir = Path(__file__).parent
    requested = Path(args.audio_file)
    audio_path = requested if requested.is_absolute() else script_dir / requested

    if not audio_path.exists():
        print(f"Error: Audio file not found: {audio_path}")
        return 1

    for line in (
        f"Audio file: {audio_path}",
        f"Provider: {args.provider}",
        f"Sample rate: {args.sample_rate} Hz",
        f"Diarization: {args.diarize}",
    ):
        print(line)

    try:
        provider = get_provider(args.provider)
        print(f"\nCapturing events from {provider.name}...")

        result = await capture_events(
            provider,
            audio_path,
            sample_rate=args.sample_rate,
            diarize=args.diarize,
        )
        output_file = save_result(result, script_dir / args.output_dir)

        print("\nCapture complete!")
        print(f" Duration: {result.duration:.2f}s")
        print(f" Events: {len(result.events)}")
        print(f" Saved to: {output_file}")

        # Preview the start of the capture.
        print("\nFirst 5 events:")
        for event in result.events[:5]:
            print(f" [{event.timestamp:.2f}s] {event.event_type}")
    except Exception as e:
        print(f"\nError: {e}")
        traceback.print_exc()
        return 1

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(asyncio.run(main()))
|
||||
16
evals/stt/providers/__init__.py
Normal file
16
evals/stt/providers/__init__.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
from .base import EventCallback, STTProvider, TranscriptionResult, Word
|
||||
from .deepgram_provider import DeepgramProvider
|
||||
from .deepgram_flux_provider import DeepgramFluxProvider
|
||||
from .speechmatics_provider import SpeechmaticsProvider
|
||||
from .local_smart_turn_provider import LocalSmartTurnProvider
|
||||
|
||||
__all__ = [
|
||||
"EventCallback",
|
||||
"STTProvider",
|
||||
"TranscriptionResult",
|
||||
"Word",
|
||||
"DeepgramProvider",
|
||||
"DeepgramFluxProvider",
|
||||
"SpeechmaticsProvider",
|
||||
"LocalSmartTurnProvider",
|
||||
]
|
||||
128
evals/stt/providers/base.py
Normal file
128
evals/stt/providers/base.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
"""Base classes for STT providers."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
# Event callback type: (event_type, data) -> None
|
||||
EventCallback = Callable[[str, dict[str, Any]], None]
|
||||
|
||||
|
||||
@dataclass
|
||||
class Word:
|
||||
"""Represents a transcribed word with metadata."""
|
||||
|
||||
word: str
|
||||
start: float
|
||||
end: float
|
||||
confidence: float
|
||||
speaker: str | None = None
|
||||
speaker_confidence: float | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"word": self.word,
|
||||
"start": self.start,
|
||||
"end": self.end,
|
||||
"confidence": self.confidence,
|
||||
"speaker": self.speaker,
|
||||
"speaker_confidence": self.speaker_confidence,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class TranscriptionResult:
|
||||
"""Result from STT transcription."""
|
||||
|
||||
provider: str
|
||||
transcript: str
|
||||
words: list[Word]
|
||||
speakers: list[str]
|
||||
duration: float
|
||||
raw_response: dict[str, Any] = field(default_factory=dict)
|
||||
params: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"provider": self.provider,
|
||||
"transcript": self.transcript,
|
||||
"words": [w.to_dict() for w in self.words],
|
||||
"speakers": self.speakers,
|
||||
"duration": self.duration,
|
||||
"params": self.params,
|
||||
}
|
||||
|
||||
def get_speaker_segments(self) -> list[dict[str, Any]]:
|
||||
"""Get transcript segmented by speaker."""
|
||||
if not self.words:
|
||||
return []
|
||||
|
||||
segments = []
|
||||
current_speaker = None
|
||||
current_text = []
|
||||
segment_start = 0.0
|
||||
|
||||
for word in self.words:
|
||||
if word.speaker != current_speaker:
|
||||
if current_text:
|
||||
segments.append(
|
||||
{
|
||||
"speaker": current_speaker,
|
||||
"text": " ".join(current_text),
|
||||
"start": segment_start,
|
||||
"end": self.words[len(segments) - 1].end
|
||||
if segments
|
||||
else word.start,
|
||||
}
|
||||
)
|
||||
current_speaker = word.speaker
|
||||
current_text = [word.word]
|
||||
segment_start = word.start
|
||||
else:
|
||||
current_text.append(word.word)
|
||||
|
||||
if current_text:
|
||||
segments.append(
|
||||
{
|
||||
"speaker": current_speaker,
|
||||
"text": " ".join(current_text),
|
||||
"start": segment_start,
|
||||
"end": self.words[-1].end if self.words else 0.0,
|
||||
}
|
||||
)
|
||||
|
||||
return segments
|
||||
|
||||
|
||||
class STTProvider(ABC):
    """Interface that every STT provider implementation must satisfy."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier of the provider."""

    @abstractmethod
    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe an audio file.

        Args:
            audio_path: Location of the audio file to transcribe.
            diarize: Whether speaker diarization is requested.
            keyterms: Keywords to boost, for providers that support it.
            on_event: Optional callback invoked as ``(event_type, data)`` for
                raw WebSocket events.
            **kwargs: Provider-specific parameters.

        Returns:
            TranscriptionResult carrying the transcript and its metadata.
        """
|
||||
235
evals/stt/providers/deepgram_flux_provider.py
Normal file
235
evals/stt/providers/deepgram_flux_provider.py
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
"""Deepgram Flux STT provider with WebSocket streaming.
|
||||
|
||||
Flux is Deepgram's conversational AI model with built-in turn detection.
|
||||
It has a different API than Nova models - no language/punctuate/diarize params.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ..audio_streamer import AudioConfig, AudioStreamer
|
||||
from .base import EventCallback, STTProvider, TranscriptionResult, Word
|
||||
|
||||
try:
|
||||
from websockets.asyncio.client import connect as websocket_connect
|
||||
except ImportError:
|
||||
raise ImportError("websockets required: pip install websockets")
|
||||
|
||||
|
||||
class DeepgramFluxProvider(STTProvider):
    """Deepgram Flux Speech-to-Text provider with WebSocket streaming.

    Flux is optimized for conversational AI with built-in turn detection.

    Key differences from Nova:
    - Uses v2 API endpoint
    - Only supports English (flux-general-en)
    - No punctuate, diarize, or language params
    - Has turn detection events (StartOfTurn, EndOfTurn, EagerEndOfTurn)
    - Supports keyterm boosting

    API Docs: https://developers.deepgram.com/docs/
    """

    WS_URL = "wss://api.deepgram.com/v2/listen"

    def __init__(self, api_key: str | None = None):
        """Store the API key, falling back to the DEEPGRAM_API_KEY env var.

        Raises:
            ValueError: If no API key is passed and the env var is unset/empty.
        """
        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Deepgram API key required. Set DEEPGRAM_API_KEY env var or pass api_key."
            )

    @property
    def name(self) -> str:
        return "deepgram-flux"

    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,  # Ignored - Flux doesn't support diarization
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        model: str = "flux-general-en",
        sample_rate: int = 16000,
        eot_threshold: float | None = 0.70,
        eot_timeout_ms: int | None = 3000,
        eager_eot_threshold: float | None = None,
        trailing_silence_seconds: float = 3.0,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe audio using Deepgram Flux WebSocket streaming.

        Args:
            audio_path: Path to audio file
            diarize: IGNORED - Flux does not support diarization
            keyterms: List of keywords to boost recognition
            on_event: Optional callback for raw WebSocket events
            model: Flux model (default: flux-general-en)
            sample_rate: Audio sample rate (default: 16000 for Flux)
            eot_threshold: End-of-turn confidence threshold (default 0.7);
                omitted from the request when None
            eot_timeout_ms: Timeout in ms to force end of turn (default 3000);
                omitted from the request when None
            eager_eot_threshold: Threshold for eager end-of-turn events;
                omitted from the request when None
            trailing_silence_seconds: Seconds of silence after audio to capture pending events
            **kwargs: Accepted for interface compatibility; not forwarded to
                Deepgram by this implementation

        Returns:
            TranscriptionResult with transcript (no speaker info - Flux doesn't support diarization)

        Raises:
            RuntimeError: If Deepgram sends an "Error" message.
            ConnectionError: If the server stops sending messages before all
                audio has been streamed.
        """
        if diarize:
            logger.warning("Flux does not support diarization - ignoring diarize=True")

        # Build query params - Flux only supports specific params
        params: dict[str, Any] = {
            "model": model,
            "encoding": "linear16",
            "sample_rate": sample_rate,
        }

        # Flux-specific turn detection params
        optional_params = {
            "eot_threshold": eot_threshold,
            "eot_timeout_ms": eot_timeout_ms,
            "eager_eot_threshold": eager_eot_threshold,
        }
        params.update({k: v for k, v in optional_params.items() if v is not None})

        # URL-encode every value; keyterms are sent as repeated "keyterm" params.
        query: list[tuple[str, Any]] = list(params.items())
        query.extend(("keyterm", term) for term in keyterms or [])
        ws_url = f"{self.WS_URL}?{urlencode(query)}"
        logger.debug(f"Flux WebSocket URL: {ws_url}")

        # Setup audio streamer
        streamer = AudioStreamer(AudioConfig(sample_rate=sample_rate))

        # Collect results
        all_transcripts: list[dict[str, Any]] = []
        transcript_parts: list[str] = []
        duration = 0.0
        connected = asyncio.Event()

        async with websocket_connect(
            ws_url,
            additional_headers={"Authorization": f"Token {self.api_key}"},
        ) as ws:

            async def send_audio():
                """Send audio chunks to Deepgram Flux once it reports "Connected"."""
                await connected.wait()

                chunk_no = 0
                async for chunk in streamer.stream_file(
                    audio_path, trailing_silence_seconds=trailing_silence_seconds
                ):
                    logger.trace(f"[deepgram-flux] Sent audio chunk {chunk_no}")
                    await ws.send(chunk)
                    chunk_no += 1

            async def receive_messages():
                """Receive and collect Flux messages."""
                nonlocal duration

                async for message in ws:
                    if not isinstance(message, str):
                        continue

                    data = json.loads(message)
                    msg_type = data.get("type")
                    logger.debug(f"[deepgram-flux] Received {msg_type}: {data}")

                    # Emit event via callback if provided
                    if on_event and msg_type:
                        on_event(msg_type, data)

                    if msg_type == "Connected":
                        logger.info("[deepgram-flux] Connected")
                        connected.set()

                    elif msg_type == "TurnInfo":
                        event = data.get("event")

                        if event == "EndOfTurn":
                            transcript = data.get("transcript", "")
                            words = data.get("words", [])
                            if transcript:
                                transcript_parts.append(transcript)
                            if words:
                                all_transcripts.append(
                                    {"transcript": transcript, "words": words}
                                )
                                # Get duration from last word
                                duration = max(duration, words[-1].get("end", 0))

                        elif event == "TurnResumed":
                            logger.debug("TurnResumed")

                    elif msg_type == "Error":
                        raise RuntimeError(f"Deepgram Flux error: {data}")

            # Run send and receive concurrently
            send_task = asyncio.create_task(send_audio())
            receive_task = asyncio.create_task(receive_messages())

            try:
                # Wait for whichever side finishes first: the sender blocks on
                # `connected`, so a receiver that fails or ends before the
                # "Connected" message would otherwise leave us waiting forever.
                await asyncio.wait(
                    {send_task, receive_task}, return_when=asyncio.FIRST_COMPLETED
                )

                if not send_task.done():
                    # Receiver stopped first: re-raise its error if it has one.
                    receive_task.result()
                    raise ConnectionError(
                        "Deepgram Flux stopped sending messages before all audio was sent"
                    )

                send_task.result()  # propagate any streaming error
                logger.debug("[deepgram-flux] Send task done")

                try:
                    await asyncio.wait_for(receive_task, timeout=10.0)
                except asyncio.TimeoutError:
                    # No more messages arrived within the grace period.
                    pass
            finally:
                # Never leave a task running against a socket that is about to close.
                for task in (send_task, receive_task):
                    if not task.done():
                        task.cancel()
                await asyncio.gather(send_task, receive_task, return_exceptions=True)

        return self._parse_results(
            all_transcripts, " ".join(transcript_parts).strip(), duration, params, keyterms
        )

    def _parse_results(
        self,
        transcripts: list[dict[str, Any]],
        final_transcript: str,
        duration: float,
        params: dict[str, Any],
        keyterms: list[str] | None,
    ) -> TranscriptionResult:
        """Parse collected Flux results into TranscriptionResult.

        Args:
            transcripts: Collected end-of-turn entries, each with a "words" list.
            final_transcript: Concatenated transcript text.
            duration: Largest word end time seen.
            params: Query parameters used for the request.
            keyterms: Keyterms used for the request, recorded in the stored params.
        """
        words = [
            Word(
                word=w.get("word", ""),
                start=w.get("start", 0.0),
                end=w.get("end", 0.0),
                confidence=w.get("confidence", 0.0),
                speaker=None,  # Flux doesn't support diarization
                speaker_confidence=None,
            )
            for turn in transcripts
            for w in turn.get("words", [])
        ]

        stored_params = dict(params)
        if keyterms:
            stored_params["keyterms"] = keyterms

        return TranscriptionResult(
            provider=self.name,
            transcript=final_transcript,
            words=words,
            speakers=[],  # Flux doesn't support diarization
            duration=duration,
            raw_response={"transcripts": transcripts},
            params=stored_params,
        )
|
||||
236
evals/stt/providers/deepgram_provider.py
Normal file
236
evals/stt/providers/deepgram_provider.py
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
"""Deepgram STT provider with WebSocket streaming."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from ..audio_streamer import AudioConfig, AudioStreamer
|
||||
from .base import EventCallback, STTProvider, TranscriptionResult, Word
|
||||
from loguru import logger
|
||||
|
||||
try:
|
||||
from websockets.asyncio.client import connect as websocket_connect
|
||||
except ImportError:
|
||||
raise ImportError("websockets required: pip install websockets")
|
||||
|
||||
|
||||
class DeepgramProvider(STTProvider):
    """Deepgram Nova Speech-to-Text provider with WebSocket streaming.

    API Docs: https://developers.deepgram.com/docs/

    Supports:
    - Speaker diarization via `diarize=true`
    - Keyterm boosting via `keyterm` parameter
    - Real-time streaming via WebSocket
    - Multiple languages
    - Punctuation

    For Flux models, use DeepgramFluxProvider instead.
    """

    WS_URL = "wss://api.deepgram.com/v1/listen"

    def __init__(self, api_key: str | None = None):
        """Store the API key, falling back to the DEEPGRAM_API_KEY env var.

        Raises:
            ValueError: If no API key is passed and the env var is unset/empty.
        """
        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Deepgram API key required. Set DEEPGRAM_API_KEY env var or pass api_key."
            )

    @property
    def name(self) -> str:
        return "deepgram"

    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        model: str = "nova-3-general",
        language: str = "en",
        sample_rate: int = 8000,
        punctuate: bool = True,
        trailing_silence_seconds: float = 3.0,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe audio using Deepgram Nova WebSocket streaming.

        Errors raised while streaming are logged and swallowed; whatever was
        collected up to that point is still returned.

        Args:
            audio_path: Path to audio file
            diarize: Enable speaker diarization
            keyterms: List of keywords to boost recognition
            on_event: Optional callback for raw WebSocket events
            model: Deepgram Nova model (nova-3, nova-2, etc.)
            language: Language code
            sample_rate: Audio sample rate for streaming
            punctuate: Add punctuation
            trailing_silence_seconds: Seconds of silence after audio to capture pending events
            **kwargs: Additional Deepgram parameters, appended to the query string

        Returns:
            TranscriptionResult with transcript and speaker info
        """
        # Build query params
        params: dict[str, Any] = {
            "model": model,
            "language": language,
            "punctuate": str(punctuate).lower(),
            "encoding": "linear16",
            "sample_rate": sample_rate,
            "channels": 1,
            "interim_results": "true",
            "smart_format": "true",
            "profanity_filter": "true",
            "vad_events": "true",
            "utterance_end_ms": "1000",
        }

        if diarize:
            params["diarize"] = "true"

        # URL-encode every value. Order: fixed params, repeated keyterms, extra kwargs.
        query: list[tuple[str, Any]] = list(params.items())
        query.extend(("keyterm", term) for term in keyterms or [])
        query.extend(kwargs.items())
        ws_url = f"{self.WS_URL}?{urlencode(query)}"
        logger.debug(f"Deepgram WebSocket URL: {ws_url}")

        # Setup audio streamer
        streamer = AudioStreamer(AudioConfig(sample_rate=sample_rate))

        # Collect results
        all_words: list[dict[str, Any]] = []
        transcript_parts: list[str] = []
        duration = 0.0

        try:
            async with websocket_connect(
                ws_url,
                additional_headers={"Authorization": f"Token {self.api_key}"},
            ) as ws:

                async def send_audio():
                    """Send audio chunks to Deepgram, then ask it to close the stream."""
                    chunk_no = 0
                    async for chunk in streamer.stream_file(
                        audio_path, trailing_silence_seconds=trailing_silence_seconds
                    ):
                        logger.trace(f"[deepgram] Sent audio chunk {chunk_no}")
                        await ws.send(chunk)
                        chunk_no += 1
                    # Send close message
                    logger.debug(f"[deepgram] Sending CloseStream after {chunk_no} chunks")
                    await ws.send(json.dumps({"type": "CloseStream"}))

                async def receive_transcripts():
                    """Receive and collect transcription results."""
                    nonlocal duration

                    async for message in ws:
                        if not isinstance(message, str):
                            continue

                        data = json.loads(message)
                        msg_type = data.get("type")
                        logger.debug(f"[deepgram] Received {msg_type}: {data}")

                        # Emit event via callback if provided
                        if on_event and msg_type:
                            on_event(msg_type, data)

                        if msg_type == "Results":
                            # Nova-style response
                            alternatives = data.get("channel", {}).get("alternatives", [])
                            # interim_results is enabled, so only finalised
                            # results are kept; collecting interim words too
                            # would record the same words more than once.
                            if alternatives and data.get("is_final"):
                                alt = alternatives[0]
                                all_words.extend(alt.get("words", []))
                                text = alt.get("transcript", "")
                                if text:
                                    transcript_parts.append(text)
                                duration = max(
                                    duration, data.get("duration", 0) + data.get("start", 0)
                                )

                        elif msg_type == "Metadata":
                            # Get duration from metadata
                            duration = data.get("duration", duration)

                        elif msg_type == "Error":
                            raise Exception(f"Deepgram error: {data}")

                # Run send and receive concurrently
                send_task = asyncio.create_task(send_audio())
                receive_task = asyncio.create_task(receive_transcripts())

                try:
                    # Wait for send to complete, then wait a bit for final results
                    await send_task
                    try:
                        await asyncio.wait_for(receive_task, timeout=5.0)
                    except asyncio.TimeoutError:
                        pass  # Normal - websocket closes after final results
                finally:
                    # Do not leave either task running once the socket closes.
                    for task in (send_task, receive_task):
                        if not task.done():
                            task.cancel()
                    await asyncio.gather(send_task, receive_task, return_exceptions=True)
        except Exception as e:
            # Best effort: log the failure and fall through with partial results.
            logger.exception(e)

        return self._parse_results(
            all_words, " ".join(transcript_parts).strip(), duration, params, keyterms
        )

    def _parse_results(
        self,
        raw_words: list[dict[str, Any]],
        transcript: str,
        duration: float,
        params: dict[str, Any],
        keyterms: list[str] | None,
    ) -> TranscriptionResult:
        """Parse collected results into TranscriptionResult.

        Args:
            raw_words: Word dicts collected from "Results" messages.
            transcript: Concatenated final transcript text.
            duration: Duration value gathered while receiving.
            params: Query parameters used for the request.
            keyterms: Keyterms used for the request, recorded in the stored params.
        """
        words = []
        speakers_set: set[str] = set()

        for w in raw_words:
            # Speaker labels are normalised to strings; absent labels stay None.
            speaker = str(w.get("speaker", "")) if "speaker" in w else None
            if speaker:
                speakers_set.add(speaker)

            words.append(
                Word(
                    word=w.get("word", ""),
                    start=w.get("start", 0.0),
                    end=w.get("end", 0.0),
                    confidence=w.get("confidence", 0.0),
                    speaker=speaker,
                    speaker_confidence=w.get("speaker_confidence"),
                )
            )

        stored_params = dict(params)
        if keyterms:
            stored_params["keyterms"] = keyterms

        return TranscriptionResult(
            provider=self.name,
            transcript=transcript,
            words=words,
            speakers=sorted(speakers_set),
            duration=duration,
            raw_response={"words": raw_words},
            params=stored_params,
        )
|
||||
287
evals/stt/providers/local_smart_turn_provider.py
Normal file
287
evals/stt/providers/local_smart_turn_provider.py
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
"""Local Smart Turn provider for benchmarking end-of-turn detection.
|
||||
|
||||
Uses the pipecat smart-turn-v3 ONNX model for local ML-based turn detection.
|
||||
This is NOT an STT provider - it only detects when a speaker has finished talking.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
from ..audio_streamer import AudioConfig, AudioStreamer
|
||||
from .base import EventCallback, STTProvider, TranscriptionResult, Word
|
||||
|
||||
try:
|
||||
import onnxruntime as ort
|
||||
from transformers import WhisperFeatureExtractor
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"onnxruntime and transformers required: pip install onnxruntime transformers"
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class TurnEvent:
    """A single turn-detection result."""

    # Time in audio when turn was detected
    timestamp: float
    # Model confidence
    probability: float
    # 1=complete, 0=incomplete
    prediction: int
    inference_time_ms: float
|
||||
|
||||
|
||||
class LocalSmartTurnProvider(STTProvider):
|
||||
"""Local Smart Turn provider for end-of-turn detection benchmarking.
|
||||
|
||||
Uses the smart-turn-v3 ONNX model to detect when speakers finish talking.
|
||||
This is useful for comparing turn detection accuracy against cloud services
|
||||
like Deepgram Flux's built-in turn detection.
|
||||
|
||||
NOTE: This provider does NOT produce transcripts - only turn detection events.
|
||||
"""
|
||||
|
||||
# Smart turn model requires 16kHz audio
|
||||
REQUIRED_SAMPLE_RATE = 16000
|
||||
# Model analyzes 8 seconds of audio
|
||||
WINDOW_SECONDS = 8
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_path: str | None = None,
|
||||
cpu_count: int = 1,
|
||||
):
|
||||
"""Initialize the local smart turn provider.
|
||||
|
||||
Args:
|
||||
model_path: Path to ONNX model file. If None, uses bundled model.
|
||||
cpu_count: Number of CPUs for inference (default: 1)
|
||||
"""
|
||||
self.model_path = model_path
|
||||
self.cpu_count = cpu_count
|
||||
self._session = None
|
||||
self._feature_extractor = None
|
||||
|
||||
def _load_model(self):
    """Lazy load the ONNX model and the Whisper feature extractor.

    Does nothing when a session already exists. When no explicit
    ``model_path`` was configured, the model file bundled in the
    ``pipecat.audio.turn.smart_turn.data`` package is used.
    """
    if self._session is not None:
        return

    model_path = self.model_path

    if not model_path:
        # Try to load bundled model from pipecat
        model_name = "smart-turn-v3.1-cpu.onnx"
        package_path = "pipecat.audio.turn.smart_turn.data"

        # Resolve via `files()`. The deprecated `resources.path()` context
        # manager is avoided because the path it yields is only guaranteed
        # to exist while the `with` block is open.
        try:
            from importlib import resources as impresources

            model_path = str(impresources.files(package_path).joinpath(model_name))
        except Exception:
            # Fall back to the third-party backport if the stdlib lookup fails.
            import importlib_resources as impresources

            model_path = str(impresources.files(package_path).joinpath(model_name))

    logger.info(f"[local-smart-turn] Loading model from {model_path}")

    # Configure ONNX runtime
    so = ort.SessionOptions()
    so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    so.inter_op_num_threads = 1
    so.intra_op_num_threads = self.cpu_count
    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

    # The extractor's chunk length matches the analysis window (8 seconds).
    self._feature_extractor = WhisperFeatureExtractor(chunk_length=self.WINDOW_SECONDS)
    self._session = ort.InferenceSession(model_path, sess_options=so)

    logger.info("[local-smart-turn] Model loaded")
|
||||
|
||||
@property
def name(self) -> str:
    """Provider identifier."""
    return "local-smart-turn"
|
||||
|
||||
def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
|
||||
"""Predict end-of-turn using the ONNX model.
|
||||
|
||||
Args:
|
||||
audio_array: Audio samples as float32 numpy array (16kHz)
|
||||
|
||||
Returns:
|
||||
Dict with prediction (0/1) and probability
|
||||
"""
|
||||
# Truncate to last 8 seconds or pad to 8 seconds
|
||||
max_samples = self.WINDOW_SECONDS * self.REQUIRED_SAMPLE_RATE
|
||||
if len(audio_array) > max_samples:
|
||||
audio_array = audio_array[-max_samples:]
|
||||
elif len(audio_array) < max_samples:
|
||||
padding = max_samples - len(audio_array)
|
||||
audio_array = np.pad(audio_array, (padding, 0), mode="constant", constant_values=0)
|
||||
|
||||
# Process using Whisper's feature extractor
|
||||
inputs = self._feature_extractor(
|
||||
audio_array,
|
||||
sampling_rate=self.REQUIRED_SAMPLE_RATE,
|
||||
return_tensors="np",
|
||||
padding="max_length",
|
||||
max_length=self.WINDOW_SECONDS * self.REQUIRED_SAMPLE_RATE,
|
||||
truncation=True,
|
||||
do_normalize=True,
|
||||
)
|
||||
|
||||
# Extract features for ONNX
|
||||
input_features = inputs.input_features.squeeze(0).astype(np.float32)
|
||||
input_features = np.expand_dims(input_features, axis=0)
|
||||
|
||||
# Run inference
|
||||
start_time = time.perf_counter()
|
||||
outputs = self._session.run(None, {"input_features": input_features})
|
||||
inference_time = (time.perf_counter() - start_time) * 1000
|
||||
|
||||
# Extract probability (model returns sigmoid probabilities)
|
||||
probability = outputs[0][0].item()
|
||||
prediction = 1 if probability > 0.5 else 0
|
||||
|
||||
return {
|
||||
"prediction": prediction,
|
||||
"probability": probability,
|
||||
"inference_time_ms": inference_time,
|
||||
}
|
||||
|
||||
async def transcribe(
    self,
    audio_path: Path,
    diarize: bool = False,  # Ignored - not applicable
    keyterms: list[str] | None = None,  # Ignored - not applicable
    on_event: EventCallback | None = None,  # Ignored - not applicable
    sample_rate: int = 16000,  # Must be 16kHz for smart turn
    analysis_interval_ms: int = 500,  # How often to check for turn completion
    **kwargs: Any,
) -> TranscriptionResult:
    """Analyze audio for turn detection events.

    NOTE: This does NOT produce transcripts. It detects when speakers
    finish talking using ML-based turn detection.

    The whole file is decoded up front, then the model is run on a sliding
    window (at most ``WINDOW_SECONDS`` long) that ends at every multiple of
    ``analysis_interval_ms``, including a point that falls exactly on the
    end of the audio.

    Args:
        audio_path: Path to audio file
        diarize: Ignored (not applicable for turn detection)
        keyterms: Ignored (not applicable for turn detection)
        on_event: Ignored (not applicable for turn detection)
        sample_rate: Must be 16000 Hz for smart turn model; any other value
            is overridden with a warning
        analysis_interval_ms: How often to run turn detection (ms)
        **kwargs: Additional parameters (ignored)

    Returns:
        TranscriptionResult with turn detection events in raw_response.
        Each positive prediction is also exposed as a pseudo-word so the
        result stays compatible with transcript consumers.

    Raises:
        ValueError: If ``analysis_interval_ms`` is too small to cover at
            least one audio sample.
    """
    if sample_rate != self.REQUIRED_SAMPLE_RATE:
        logger.warning(
            f"[local-smart-turn] Sample rate must be {self.REQUIRED_SAMPLE_RATE}Hz, "
            f"overriding {sample_rate}Hz"
        )
        sample_rate = self.REQUIRED_SAMPLE_RATE

    # Validate the step before doing any expensive work: a zero step makes
    # range() raise an opaque error and a negative one silently analyses nothing.
    samples_per_interval = int(sample_rate * analysis_interval_ms / 1000)
    if samples_per_interval <= 0:
        raise ValueError(
            f"analysis_interval_ms must be a positive interval, got {analysis_interval_ms}"
        )

    # Load model if not already loaded
    self._load_model()

    # Setup audio streamer at 16kHz
    audio_config = AudioConfig(sample_rate=sample_rate)
    streamer = AudioStreamer(audio_config)

    # Get audio duration
    duration = streamer.get_duration(audio_path)
    logger.info(f"[local-smart-turn] Processing {audio_path} ({duration:.2f}s)")

    # Collect all audio first (smart turn needs to analyze segments)
    pcm_data = streamer.convert_to_pcm16(audio_path)

    # Convert 16-bit PCM to float32 in [-1, 1) for the model
    audio_int16 = np.frombuffer(pcm_data, dtype=np.int16)
    audio_float32 = audio_int16.astype(np.float32) / 32768.0

    turn_events: list[TurnEvent] = []
    window_samples = self.WINDOW_SECONDS * sample_rate

    # "+ 1" makes the stop bound inclusive so a window ending exactly at
    # the end of the audio is analysed too.
    analysis_points = range(
        samples_per_interval, len(audio_float32) + 1, samples_per_interval
    )
    for chunk_no, end_sample in enumerate(analysis_points):
        # Window of audio ending at the current position
        start_sample = max(0, end_sample - window_samples)
        audio_window = audio_float32[start_sample:end_sample]

        current_time = end_sample / sample_rate
        logger.debug(f"[local-smart-turn] Analyzing chunk {chunk_no} at {current_time:.2f}s")

        result = self._predict_endpoint(audio_window)

        turn_events.append(TurnEvent(
            timestamp=current_time,
            probability=result["probability"],
            prediction=result["prediction"],
            inference_time_ms=result["inference_time_ms"],
        ))

        if result["prediction"] == 1:
            # Single quotes inside the replacement fields keep this valid
            # on Python versions older than 3.12.
            logger.info(
                f"[local-smart-turn] Turn complete at {current_time:.2f}s "
                f"(prob={result['probability']:.3f})"
                f"(inf time ms={result['inference_time_ms']})"
            )

    # Convert positive turn events to word-like entries for compatibility
    words = [
        Word(
            word=f"[END_OF_TURN prob={event.probability:.2f}]",
            start=event.timestamp - 0.1,
            end=event.timestamp,
            confidence=event.probability,
            speaker=None,
            speaker_confidence=None,
        )
        for event in turn_events
        if event.prediction == 1
    ]

    # One pseudo-word exists per completed turn
    completed_turns = len(words)

    params = {
        "sample_rate": sample_rate,
        "analysis_interval_ms": analysis_interval_ms,
        "window_seconds": self.WINDOW_SECONDS,
    }

    return TranscriptionResult(
        provider=self.name,
        transcript=f"[Turn detection only - {completed_turns} turns detected]",
        words=words,
        speakers=[],  # Not applicable
        duration=duration,
        raw_response={
            "turn_events": [
                {
                    "timestamp": e.timestamp,
                    "probability": e.probability,
                    "prediction": e.prediction,
                    "inference_time_ms": e.inference_time_ms,
                }
                for e in turn_events
            ],
            "completed_turns": completed_turns,
            "total_analyses": len(turn_events),
            "avg_inference_time_ms": (
                sum(e.inference_time_ms for e in turn_events) / len(turn_events)
                if turn_events else 0
            ),
        },
        params=params,
    )
|
||||
258
evals/stt/providers/speechmatics_provider.py
Normal file
258
evals/stt/providers/speechmatics_provider.py
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
"""Speechmatics STT provider with WebSocket streaming."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ..audio_streamer import AudioConfig, AudioStreamer
|
||||
from .base import EventCallback, STTProvider, TranscriptionResult, Word
|
||||
|
||||
try:
|
||||
from websockets.asyncio.client import connect as websocket_connect
|
||||
except ImportError:
|
||||
raise ImportError("websockets required: pip install websockets")
|
||||
|
||||
|
||||
class SpeechmaticsProvider(STTProvider):
    """Speechmatics Speech-to-Text provider with WebSocket streaming.

    API Docs: https://docs.speechmatics.com/

    Supports:
    - Speaker diarization via `diarization: "speaker"` config
    - Speaker sensitivity tuning
    - Real-time streaming via WebSocket
    """

    def __init__(self, api_key: str | None = None, region: str = "eu2"):
        """Store credentials and derive the region-specific endpoint.

        Args:
            api_key: Speechmatics API key; falls back to the
                SPEECHMATICS_API_KEY environment variable.
            region: Region prefix used to build the WebSocket host name.

        Raises:
            ValueError: If no API key is passed and the env var is unset.
        """
        self.api_key = api_key or os.getenv("SPEECHMATICS_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Speechmatics API key required. Set SPEECHMATICS_API_KEY env var or pass api_key."
            )
        # Set region-specific endpoint
        self.ws_url = f"wss://{region}.rt.speechmatics.com/v2"

    @property
    def name(self) -> str:
        """Provider identifier used in results."""
        return "speechmatics"

    async def transcribe(
        self,
        audio_path: Path,
        diarize: bool = False,
        keyterms: list[str] | None = None,
        on_event: EventCallback | None = None,
        language: str = "en",
        operating_point: str = "enhanced",
        sample_rate: int = 8000,
        speaker_sensitivity: float | None = None,
        max_speakers: int | None = None,
        trailing_silence_seconds: float = 3.0,
        **kwargs: Any,
    ) -> TranscriptionResult:
        """Transcribe audio using Speechmatics WebSocket streaming.

        A sender task streams the audio once the server acknowledges the
        session, while a receiver task collects final transcript segments.
        If the receiver stops first (server error or closed connection) the
        sender is cancelled instead of being awaited indefinitely.

        Args:
            audio_path: Path to audio file
            diarize: Enable speaker diarization
            keyterms: Additional vocabulary (limited support)
            on_event: Optional callback for raw WebSocket events
            language: Language code
            operating_point: "standard" or "enhanced"
            sample_rate: Audio sample rate for streaming
            speaker_sensitivity: 0.0-1.0, higher = more speakers detected
                (only applied when ``diarize`` is set)
            max_speakers: Maximum number of speakers to detect
                (only applied when ``diarize`` is set)
            trailing_silence_seconds: Seconds of silence after audio to capture pending events
            **kwargs: Accepted for interface compatibility; not used here

        Returns:
            TranscriptionResult with transcript and speaker info. If the
            server does not send EndOfTranscript within 30 seconds after the
            last audio chunk, the segments received so far are returned.

        Raises:
            RuntimeError: If the server reports an error, or the connection
                ends before all audio was sent.
        """
        # Build transcription config for StartRecognition message
        transcription_config: dict[str, Any] = {
            "language": language,
            "operating_point": operating_point,
            "enable_partials": False,
        }

        if diarize:
            transcription_config["diarization"] = "speaker"
            diarization_config: dict[str, Any] = {}
            if speaker_sensitivity is not None:
                diarization_config["speaker_sensitivity"] = speaker_sensitivity
            if max_speakers is not None:
                diarization_config["max_speakers"] = max_speakers
            # Only send the sub-config when at least one option was given
            if diarization_config:
                transcription_config["speaker_diarization_config"] = diarization_config

        # Add additional vocabulary if provided
        if keyterms:
            transcription_config["additional_vocab"] = [{"content": term} for term in keyterms]

        # Audio format config
        audio_format = {
            "type": "raw",
            "encoding": "pcm_s16le",
            "sample_rate": sample_rate,
        }

        # Store params for result
        params = {
            "diarize": diarize,
            "language": language,
            "operating_point": operating_point,
            "sample_rate": sample_rate,
            "speaker_sensitivity": speaker_sensitivity,
            "max_speakers": max_speakers,
        }

        # Setup audio streamer
        audio_config = AudioConfig(sample_rate=sample_rate)
        streamer = AudioStreamer(audio_config)

        # Collect results
        all_results: list[dict[str, Any]] = []
        recognition_started = asyncio.Event()
        transcription_complete = asyncio.Event()

        async with websocket_connect(
            self.ws_url,
            additional_headers={"Authorization": f"Bearer {self.api_key}"},
        ) as ws:
            # Send StartRecognition message
            start_msg = {
                "message": "StartRecognition",
                "transcription_config": transcription_config,
                "audio_format": audio_format,
            }
            await ws.send(json.dumps(start_msg))

            async def send_audio():
                """Send audio chunks after recognition starts."""
                await recognition_started.wait()

                chunk_no = 0
                async for chunk in streamer.stream_file(
                    audio_path, trailing_silence_seconds=trailing_silence_seconds
                ):
                    await ws.send(chunk)
                    # Log only after the send succeeded
                    logger.debug(f"[speechmatics] Sent audio chunk {chunk_no}")
                    chunk_no += 1

                # Signal end of audio with last sequence number
                logger.debug(f"[speechmatics] Sending EndOfStream after {chunk_no} chunks")
                await ws.send(json.dumps({"message": "EndOfStream", "last_seq_no": chunk_no}))

            async def receive_messages():
                """Receive and process messages until EndOfTranscript."""
                async for message in ws:
                    # Only text frames are handled
                    if not isinstance(message, str):
                        continue

                    data = json.loads(message)
                    msg_type = data.get("message")
                    logger.debug(f"[speechmatics] Received {msg_type}: {data}")

                    # Emit event via callback if provided
                    if on_event and msg_type:
                        on_event(msg_type, data)

                    if msg_type == "RecognitionStarted":
                        logger.info("[speechmatics] Connected")
                        recognition_started.set()

                    elif msg_type == "AddTranscript":
                        # Final transcript segment
                        all_results.extend(data.get("results", []))

                    elif msg_type == "EndOfTranscript":
                        transcription_complete.set()
                        return

                    elif msg_type == "Error":
                        raise RuntimeError(f"Speechmatics error: {data}")

                    elif msg_type == "Warning":
                        logger.warning(f"[speechmatics] Warning: {data.get('reason')}")

            # Run send and receive concurrently
            send_task = asyncio.create_task(send_audio())
            receive_task = asyncio.create_task(receive_messages())

            try:
                # Wake up as soon as EITHER side finishes. Awaiting only the
                # sender would hang forever when the server rejects the
                # session before RecognitionStarted, because the sender is
                # still blocked on that event.
                await asyncio.wait(
                    {send_task, receive_task}, return_when=asyncio.FIRST_COMPLETED
                )

                # Surface a server-side error first: it is more informative
                # than the send failure it may have caused.
                if receive_task.done():
                    receive_task.result()

                if send_task.done():
                    send_task.result()
                    # All audio is out; give the server time to flush the
                    # remaining transcript segments.
                    _, still_running = await asyncio.wait({receive_task}, timeout=30.0)
                    if still_running:
                        logger.warning(
                            "[speechmatics] Timed out waiting for EndOfTranscript; "
                            "returning partial results"
                        )
                    else:
                        receive_task.result()
                elif not transcription_complete.is_set():
                    # Receiver ended without an error and without
                    # EndOfTranscript while audio was still pending.
                    raise RuntimeError(
                        "Speechmatics connection ended before all audio was sent"
                    )
            finally:
                # Never leave either task running past this call
                for task in (send_task, receive_task):
                    if not task.done():
                        task.cancel()
                await asyncio.gather(send_task, receive_task, return_exceptions=True)

        return self._parse_results(all_results, params)

    def _parse_results(
        self,
        results: list[dict[str, Any]],
        params: dict[str, Any],
    ) -> TranscriptionResult:
        """Parse Speechmatics results.

        Args:
            results: Items collected from the ``results`` lists of
                AddTranscript messages.
            params: Request parameters echoed back in the result.

        Returns:
            TranscriptionResult whose transcript joins the recognised words
            with spaces, with punctuation attached to the preceding word.
        """
        words = []
        speakers_set: set[str] = set()
        transcript_parts = []
        duration = 0.0

        for item in results:
            item_type = item.get("type")
            alternatives = item.get("alternatives", [])

            if not alternatives:
                continue

            # Only the first alternative is used
            alt = alternatives[0]
            content = alt.get("content", "")
            speaker = alt.get("speaker")

            if speaker:
                speakers_set.add(speaker)

            # Duration is taken as the latest end time seen in any item
            end_time = item.get("end_time", 0.0)
            duration = max(duration, end_time)

            if item_type == "word":
                words.append(
                    Word(
                        word=content,
                        start=item.get("start_time", 0.0),
                        end=end_time,
                        confidence=alt.get("confidence", 0.0),
                        speaker=speaker,
                        speaker_confidence=None,
                    )
                )
                transcript_parts.append(content)
            elif item_type == "punctuation":
                # Glue punctuation onto the previous word; leading
                # punctuation with no preceding word is dropped.
                if transcript_parts:
                    transcript_parts[-1] += content

        transcript = " ".join(transcript_parts)

        return TranscriptionResult(
            provider=self.name,
            transcript=transcript,
            words=words,
            speakers=sorted(speakers_set),
            duration=duration,
            raw_response={"results": results},
            params=params,
        )
|
||||
867
evals/stt/results/multi_speaker-deepgram-flux.json
Normal file
867
evals/stt/results/multi_speaker-deepgram-flux.json
Normal file
|
|
@ -0,0 +1,867 @@
|
|||
{
|
||||
"audio_file": "multi_speaker.m4a",
|
||||
"audio_path": "../audio/multi_speaker.m4a",
|
||||
"provider": "deepgram-flux",
|
||||
"duration": 7.987664,
|
||||
"created_at": "2026-01-20T12:21:59.183902",
|
||||
"events": [
|
||||
{
|
||||
"timestamp": 3.1916191801428795e-05,
|
||||
"event_type": "Connected",
|
||||
"data": {
|
||||
"type": "Connected",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"sequence_id": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.6468284581787884,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2195,
|
||||
"sequence_id": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.8891876661218703,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.167,
|
||||
"sequence_id": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.0987569580320269,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1045,
|
||||
"sequence_id": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.356455208035186,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.96,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.3054,
|
||||
"sequence_id": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.6076077912002802,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.2,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2996,
|
||||
"sequence_id": 5
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.831926790997386,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.44,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1659,
|
||||
"sequence_id": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.0988957500085235,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.6800001,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0922,
|
||||
"sequence_id": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.320036916062236,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.9200001,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1154,
|
||||
"sequence_id": 8
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.5783222501631826,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 2.16,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0789,
|
||||
"sequence_id": 9
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.805098250042647,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 2.4,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.028,
|
||||
"sequence_id": 10
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.0677467910572886,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 2.6399999,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0544,
|
||||
"sequence_id": 11
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.3053550410550088,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 2.88,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0221,
|
||||
"sequence_id": 12
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.5730851250700653,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 3.12,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0896,
|
||||
"sequence_id": 13
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.7986690001562238,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 3.3600001,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0837,
|
||||
"sequence_id": 14
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.056284500053152,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 3.6,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0217,
|
||||
"sequence_id": 15
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.2824959580320865,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 3.84,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0277,
|
||||
"sequence_id": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.541013500187546,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 4.08,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0636,
|
||||
"sequence_id": 17
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.7826515410561115,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 4.32,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.092,
|
||||
"sequence_id": 18
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.044063208159059,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 4.56,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1632,
|
||||
"sequence_id": 19
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.277323708171025,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 4.8,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1748,
|
||||
"sequence_id": 20
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.519584750058129,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 5.04,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1267,
|
||||
"sequence_id": 21
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.761642290977761,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 5.28,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.085,
|
||||
"sequence_id": 22
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.985961250029504,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 5.52,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0726,
|
||||
"sequence_id": 23
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.235282083041966,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 5.76,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1489,
|
||||
"sequence_id": 24
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.479744625044987,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 6.0,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1815,
|
||||
"sequence_id": 25
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.722758750198409,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 6.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1548,
|
||||
"sequence_id": 26
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.02101350016892,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 6.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1779,
|
||||
"sequence_id": 27
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.2554090830963105,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 6.7200003,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1924,
|
||||
"sequence_id": 28
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.495738583151251,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 6.96,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0734,
|
||||
"sequence_id": 29
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.695259500062093,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 7.2,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0621,
|
||||
"sequence_id": 30
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.9374284581281245,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 7.44,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0523,
|
||||
"sequence_id": 31
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.201127333100885,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 7.68,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0868,
|
||||
"sequence_id": 32
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.452570000197738,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 7.92,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1788,
|
||||
"sequence_id": 33
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.6957666662056,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 8.16,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.3462,
|
||||
"sequence_id": 34
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.937032666057348,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 8.4,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.3477,
|
||||
"sequence_id": 35
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 9.179693832993507,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 8.64,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2825,
|
||||
"sequence_id": 36
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 9.439219749998301,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 8.88,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1785,
|
||||
"sequence_id": 37
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 9.65257745818235,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 9.12,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.119,
|
||||
"sequence_id": 38
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 9.894739540992305,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 9.36,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0948,
|
||||
"sequence_id": 39
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 10.137037916108966,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 9.6,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0836,
|
||||
"sequence_id": 40
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 10.37885733298026,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 9.84,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0648,
|
||||
"sequence_id": 41
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 10.640081625198945,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 10.08,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0426,
|
||||
"sequence_id": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 10.882513708202168,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 10.32,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0297,
|
||||
"sequence_id": 43
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 11.11375533300452,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 10.56,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0247,
|
||||
"sequence_id": 44
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 11.356210750062019,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 10.8,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0134,
|
||||
"sequence_id": 45
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 11.60117325000465,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 11.04,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0102,
|
||||
"sequence_id": 46
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 11.859979416010901,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 11.28,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0089,
|
||||
"sequence_id": 47
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 12.093679000157863,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 11.52,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0074,
|
||||
"sequence_id": 48
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 12.334945333190262,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 11.76,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.007,
|
||||
"sequence_id": 49
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 12.588809041073546,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 12.0,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0067,
|
||||
"sequence_id": 50
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 12.83585675014183,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 12.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0042,
|
||||
"sequence_id": 51
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 13.075434750178829,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 12.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0047,
|
||||
"sequence_id": 52
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 13.31491966615431,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "63038896-d7d9-4186-995f-16056c3306d5",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 12.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0036,
|
||||
"sequence_id": 53
|
||||
}
|
||||
}
|
||||
],
|
||||
"transcript": ""
|
||||
}
|
||||
637
evals/stt/results/multi_speaker-deepgram.json
Normal file
637
evals/stt/results/multi_speaker-deepgram.json
Normal file
|
|
@ -0,0 +1,637 @@
|
|||
{
|
||||
"audio_file": "multi_speaker.m4a",
|
||||
"audio_path": "../audio/multi_speaker.m4a",
|
||||
"provider": "deepgram",
|
||||
"duration": 7.987664,
|
||||
"created_at": "2026-01-20T12:15:06.097292",
|
||||
"events": [
|
||||
{
|
||||
"timestamp": 2.50060111284256e-07,
|
||||
"event_type": "SpeechStarted",
|
||||
"data": {
|
||||
"type": "SpeechStarted",
|
||||
"channel": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"timestamp": 0.13
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.9085824999492615,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 1.0399375,
|
||||
"start": 0.0,
|
||||
"is_final": false,
|
||||
"speech_final": false,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "Biggest pleasure",
|
||||
"confidence": 0.7919922,
|
||||
"words": [
|
||||
{
|
||||
"word": "biggest",
|
||||
"start": 0.0,
|
||||
"end": 0.39999998,
|
||||
"confidence": 0.7919922,
|
||||
"punctuated_word": "Biggest"
|
||||
},
|
||||
{
|
||||
"word": "pleasure",
|
||||
"start": 0.39999998,
|
||||
"end": 0.79999995,
|
||||
"confidence": 0.77734375,
|
||||
"punctuated_word": "pleasure"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.9669485830236226,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 2.0799375,
|
||||
"start": 0.0,
|
||||
"is_final": false,
|
||||
"speech_final": false,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "",
|
||||
"confidence": 0.0,
|
||||
"words": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.0349432919174433,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 3.1199374,
|
||||
"start": 0.0,
|
||||
"is_final": false,
|
||||
"speech_final": false,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "Please give a text that I am just trying to",
|
||||
"confidence": 0.4921875,
|
||||
"words": [
|
||||
{
|
||||
"word": "please",
|
||||
"start": 0.48,
|
||||
"end": 0.79999995,
|
||||
"confidence": 0.19970703,
|
||||
"punctuated_word": "Please"
|
||||
},
|
||||
{
|
||||
"word": "give",
|
||||
"start": 0.79999995,
|
||||
"end": 1.04,
|
||||
"confidence": 0.2849121,
|
||||
"punctuated_word": "give"
|
||||
},
|
||||
{
|
||||
"word": "a",
|
||||
"start": 0.96,
|
||||
"end": 1.1999999,
|
||||
"confidence": 0.4921875,
|
||||
"punctuated_word": "a"
|
||||
},
|
||||
{
|
||||
"word": "text",
|
||||
"start": 1.1999999,
|
||||
"end": 1.5999999,
|
||||
"confidence": 0.4482422,
|
||||
"punctuated_word": "text"
|
||||
},
|
||||
{
|
||||
"word": "that",
|
||||
"start": 1.5999999,
|
||||
"end": 2.1599998,
|
||||
"confidence": 0.5317383,
|
||||
"punctuated_word": "that"
|
||||
},
|
||||
{
|
||||
"word": "i",
|
||||
"start": 2.1599998,
|
||||
"end": 2.32,
|
||||
"confidence": 0.984375,
|
||||
"punctuated_word": "I"
|
||||
},
|
||||
{
|
||||
"word": "am",
|
||||
"start": 2.32,
|
||||
"end": 2.48,
|
||||
"confidence": 0.5024414,
|
||||
"punctuated_word": "am"
|
||||
},
|
||||
{
|
||||
"word": "just",
|
||||
"start": 2.48,
|
||||
"end": 2.6399999,
|
||||
"confidence": 0.27416992,
|
||||
"punctuated_word": "just"
|
||||
},
|
||||
{
|
||||
"word": "trying",
|
||||
"start": 2.6399999,
|
||||
"end": 2.96,
|
||||
"confidence": 0.19909668,
|
||||
"punctuated_word": "trying"
|
||||
},
|
||||
{
|
||||
"word": "to",
|
||||
"start": 2.96,
|
||||
"end": 3.04,
|
||||
"confidence": 0.7060547,
|
||||
"punctuated_word": "to"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.100316457916051,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 4.1599374,
|
||||
"start": 0.0,
|
||||
"is_final": false,
|
||||
"speech_final": false,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "Is the test that I am just trying do so. Multiple",
|
||||
"confidence": 0.7207031,
|
||||
"words": [
|
||||
{
|
||||
"word": "is",
|
||||
"start": 0.24,
|
||||
"end": 0.79999995,
|
||||
"confidence": 0.83251953,
|
||||
"punctuated_word": "Is"
|
||||
},
|
||||
{
|
||||
"word": "the",
|
||||
"start": 0.88,
|
||||
"end": 1.12,
|
||||
"confidence": 0.14794922,
|
||||
"punctuated_word": "the"
|
||||
},
|
||||
{
|
||||
"word": "test",
|
||||
"start": 1.12,
|
||||
"end": 1.52,
|
||||
"confidence": 0.7207031,
|
||||
"punctuated_word": "test"
|
||||
},
|
||||
{
|
||||
"word": "that",
|
||||
"start": 1.52,
|
||||
"end": 2.1599998,
|
||||
"confidence": 0.40307617,
|
||||
"punctuated_word": "that"
|
||||
},
|
||||
{
|
||||
"word": "i",
|
||||
"start": 2.1599998,
|
||||
"end": 2.3999999,
|
||||
"confidence": 0.99316406,
|
||||
"punctuated_word": "I"
|
||||
},
|
||||
{
|
||||
"word": "am",
|
||||
"start": 2.3999999,
|
||||
"end": 2.48,
|
||||
"confidence": 0.52783203,
|
||||
"punctuated_word": "am"
|
||||
},
|
||||
{
|
||||
"word": "just",
|
||||
"start": 2.48,
|
||||
"end": 2.72,
|
||||
"confidence": 0.27270508,
|
||||
"punctuated_word": "just"
|
||||
},
|
||||
{
|
||||
"word": "trying",
|
||||
"start": 2.72,
|
||||
"end": 3.12,
|
||||
"confidence": 0.81591797,
|
||||
"punctuated_word": "trying"
|
||||
},
|
||||
{
|
||||
"word": "do",
|
||||
"start": 3.12,
|
||||
"end": 3.28,
|
||||
"confidence": 0.9116211,
|
||||
"punctuated_word": "do"
|
||||
},
|
||||
{
|
||||
"word": "so",
|
||||
"start": 3.28,
|
||||
"end": 3.4399998,
|
||||
"confidence": 0.37774658,
|
||||
"punctuated_word": "so."
|
||||
},
|
||||
{
|
||||
"word": "multiple",
|
||||
"start": 3.6,
|
||||
"end": 3.84,
|
||||
"confidence": 0.74072266,
|
||||
"punctuated_word": "Multiple"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.506603500107303,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 4.53,
|
||||
"start": 0.0,
|
||||
"is_final": true,
|
||||
"speech_final": true,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "Is the test that I am testing multiple speaker",
|
||||
"confidence": 0.65966797,
|
||||
"words": [
|
||||
{
|
||||
"word": "is",
|
||||
"start": 0.24,
|
||||
"end": 0.39999998,
|
||||
"confidence": 0.83984375,
|
||||
"punctuated_word": "Is"
|
||||
},
|
||||
{
|
||||
"word": "the",
|
||||
"start": 0.39999998,
|
||||
"end": 0.79999995,
|
||||
"confidence": 0.15722656,
|
||||
"punctuated_word": "the"
|
||||
},
|
||||
{
|
||||
"word": "test",
|
||||
"start": 1.12,
|
||||
"end": 1.52,
|
||||
"confidence": 0.8588867,
|
||||
"punctuated_word": "test"
|
||||
},
|
||||
{
|
||||
"word": "that",
|
||||
"start": 1.52,
|
||||
"end": 2.1599998,
|
||||
"confidence": 0.35107422,
|
||||
"punctuated_word": "that"
|
||||
},
|
||||
{
|
||||
"word": "i",
|
||||
"start": 2.1599998,
|
||||
"end": 2.32,
|
||||
"confidence": 0.99121094,
|
||||
"punctuated_word": "I"
|
||||
},
|
||||
{
|
||||
"word": "am",
|
||||
"start": 2.32,
|
||||
"end": 2.48,
|
||||
"confidence": 0.6010742,
|
||||
"punctuated_word": "am"
|
||||
},
|
||||
{
|
||||
"word": "testing",
|
||||
"start": 2.48,
|
||||
"end": 3.12,
|
||||
"confidence": 0.9526367,
|
||||
"punctuated_word": "testing"
|
||||
},
|
||||
{
|
||||
"word": "multiple",
|
||||
"start": 3.4399998,
|
||||
"end": 3.84,
|
||||
"confidence": 0.65966797,
|
||||
"punctuated_word": "multiple"
|
||||
},
|
||||
{
|
||||
"word": "speaker",
|
||||
"start": 3.84,
|
||||
"end": 4.3199997,
|
||||
"confidence": 0.20446777,
|
||||
"punctuated_word": "speaker"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.648572708014399,
|
||||
"event_type": "SpeechStarted",
|
||||
"data": {
|
||||
"type": "SpeechStarted",
|
||||
"channel": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"timestamp": 4.63
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.556989792035893,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 1.0699372,
|
||||
"start": 4.53,
|
||||
"is_final": false,
|
||||
"speech_final": false,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "",
|
||||
"confidence": 0.0,
|
||||
"words": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.615257542114705,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 2.08,
|
||||
"start": 4.53,
|
||||
"is_final": true,
|
||||
"speech_final": true,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "",
|
||||
"confidence": 0.0,
|
||||
"words": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.769657667027786,
|
||||
"event_type": "SpeechStarted",
|
||||
"data": {
|
||||
"type": "SpeechStarted",
|
||||
"channel": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"timestamp": 6.72
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.672739624977112,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 1.0099998,
|
||||
"start": 6.61,
|
||||
"is_final": true,
|
||||
"speech_final": true,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "",
|
||||
"confidence": 0.0,
|
||||
"words": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.081677624955773,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 0.3676877,
|
||||
"start": 7.62,
|
||||
"is_final": true,
|
||||
"speech_final": true,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "",
|
||||
"confidence": 0.0,
|
||||
"words": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.083154707914218,
|
||||
"event_type": "Results",
|
||||
"data": {
|
||||
"type": "Results",
|
||||
"channel_index": [
|
||||
0,
|
||||
1
|
||||
],
|
||||
"duration": 0.0,
|
||||
"start": 7.9876876,
|
||||
"is_final": true,
|
||||
"speech_final": true,
|
||||
"channel": {
|
||||
"alternatives": [
|
||||
{
|
||||
"transcript": "",
|
||||
"confidence": 0.0,
|
||||
"words": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"model_info": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
},
|
||||
"model_uuid": "40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
},
|
||||
"from_finalize": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 8.083194707985967,
|
||||
"event_type": "Metadata",
|
||||
"data": {
|
||||
"type": "Metadata",
|
||||
"transaction_key": "deprecated",
|
||||
"request_id": "39481f46-cd5f-40b1-9a55-a6635d8c06d9",
|
||||
"sha256": "a6f954deb3fb3bf7a3c420061d5dd968251ba401d6304e6cd2fc9f396c12da77",
|
||||
"created": "2026-01-20T06:44:57.522Z",
|
||||
"duration": 7.9876876,
|
||||
"channels": 1,
|
||||
"models": [
|
||||
"40bd3654-e622-47c4-a111-63a61b23bfe8"
|
||||
],
|
||||
"model_info": {
|
||||
"40bd3654-e622-47c4-a111-63a61b23bfe8": {
|
||||
"name": "general-nova-3",
|
||||
"version": "2025-04-17.21547",
|
||||
"arch": "nova-3"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"transcript": "Is the test that I am testing multiple speaker"
|
||||
}
|
||||
445
evals/stt/results/nope-deepgram-flux.json
Normal file
445
evals/stt/results/nope-deepgram-flux.json
Normal file
|
|
@ -0,0 +1,445 @@
|
|||
{
|
||||
"audio_file": "nope.m4a",
|
||||
"audio_path": "../audio/nope.m4a",
|
||||
"provider": "deepgram-flux",
|
||||
"duration": 3.390113,
|
||||
"created_at": "2026-01-20T13:34:04.075559",
|
||||
"events": [
|
||||
{
|
||||
"timestamp": 3.3294782042503357e-07,
|
||||
"event_type": "Connected",
|
||||
"data": {
|
||||
"type": "Connected",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"sequence_id": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.6400237919297069,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1726,
|
||||
"sequence_id": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.850623874925077,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0643,
|
||||
"sequence_id": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.0877662498969585,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0343,
|
||||
"sequence_id": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.3602930000051856,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.96,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.023,
|
||||
"sequence_id": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.5734205420594662,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "StartOfTurn",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.2,
|
||||
"transcript": "No.",
|
||||
"words": [
|
||||
{
|
||||
"word": "No.",
|
||||
"confidence": 0.9956
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.1445,
|
||||
"sequence_id": 5
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.7732612078543752,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "EndOfTurn",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.36,
|
||||
"transcript": "No.",
|
||||
"words": [
|
||||
{
|
||||
"word": "No.",
|
||||
"confidence": 1.0
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.7266,
|
||||
"sequence_id": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.0032672078814358,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 1.6,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2114,
|
||||
"sequence_id": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.272528207860887,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 1.8399999,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2886,
|
||||
"sequence_id": 8
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.4770477078855038,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 2.08,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1366,
|
||||
"sequence_id": 9
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.7586996669415385,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 2.32,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0687,
|
||||
"sequence_id": 10
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.9688463748898357,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 2.56,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0571,
|
||||
"sequence_id": 11
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.2333728750236332,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 2.8,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0284,
|
||||
"sequence_id": 12
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.4381651668809354,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 3.04,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0352,
|
||||
"sequence_id": 13
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.7163160829804838,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 3.28,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0211,
|
||||
"sequence_id": 14
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.936306041898206,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 3.52,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0123,
|
||||
"sequence_id": 15
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.212840874912217,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 3.76,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0399,
|
||||
"sequence_id": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.417071416974068,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 4.0,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0503,
|
||||
"sequence_id": 17
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.685962416930124,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 4.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0443,
|
||||
"sequence_id": 18
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.898042541928589,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 4.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0367,
|
||||
"sequence_id": 19
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.167347207898274,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 4.7200003,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0221,
|
||||
"sequence_id": 20
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.415992958005518,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 4.96,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1116,
|
||||
"sequence_id": 21
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.703707166947424,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 5.2,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0883,
|
||||
"sequence_id": 22
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.923421707935631,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 5.44,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0663,
|
||||
"sequence_id": 23
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.128664416959509,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 5.68,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0324,
|
||||
"sequence_id": 24
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.382756792008877,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 5.92,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0138,
|
||||
"sequence_id": 25
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.629080249927938,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "b42d9771-4a63-4c7f-aa89-33370cd70d23",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.36,
|
||||
"audio_window_end": 6.16,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0064,
|
||||
"sequence_id": 26
|
||||
}
|
||||
}
|
||||
],
|
||||
"transcript": "No."
|
||||
}
|
||||
678
evals/stt/results/not_so_sure-deepgram-flux.json
Normal file
678
evals/stt/results/not_so_sure-deepgram-flux.json
Normal file
|
|
@ -0,0 +1,678 @@
|
|||
{
|
||||
"audio_file": "not_so_sure.m4a",
|
||||
"audio_path": "../audio/not_so_sure.m4a",
|
||||
"provider": "deepgram-flux",
|
||||
"duration": 3.784853,
|
||||
"created_at": "2026-01-20T13:34:30.619814",
|
||||
"events": [
|
||||
{
|
||||
"timestamp": 4.1606836020946503e-07,
|
||||
"event_type": "Connected",
|
||||
"data": {
|
||||
"type": "Connected",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"sequence_id": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.6479636249132454,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2837,
|
||||
"sequence_id": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.8711565409321338,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1409,
|
||||
"sequence_id": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.0940386659931391,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.103,
|
||||
"sequence_id": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.3378053328488022,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "StartOfTurn",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.96,
|
||||
"transcript": "I don",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 0.8521
|
||||
},
|
||||
{
|
||||
"word": "don",
|
||||
"confidence": 0.9858
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.1526,
|
||||
"sequence_id": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.575752625009045,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.2,
|
||||
"transcript": "I don't know",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "don't",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "know",
|
||||
"confidence": 0.9956
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.0815,
|
||||
"sequence_id": 5
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.809568207943812,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.44,
|
||||
"transcript": "I don't know. I",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "don't",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "know.",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 0.9995
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.0533,
|
||||
"sequence_id": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.0778977079316974,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.6800001,
|
||||
"transcript": "I don't know. I'm not",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "don't",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "know.",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "I'm",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "not",
|
||||
"confidence": 1.0
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.0296,
|
||||
"sequence_id": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.3323032909538597,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.9200001,
|
||||
"transcript": "I don't know. I'm not sure she",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "don't",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "know.",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "I'm",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "not",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "sure",
|
||||
"confidence": 0.9692
|
||||
},
|
||||
{
|
||||
"word": "she",
|
||||
"confidence": 0.6968
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.1591,
|
||||
"sequence_id": 8
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.563972583040595,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 2.16,
|
||||
"transcript": "I don't know. I'm not so sure.",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "don't",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "know.",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "I'm",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "not",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "so",
|
||||
"confidence": 0.9971
|
||||
},
|
||||
{
|
||||
"word": "sure.",
|
||||
"confidence": 1.0
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.5312,
|
||||
"sequence_id": 9
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.766235665883869,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "EndOfTurn",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 2.32,
|
||||
"transcript": "I don't know. I'm not so sure.",
|
||||
"words": [
|
||||
{
|
||||
"word": "I",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "don't",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "know.",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "I'm",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "not",
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"word": "so",
|
||||
"confidence": 0.9971
|
||||
},
|
||||
{
|
||||
"word": "sure.",
|
||||
"confidence": 1.0
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.7129,
|
||||
"sequence_id": 10
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.980985000031069,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 2.56,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.6235,
|
||||
"sequence_id": 11
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.040183125063777,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 2.6399999,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.7163,
|
||||
"sequence_id": 12
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.134053166024387,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 2.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.7603,
|
||||
"sequence_id": 13
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.200523457955569,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 2.8,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.8013,
|
||||
"sequence_id": 14
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.3396010829601437,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 2.88,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.8052,
|
||||
"sequence_id": 15
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.462065916042775,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 3.04,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.6968,
|
||||
"sequence_id": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.532107833074406,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 3.12,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.7026,
|
||||
"sequence_id": 17
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.6854247499722987,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 3.28,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.6123,
|
||||
"sequence_id": 18
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.9346718329470605,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 3.52,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.4551,
|
||||
"sequence_id": 19
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.174561291001737,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 3.76,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.293,
|
||||
"sequence_id": 20
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.423174874857068,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 4.0,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1186,
|
||||
"sequence_id": 21
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.661856249906123,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 4.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1186,
|
||||
"sequence_id": 22
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.934342915890738,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 4.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0629,
|
||||
"sequence_id": 23
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.1988217500038445,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 4.7200003,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0302,
|
||||
"sequence_id": 24
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.868438957957551,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 4.96,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0104,
|
||||
"sequence_id": 25
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.924830165924504,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 5.2,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0039,
|
||||
"sequence_id": 26
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.008775374852121,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 5.44,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.003,
|
||||
"sequence_id": 27
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.224981207866222,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 5.68,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0027,
|
||||
"sequence_id": 28
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.400387583067641,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 5.92,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0944,
|
||||
"sequence_id": 29
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.6102081660646945,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 6.16,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.083,
|
||||
"sequence_id": 30
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.853603166062385,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 6.4,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0674,
|
||||
"sequence_id": 31
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.1176844160072505,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "badd4484-3b22-42c5-bd5f-13fd2014021b",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 2.32,
|
||||
"audio_window_end": 6.64,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0348,
|
||||
"sequence_id": 32
|
||||
}
|
||||
}
|
||||
],
|
||||
"transcript": "I don't know. I'm not so sure."
|
||||
}
|
||||
936
evals/stt/results/not_so_sure-speechmatics.json
Normal file
936
evals/stt/results/not_so_sure-speechmatics.json
Normal file
|
|
@ -0,0 +1,936 @@
|
|||
{
|
||||
"audio_file": "not_so_sure.m4a",
|
||||
"audio_path": "../audio/not_so_sure.m4a",
|
||||
"provider": "speechmatics",
|
||||
"duration": 3.784853,
|
||||
"created_at": "2026-01-20T13:38:01.957263",
|
||||
"events": [
|
||||
{
|
||||
"timestamp": 2.50060111284256e-07,
|
||||
"event_type": "Info",
|
||||
"data": {
|
||||
"message": "Info",
|
||||
"type": "concurrent_session_usage",
|
||||
"reason": "1 concurrent sessions active out of quota 2",
|
||||
"usage": 1,
|
||||
"quota": 2,
|
||||
"last_updated": "2026-01-20T08:07:53Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.17636274988763034,
|
||||
"event_type": "RecognitionStarted",
|
||||
"data": {
|
||||
"message": "RecognitionStarted",
|
||||
"orchestrator_version": "2026.01.09+e449221ca0+14.12.0",
|
||||
"id": "ff50bcc6-03cc-4609-b52b-c61492be97b0",
|
||||
"language_pack_info": {
|
||||
"adapted": false,
|
||||
"itn": true,
|
||||
"language_description": "English",
|
||||
"word_delimiter": " ",
|
||||
"writing_direction": "left-to-right"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.1765422080643475,
|
||||
"event_type": "Info",
|
||||
"data": {
|
||||
"message": "Info",
|
||||
"type": "recognition_quality",
|
||||
"reason": "Running recognition using a broadcast model quality.",
|
||||
"quality": "broadcast"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.44156987499445677,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.5090052080340683,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.5927771248389035,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.6792412919458002,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.7540834578685462,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 5
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.8363401249516755,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.916276125004515,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.0025545828975737,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 8
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.0930295418947935,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 9
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.1681176249403507,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 10
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.2440201670397073,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 11
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.3254928330425173,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 12
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.411379124969244,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 13
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.4989973329938948,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 14
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.569762917002663,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 15
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.6669557499699295,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.7321407499257475,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 17
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.8123597078956664,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 18
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.89311487483792,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 19
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.99575070803985,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 20
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.0635348330251873,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 21
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.136281125014648,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 22
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.2212352079804987,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 23
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.300102249952033,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 24
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.3838018749374896,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 25
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.4612751249223948,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 26
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.5520844168495387,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 27
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.6254100420046598,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 28
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.7110170419327915,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 29
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.793728666845709,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 30
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.8698849170468748,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 31
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.9517348748631775,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 32
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.034996416885406,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 33
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.1222795830108225,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 34
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.2133053748402745,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 35
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.2794892080128193,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 36
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.360972832888365,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 37
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.480351625010371,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 38
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.527200457872823,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 39
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.614834832958877,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 40
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.7000621668994427,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 41
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.7709098330233246,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.870571249863133,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 43
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.9319135828409344,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 44
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.0240056668408215,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 45
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.1135993748903275,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 46
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.178906166926026,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 47
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.262735291849822,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 48
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.3524885000661016,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 49
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.42170758289285,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 50
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.503200083039701,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 51
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.588893749983981,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 52
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.6728779170662165,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 53
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.749415792059153,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 54
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.834314750041813,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 55
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.934304124908522,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 56
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.015187042066827,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 57
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.083739625057206,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 58
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.15739579196088,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 59
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.254215708002448,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 60
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.319055167026818,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 61
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.422228208044544,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 62
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.493815457914025,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 63
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.562712874962017,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 64
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.677756666904315,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 65
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.728489124914631,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 66
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.73234708304517,
|
||||
"event_type": "AddTranscript",
|
||||
"data": {
|
||||
"message": "AddTranscript",
|
||||
"format": "2.9",
|
||||
"results": [
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "I",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 0.8,
|
||||
"start_time": 0.64,
|
||||
"type": "word"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"end_time": 0.8,
|
||||
"start_time": 0.0,
|
||||
"transcript": "I "
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.831468666903675,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 67
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.9311752079520375,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 68
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.970860542031005,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 69
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.0573643748648465,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 70
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.071638958062977,
|
||||
"event_type": "AddTranscript",
|
||||
"data": {
|
||||
"message": "AddTranscript",
|
||||
"format": "2.9",
|
||||
"results": [
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "don't",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 1.08,
|
||||
"start_time": 0.84,
|
||||
"type": "word"
|
||||
},
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "know",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 1.2,
|
||||
"start_time": 1.08,
|
||||
"type": "word"
|
||||
},
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": ".",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"attaches_to": "previous",
|
||||
"end_time": 1.2,
|
||||
"is_eos": true,
|
||||
"start_time": 1.2,
|
||||
"type": "punctuation"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"end_time": 1.2,
|
||||
"start_time": 0.8,
|
||||
"transcript": "don't know. "
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.143923291936517,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 71
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.229828458046541,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 72
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.297467292053625,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 73
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.388417499838397,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 74
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.46747541683726,
|
||||
"event_type": "AddTranscript",
|
||||
"data": {
|
||||
"message": "AddTranscript",
|
||||
"format": "2.9",
|
||||
"results": [
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "I'm",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 1.4,
|
||||
"start_time": 1.2,
|
||||
"type": "word"
|
||||
},
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "not",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 1.56,
|
||||
"start_time": 1.4,
|
||||
"type": "word"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"end_time": 1.56,
|
||||
"start_time": 1.2,
|
||||
"transcript": "I'm not "
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.467542249942198,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 75
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.571689167059958,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 76
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.633496082853526,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 77
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.705628624884412,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 78
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.791943500051275,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 79
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.8231504168361425,
|
||||
"event_type": "AddTranscript",
|
||||
"data": {
|
||||
"message": "AddTranscript",
|
||||
"format": "2.9",
|
||||
"results": [
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "so",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 1.72,
|
||||
"start_time": 1.56,
|
||||
"type": "word"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"end_time": 1.72,
|
||||
"start_time": 1.56,
|
||||
"transcript": "so "
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.889297208050266,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 80
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 6.96820458304137,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 81
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.030788874952123,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 82
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.114988874876872,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 83
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.1660370419267565,
|
||||
"event_type": "AddTranscript",
|
||||
"data": {
|
||||
"message": "AddTranscript",
|
||||
"format": "2.9",
|
||||
"results": [
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": "sure",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"end_time": 2.2,
|
||||
"start_time": 1.76,
|
||||
"type": "word"
|
||||
},
|
||||
{
|
||||
"alternatives": [
|
||||
{
|
||||
"confidence": 1.0,
|
||||
"content": ".",
|
||||
"language": "en"
|
||||
}
|
||||
],
|
||||
"attaches_to": "previous",
|
||||
"end_time": 2.2,
|
||||
"is_eos": true,
|
||||
"start_time": 2.2,
|
||||
"type": "punctuation"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"end_time": 2.2,
|
||||
"start_time": 1.72,
|
||||
"transcript": "sure. "
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.197767958045006,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 84
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.281636083032936,
|
||||
"event_type": "AudioAdded",
|
||||
"data": {
|
||||
"message": "AudioAdded",
|
||||
"seq_no": 85
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.966639708029106,
|
||||
"event_type": "AddTranscript",
|
||||
"data": {
|
||||
"message": "AddTranscript",
|
||||
"format": "2.9",
|
||||
"results": [],
|
||||
"metadata": {
|
||||
"end_time": 6.72,
|
||||
"start_time": 2.28,
|
||||
"transcript": ""
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 7.966674832860008,
|
||||
"event_type": "EndOfTranscript",
|
||||
"data": {
|
||||
"message": "EndOfTranscript"
|
||||
}
|
||||
}
|
||||
],
|
||||
"transcript": "I don't know. I'm not so sure."
|
||||
}
|
||||
2931
evals/stt/results/vad-deepgram-flux.json
Normal file
2931
evals/stt/results/vad-deepgram-flux.json
Normal file
File diff suppressed because it is too large
Load diff
1131
evals/stt/results/vad-deepgram.json
Normal file
1131
evals/stt/results/vad-deepgram.json
Normal file
File diff suppressed because it is too large
Load diff
402
evals/stt/results/yes-deepgram-flux.json
Normal file
402
evals/stt/results/yes-deepgram-flux.json
Normal file
|
|
@ -0,0 +1,402 @@
|
|||
{
|
||||
"audio_file": "yes.m4a",
|
||||
"audio_path": "../audio/yes.m4a",
|
||||
"provider": "deepgram-flux",
|
||||
"duration": 2.507755,
|
||||
"created_at": "2026-01-20T13:33:37.737569",
|
||||
"events": [
|
||||
{
|
||||
"timestamp": 2.0791776478290558e-07,
|
||||
"event_type": "Connected",
|
||||
"data": {
|
||||
"type": "Connected",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"sequence_id": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.6149860408622772,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2494,
|
||||
"sequence_id": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 0.8699209159240127,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1246,
|
||||
"sequence_id": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.0665327080059797,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0557,
|
||||
"sequence_id": 3
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.319559457944706,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "StartOfTurn",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 0.96,
|
||||
"transcript": "Yes.",
|
||||
"words": [
|
||||
{
|
||||
"word": "Yes.",
|
||||
"confidence": 0.9761
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.0793,
|
||||
"sequence_id": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.5604322908911854,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.2,
|
||||
"transcript": "Yes.",
|
||||
"words": [
|
||||
{
|
||||
"word": "Yes.",
|
||||
"confidence": 1.0
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.5703,
|
||||
"sequence_id": 5
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.6325784579385072,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "EndOfTurn",
|
||||
"turn_index": 0,
|
||||
"audio_window_start": 0.0,
|
||||
"audio_window_end": 1.28,
|
||||
"transcript": "Yes.",
|
||||
"words": [
|
||||
{
|
||||
"word": "Yes.",
|
||||
"confidence": 1.0
|
||||
}
|
||||
],
|
||||
"end_of_turn_confidence": 0.7026,
|
||||
"sequence_id": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 1.897370790829882,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 1.52,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.4883,
|
||||
"sequence_id": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.117000916041434,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 1.76,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.3801,
|
||||
"sequence_id": 8
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.3733394159935415,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 2.0,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.2346,
|
||||
"sequence_id": 9
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.6072654998861253,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 2.24,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1049,
|
||||
"sequence_id": 10
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 2.85038537485525,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 2.48,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.075,
|
||||
"sequence_id": 11
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.091235165949911,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 2.72,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0218,
|
||||
"sequence_id": 12
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.3325049998238683,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 2.96,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.03,
|
||||
"sequence_id": 13
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.577521916013211,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 3.2,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0189,
|
||||
"sequence_id": 14
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 3.8645569998770952,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 3.44,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0118,
|
||||
"sequence_id": 15
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.106258499901742,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 3.68,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0089,
|
||||
"sequence_id": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.346511875046417,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 3.92,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0073,
|
||||
"sequence_id": 17
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.589668208034709,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 4.16,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0053,
|
||||
"sequence_id": 18
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 4.826804416021332,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 4.4,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0034,
|
||||
"sequence_id": 19
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.060472874902189,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 4.64,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0024,
|
||||
"sequence_id": 20
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.304136332822964,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 4.88,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1091,
|
||||
"sequence_id": 21
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.544230999890715,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 5.12,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.1007,
|
||||
"sequence_id": 22
|
||||
}
|
||||
},
|
||||
{
|
||||
"timestamp": 5.779906540876254,
|
||||
"event_type": "TurnInfo",
|
||||
"data": {
|
||||
"type": "TurnInfo",
|
||||
"request_id": "277cf8d3-27b0-439b-a04e-707598e13489",
|
||||
"event": "Update",
|
||||
"turn_index": 1,
|
||||
"audio_window_start": 1.28,
|
||||
"audio_window_end": 5.36,
|
||||
"transcript": "",
|
||||
"words": [],
|
||||
"end_of_turn_confidence": 0.0565,
|
||||
"sequence_id": 23
|
||||
}
|
||||
}
|
||||
],
|
||||
"transcript": "Yes."
|
||||
}
|
||||
41
evals/visualizer/.gitignore
vendored
Normal file
41
evals/visualizer/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
36
evals/visualizer/README.md
Normal file
36
evals/visualizer/README.md
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
||||
|
||||
## Getting Started
|
||||
|
||||
First, run the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
# or
|
||||
yarn dev
|
||||
# or
|
||||
pnpm dev
|
||||
# or
|
||||
bun dev
|
||||
```
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
||||
|
||||
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
||||
|
||||
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about Next.js, take a look at the following resources:
|
||||
|
||||
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
||||
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
||||
|
||||
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
||||
|
||||
## Deploy on Vercel
|
||||
|
||||
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
||||
|
||||
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
||||
18
evals/visualizer/eslint.config.mjs
Normal file
18
evals/visualizer/eslint.config.mjs
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import { defineConfig, globalIgnores } from "eslint/config";
|
||||
import nextVitals from "eslint-config-next/core-web-vitals";
|
||||
import nextTs from "eslint-config-next/typescript";
|
||||
|
||||
const eslintConfig = defineConfig([
|
||||
...nextVitals,
|
||||
...nextTs,
|
||||
// Override default ignores of eslint-config-next.
|
||||
globalIgnores([
|
||||
// Default ignores of eslint-config-next:
|
||||
".next/**",
|
||||
"out/**",
|
||||
"build/**",
|
||||
"next-env.d.ts",
|
||||
]),
|
||||
]);
|
||||
|
||||
export default eslintConfig;
|
||||
7
evals/visualizer/next.config.ts
Normal file
7
evals/visualizer/next.config.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
/* config options here */
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
26
evals/visualizer/package.json
Normal file
26
evals/visualizer/package.json
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"name": "visualizer",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "eslint"
|
||||
},
|
||||
"dependencies": {
|
||||
"next": "16.1.4",
|
||||
"react": "19.2.3",
|
||||
"react-dom": "19.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "16.1.4",
|
||||
"tailwindcss": "^4",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
4008
evals/visualizer/pnpm-lock.yaml
generated
Normal file
4008
evals/visualizer/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load diff
5
evals/visualizer/pnpm-workspace.yaml
Normal file
5
evals/visualizer/pnpm-workspace.yaml
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
packages:
|
||||
- .
|
||||
ignoredBuiltDependencies:
|
||||
- sharp
|
||||
- unrs-resolver
|
||||
7
evals/visualizer/postcss.config.mjs
Normal file
7
evals/visualizer/postcss.config.mjs
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
const config = {
|
||||
plugins: {
|
||||
"@tailwindcss/postcss": {},
|
||||
},
|
||||
};
|
||||
|
||||
export default config;
|
||||
1
evals/visualizer/public/file.svg
Normal file
1
evals/visualizer/public/file.svg
Normal file
|
|
@ -0,0 +1 @@
|
|||
<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
|
||||
|
After Width: | Height: | Size: 391 B |
1
evals/visualizer/public/globe.svg
Normal file
1
evals/visualizer/public/globe.svg
Normal file
|
|
@ -0,0 +1 @@
|
|||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
|
||||
|
After Width: | Height: | Size: 1 KiB |
1
evals/visualizer/public/next.svg
Normal file
1
evals/visualizer/public/next.svg
Normal file
|
|
@ -0,0 +1 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
|
||||
|
After Width: | Height: | Size: 1.3 KiB |
1
evals/visualizer/public/vercel.svg
Normal file
1
evals/visualizer/public/vercel.svg
Normal file
|
|
@ -0,0 +1 @@
|
|||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
|
||||
|
After Width: | Height: | Size: 128 B |
1
evals/visualizer/public/window.svg
Normal file
1
evals/visualizer/public/window.svg
Normal file
|
|
@ -0,0 +1 @@
|
|||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
|
||||
|
After Width: | Height: | Size: 385 B |
42
evals/visualizer/src/app/api/audio/[filename]/route.ts
Normal file
42
evals/visualizer/src/app/api/audio/[filename]/route.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import { NextRequest, NextResponse } from "next/server";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
const AUDIO_DIR = path.join(process.cwd(), "..", "stt", "audio");
|
||||
|
||||
const MIME_TYPES: Record<string, string> = {
|
||||
".mp3": "audio/mpeg",
|
||||
".wav": "audio/wav",
|
||||
".m4a": "audio/mp4",
|
||||
".ogg": "audio/ogg",
|
||||
".webm": "audio/webm",
|
||||
};
|
||||
|
||||
/**
 * Serves one audio file from AUDIO_DIR.
 *
 * `filename` is taken from the dynamic route segment, i.e. it is caller
 * controlled. It is therefore resolved against AUDIO_DIR and rejected unless
 * the resulting path stays strictly inside that directory, so `..` segments
 * or absolute paths cannot be used to read files elsewhere on disk.
 *
 * @param request - Incoming request; unused, present to satisfy the handler signature.
 * @param params - Promise resolving to the route params (`filename`).
 * @returns The file bytes with a Content-Type chosen from MIME_TYPES by
 *   extension (falling back to `application/octet-stream`); a 400 JSON error
 *   when the name points outside AUDIO_DIR; a 404 JSON error when no regular
 *   file exists at that path; a 500 JSON error on any unexpected failure.
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ filename: string }> }
) {
  try {
    const { filename } = await params;

    // Normalise first, then verify containment: the relative path from
    // AUDIO_DIR must not be empty (the directory itself), climb upwards,
    // or be absolute (different drive on Windows).
    const filePath = path.resolve(AUDIO_DIR, filename);
    const relativePath = path.relative(AUDIO_DIR, filePath);
    const escapesAudioDir =
      relativePath === "" ||
      relativePath === ".." ||
      relativePath.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativePath);

    if (escapesAudioDir) {
      return NextResponse.json({ error: "Invalid filename" }, { status: 400 });
    }

    // Directories pass existsSync but cannot be read as a file; treat them
    // as "not found" instead of letting readFileSync fail with a 500.
    if (!fs.existsSync(filePath) || !fs.statSync(filePath).isFile()) {
      return NextResponse.json({ error: "Audio file not found" }, { status: 404 });
    }

    const ext = path.extname(filename).toLowerCase();
    const contentType = MIME_TYPES[ext] || "application/octet-stream";

    const fileBuffer = fs.readFileSync(filePath);

    return new NextResponse(fileBuffer, {
      headers: {
        "Content-Type": contentType,
        "Content-Length": fileBuffer.length.toString(),
      },
    });
  } catch (error) {
    console.error("Error serving audio:", error);
    return NextResponse.json({ error: "Failed to serve audio" }, { status: 500 });
  }
}
|
||||
27
evals/visualizer/src/app/api/results/[id]/route.ts
Normal file
27
evals/visualizer/src/app/api/results/[id]/route.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import { NextRequest, NextResponse } from "next/server";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
const RESULTS_DIR = path.join(process.cwd(), "..", "stt", "results");
|
||||
|
||||
/**
 * Returns the parsed contents of a single result file, `<id>.json`, from
 * RESULTS_DIR.
 *
 * `id` is taken from the dynamic route segment, i.e. it is caller controlled.
 * The target path is resolved against RESULTS_DIR and rejected unless it
 * stays strictly inside that directory, so `..` segments or absolute paths
 * cannot be used to read JSON files elsewhere on disk.
 *
 * @param request - Incoming request; unused, present to satisfy the handler signature.
 * @param params - Promise resolving to the route params (`id`).
 * @returns The parsed JSON document; a 400 JSON error when the id points
 *   outside RESULTS_DIR; a 404 JSON error when no regular file exists for the
 *   id; a 500 JSON error on any unexpected failure (including invalid JSON).
 */
export async function GET(
  request: NextRequest,
  { params }: { params: Promise<{ id: string }> }
) {
  try {
    const { id } = await params;

    // Normalise first, then verify containment: the relative path from
    // RESULTS_DIR must not be empty, climb upwards, or be absolute.
    const filePath = path.resolve(RESULTS_DIR, `${id}.json`);
    const relativePath = path.relative(RESULTS_DIR, filePath);
    const escapesResultsDir =
      relativePath === "" ||
      relativePath === ".." ||
      relativePath.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativePath);

    if (escapesResultsDir) {
      return NextResponse.json({ error: "Invalid result id" }, { status: 400 });
    }

    // A directory named "<id>.json" would pass existsSync; report it as
    // missing rather than failing in readFileSync.
    if (!fs.existsSync(filePath) || !fs.statSync(filePath).isFile()) {
      return NextResponse.json({ error: "Result not found" }, { status: 404 });
    }

    const content = fs.readFileSync(filePath, "utf-8");
    const data = JSON.parse(content);

    return NextResponse.json(data);
  } catch (error) {
    console.error("Error reading result:", error);
    return NextResponse.json({ error: "Failed to read result" }, { status: 500 });
  }
}
|
||||
47
evals/visualizer/src/app/api/results/route.ts
Normal file
47
evals/visualizer/src/app/api/results/route.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import { NextResponse } from "next/server";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { ResultSummary, EventCaptureResult } from "@/types";
|
||||
|
||||
const RESULTS_DIR = path.join(process.cwd(), "..", "stt", "results");
|
||||
|
||||
/**
 * Lists a summary of every `*.json` result file in RESULTS_DIR.
 *
 * Each file is read and parsed independently; a file that cannot be read or
 * parsed (or lacks an `events` array) is logged and skipped so one bad file
 * does not hide the others. Summaries are sorted by `created_at`, newest
 * first.
 *
 * @returns A JSON array of result summaries (empty when RESULTS_DIR does not
 *   exist), or a 500 JSON error when the directory listing itself fails.
 */
export async function GET() {
  try {
    if (!fs.existsSync(RESULTS_DIR)) {
      return NextResponse.json([]);
    }

    const files = fs.readdirSync(RESULTS_DIR).filter((f) => f.endsWith(".json"));
    const results: ResultSummary[] = [];

    for (const file of files) {
      try {
        const filePath = path.join(RESULTS_DIR, file);
        const content = fs.readFileSync(filePath, "utf-8");
        const data: EventCaptureResult = JSON.parse(content);

        results.push({
          // Strip only the trailing extension. String.replace would remove
          // the first ".json" found anywhere in the name, producing an id
          // that no longer maps back to "<id>.json".
          id: path.basename(file, ".json"),
          audio_file: data.audio_file,
          provider: data.provider,
          duration: data.duration,
          created_at: data.created_at,
          event_count: data.events.length,
        });
      } catch (error) {
        // Keep going, but include the cause so the failure can be diagnosed.
        console.error(`Failed to parse ${file}`, error);
      }
    }

    // Sort by created_at descending
    results.sort(
      (a, b) =>
        new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
    );

    return NextResponse.json(results);
  } catch (error) {
    console.error("Error reading results:", error);
    return NextResponse.json({ error: "Failed to read results" }, { status: 500 });
  }
}
|
||||
BIN
evals/visualizer/src/app/favicon.ico
Normal file
BIN
evals/visualizer/src/app/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
26
evals/visualizer/src/app/globals.css
Normal file
26
evals/visualizer/src/app/globals.css
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
@import "tailwindcss";
|
||||
|
||||
:root {
|
||||
--background: #ffffff;
|
||||
--foreground: #171717;
|
||||
}
|
||||
|
||||
@theme inline {
|
||||
--color-background: var(--background);
|
||||
--color-foreground: var(--foreground);
|
||||
--font-sans: var(--font-geist-sans);
|
||||
--font-mono: var(--font-geist-mono);
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
--background: #0a0a0a;
|
||||
--foreground: #ededed;
|
||||
}
|
||||
}
|
||||
|
||||
body {
|
||||
background: var(--background);
|
||||
color: var(--foreground);
|
||||
font-family: Arial, Helvetica, sans-serif;
|
||||
}
|
||||
34
evals/visualizer/src/app/layout.tsx
Normal file
34
evals/visualizer/src/app/layout.tsx
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Geist, Geist_Mono } from "next/font/google";
|
||||
import "./globals.css";
|
||||
|
||||
const geistSans = Geist({
|
||||
variable: "--font-geist-sans",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
const geistMono = Geist_Mono({
|
||||
variable: "--font-geist-mono",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "STT Event Visualizer",
|
||||
description: "Visualize WebSocket events from STT providers",
|
||||
};
|
||||
|
||||
/**
 * Root layout: wraps every page in the `<html>`/`<body>` shell and puts the
 * two font CSS-variable classes plus `antialiased` on the body.
 */
export default function RootLayout(
  props: Readonly<{
    children: React.ReactNode;
  }>
) {
  const bodyClassName = `${geistSans.variable} ${geistMono.variable} antialiased`;

  return (
    <html lang="en">
      <body className={bodyClassName}>{props.children}</body>
    </html>
  );
}
|
||||
129
evals/visualizer/src/app/page.tsx
Normal file
129
evals/visualizer/src/app/page.tsx
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"use client";
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import Link from "next/link";
|
||||
import { ResultSummary } from "@/types";
|
||||
|
||||
/**
 * Format a duration given in seconds as `m:ss`.
 *
 * Fractional seconds are truncated and minutes are not wrapped at 60
 * (3600 s renders as `60:00`). Non-finite or negative input renders as
 * `0:00` instead of `NaN:NaN` / `-1:-1`.
 *
 * @param seconds Duration in seconds.
 * @returns The formatted `m:ss` string.
 */
function formatDuration(seconds: number): string {
  const wholeSeconds =
    Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
  const mins = Math.floor(wholeSeconds / 60);
  const secs = wholeSeconds % 60;
  return `${mins}:${secs.toString().padStart(2, "0")}`;
}
|
||||
|
||||
/**
 * Format a timestamp string for display using the `en-US` locale
 * (short month, numeric day and year, two-digit hour and minute).
 *
 * If the string cannot be parsed into a valid date it is returned
 * unchanged, so the UI shows the raw value rather than "Invalid Date".
 *
 * @param isoString Timestamp string accepted by the `Date` constructor.
 * @returns The formatted date/time, or `isoString` itself when unparseable.
 */
function formatDate(isoString: string): string {
  const date = new Date(isoString);
  if (Number.isNaN(date.getTime())) {
    return isoString;
  }
  return date.toLocaleDateString("en-US", {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });
}
|
||||
|
||||
/**
 * Badge classes keyed by provider name.
 * Callers fall back to a neutral badge for providers not listed here.
 */
const PROVIDER_COLORS: Record<string, string> = {
  "deepgram": "bg-blue-500/20 text-blue-300",
  "deepgram-flux": "bg-green-500/20 text-green-300",
  "speechmatics": "bg-purple-500/20 text-purple-300",
};
|
||||
|
||||
/**
 * Landing page: loads the list of captured results from `/api/results`
 * and renders one link per result pointing at `/view/<id>`.
 *
 * Shows a spinner while loading, an error banner on failure, and a hint
 * with the capture command when the list is empty.
 */
export default function Home() {
  const [results, setResults] = useState<ResultSummary[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Abort the request on unmount so a late response never touches state.
    const controller = new AbortController();

    async function fetchResults() {
      try {
        const response = await fetch("/api/results", {
          signal: controller.signal,
        });
        if (!response.ok) {
          throw new Error("Failed to fetch results");
        }
        const data: unknown = await response.json();
        // The list below calls `.map` on this value, so reject anything
        // that is not an array here rather than crashing during render.
        if (!Array.isArray(data)) {
          throw new Error("Unexpected response format");
        }
        setResults(data as ResultSummary[]);
      } catch (err) {
        if (controller.signal.aborted) return;
        setError(err instanceof Error ? err.message : "Unknown error");
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false);
        }
      }
    }

    void fetchResults();
    return () => controller.abort();
  }, []);

  const hasSettled = !loading && !error;

  return (
    <div className="min-h-screen bg-zinc-950 text-white">
      <div className="max-w-4xl mx-auto px-6 py-12">
        <header className="mb-12">
          <h1 className="text-3xl font-bold">STT Event Visualizer</h1>
          <p className="text-zinc-400 mt-2">
            Visualize captured WebSocket events from STT providers
          </p>
        </header>

        {loading && (
          <div className="flex items-center justify-center py-12">
            <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-white"></div>
          </div>
        )}

        {error && (
          <div className="bg-red-500/20 text-red-300 p-4 rounded-lg">
            {error}
          </div>
        )}

        {hasSettled && results.length === 0 && (
          <div className="text-center py-12 text-zinc-500">
            <p className="text-lg mb-4">No results found</p>
            <p className="text-sm">
              Run the event capture script to generate results:
            </p>
            <code className="block mt-2 bg-zinc-800 p-3 rounded text-zinc-300 text-sm">
              python -m evals.stt.event_capture audio/multi_speaker.m4a --provider deepgram
            </code>
          </div>
        )}

        {hasSettled && results.length > 0 && (
          <div className="space-y-3">
            {results.map((result) => (
              <Link
                key={result.id}
                href={`/view/${result.id}`}
                className="block bg-zinc-900 hover:bg-zinc-800 rounded-lg p-4 transition-colors"
              >
                <div className="flex items-center justify-between">
                  <div className="space-y-1">
                    <div className="flex items-center gap-3">
                      <span className="font-medium">{result.audio_file}</span>
                      <span
                        className={`text-xs px-2 py-0.5 rounded ${
                          PROVIDER_COLORS[result.provider] ||
                          "bg-zinc-700 text-zinc-300"
                        }`}
                      >
                        {result.provider}
                      </span>
                    </div>
                    <div className="text-sm text-zinc-500">
                      {formatDate(result.created_at)}
                    </div>
                  </div>
                  <div className="text-right space-y-1">
                    <div className="text-sm text-zinc-400">
                      {formatDuration(result.duration)}
                    </div>
                    <div className="text-xs text-zinc-500">
                      {result.event_count} events
                    </div>
                  </div>
                </div>
              </Link>
            ))}
          </div>
        )}
      </div>
    </div>
  );
}
|
||||
158
evals/visualizer/src/app/view/[id]/page.tsx
Normal file
158
evals/visualizer/src/app/view/[id]/page.tsx
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
"use client";
|
||||
|
||||
import { useEffect, useState, useCallback } from "react";
|
||||
import { useParams } from "next/navigation";
|
||||
import Link from "next/link";
|
||||
import { EventCaptureResult } from "@/types";
|
||||
import AudioPlayer from "@/components/AudioPlayer";
|
||||
import EventTimeline from "@/components/EventTimeline";
|
||||
import EventList from "@/components/EventList";
|
||||
|
||||
/**
 * Badge classes keyed by provider name.
 * Callers fall back to a neutral badge for providers not listed here.
 */
const PROVIDER_COLORS: Record<string, string> = {
  "deepgram": "bg-blue-500/20 text-blue-300",
  "deepgram-flux": "bg-green-500/20 text-green-300",
  "speechmatics": "bg-purple-500/20 text-purple-300",
};
|
||||
|
||||
/**
 * Detail page for one captured result.
 *
 * Reads the `id` route parameter, loads `/api/results/<id>`, and renders
 * the audio player, the event timeline, the final transcript (when
 * present) and the event list. `currentTime` is the shared playback
 * position: the player reports it, and the timeline and list both
 * highlight against it and request seeks through it.
 */
export default function ViewPage() {
  const params = useParams();
  const id = params.id as string;

  const [result, setResult] = useState<EventCaptureResult | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [currentTime, setCurrentTime] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);

  useEffect(() => {
    if (!id) return;

    // Set by the cleanup so a response for a previous `id` (or one that
    // arrives after unmount) cannot overwrite newer state.
    let cancelled = false;

    // Start each load from a clean slate so a previous result or error is
    // not shown while the new one is in flight.
    setLoading(true);
    setError(null);
    setResult(null);

    async function fetchResult() {
      try {
        const response = await fetch(`/api/results/${id}`);
        if (!response.ok) {
          if (response.status === 404) {
            throw new Error("Result not found");
          }
          throw new Error("Failed to fetch result");
        }
        const data = await response.json();
        if (!cancelled) {
          setResult(data);
        }
      } catch (err) {
        if (!cancelled) {
          setError(err instanceof Error ? err.message : "Unknown error");
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    }

    void fetchResult();
    return () => {
      cancelled = true;
    };
  }, [id]);

  const handleTimeUpdate = useCallback((time: number) => {
    setCurrentTime(time);
  }, []);

  const handlePlayingChange = useCallback((playing: boolean) => {
    setIsPlaying(playing);
  }, []);

  const handleSeek = useCallback((time: number) => {
    setCurrentTime(time);
  }, []);

  if (loading) {
    return (
      <div className="min-h-screen bg-zinc-950 text-white flex items-center justify-center">
        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-white"></div>
      </div>
    );
  }

  if (error) {
    return (
      <div className="min-h-screen bg-zinc-950 text-white p-6">
        <div className="max-w-4xl mx-auto">
          <Link href="/" className="text-zinc-400 hover:text-white mb-4 inline-block">
            ← Back to results
          </Link>
          <div className="bg-red-500/20 text-red-300 p-4 rounded-lg">{error}</div>
        </div>
      </div>
    );
  }

  if (!result) {
    return null;
  }

  // Escape each path segment so characters such as `#`, `?`, `%` or spaces
  // in the file name do not truncate or corrupt the URL. Any `/` is kept
  // as a separator.
  const audioPath = String(result.audio_file)
    .split("/")
    .map(encodeURIComponent)
    .join("/");
  const audioUrl = `/api/audio/${audioPath}`;

  return (
    <div className="min-h-screen bg-zinc-950 text-white">
      <div className="max-w-7xl mx-auto px-6 py-6">
        {/* Header */}
        <header className="mb-6">
          <Link href="/" className="text-zinc-400 hover:text-white mb-2 inline-block text-sm">
            ← Back to results
          </Link>
          <div className="flex items-center gap-3">
            <h1 className="text-2xl font-bold">{result.audio_file}</h1>
            <span
              className={`text-sm px-2 py-0.5 rounded ${
                PROVIDER_COLORS[result.provider] || "bg-zinc-700 text-zinc-300"
              }`}
            >
              {result.provider}
            </span>
          </div>
          {result.transcript && (
            <p className="text-zinc-400 mt-2 text-sm line-clamp-2">
              {result.transcript}
            </p>
          )}
        </header>

        {/* Main content */}
        <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
          {/* Left column: Audio player and timeline */}
          <div className="lg:col-span-2 space-y-4">
            <AudioPlayer
              audioUrl={audioUrl}
              duration={result.duration}
              currentTime={currentTime}
              onTimeUpdate={handleTimeUpdate}
              onPlayingChange={handlePlayingChange}
            />

            <EventTimeline
              events={result.events}
              duration={result.duration}
              currentTime={currentTime}
              onSeek={handleSeek}
            />

            {/* Transcript section */}
            {result.transcript && (
              <div className="bg-zinc-800 rounded-lg p-4">
                <div className="text-sm text-zinc-400 font-medium mb-2">
                  Final Transcript
                </div>
                <p className="text-zinc-300">{result.transcript}</p>
              </div>
            )}
          </div>

          {/* Right column: Event list */}
          <div className="lg:col-span-1 h-[calc(100vh-12rem)]">
            <EventList
              events={result.events}
              currentTime={currentTime}
              onSeek={handleSeek}
              provider={result.provider}
            />
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||
145
evals/visualizer/src/components/AudioPlayer.tsx
Normal file
145
evals/visualizer/src/components/AudioPlayer.tsx
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
"use client";
|
||||
|
||||
import { useRef, useEffect, useState, useCallback } from "react";
|
||||
|
||||
/** Props accepted by the `AudioPlayer` component. */
interface AudioPlayerProps {
  /** Source URL handed to the `<audio>` element. */
  audioUrl: string;
  /** Total length, used as the seek slider's maximum and shown as the end label. */
  duration: number;
  /** Externally controlled playback position. */
  currentTime: number;
  /** Called with the new position whenever playback time changes. */
  onTimeUpdate: (time: number) => void;
  /** Called with `true` when playback starts and `false` when it stops. */
  onPlayingChange: (playing: boolean) => void;
}
|
||||
|
||||
/**
 * Format a time in seconds as `m:ss`.
 *
 * Fractional seconds are truncated. Non-finite or negative input renders
 * as `0:00` instead of `NaN:NaN` / `-1:-1`.
 *
 * @param seconds Time in seconds.
 * @returns The formatted `m:ss` string.
 */
function formatTime(seconds: number): string {
  const wholeSeconds =
    Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
  const mins = Math.floor(wholeSeconds / 60);
  const secs = wholeSeconds % 60;
  return `${mins}:${secs.toString().padStart(2, "0")}`;
}
|
||||
|
||||
/**
 * Audio player with a play/pause button and a seek slider.
 *
 * The `<audio>` element is the source of truth for playback. Its
 * `timeupdate`, `play`, `pause` and `ended` events are mirrored into local
 * state and forwarded through `onTimeUpdate` / `onPlayingChange`. When the
 * `currentTime` prop differs from the element's position by more than
 * 0.5 s, the element is moved to the prop value.
 */
export default function AudioPlayer({
  audioUrl,
  duration,
  currentTime,
  onTimeUpdate,
  onPlayingChange,
}: AudioPlayerProps) {
  const audioRef = useRef<HTMLAudioElement>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [internalTime, setInternalTime] = useState(0);

  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;

    const handleTimeUpdate = () => {
      setInternalTime(audio.currentTime);
      onTimeUpdate(audio.currentTime);
    };

    const handlePlay = () => {
      setIsPlaying(true);
      onPlayingChange(true);
    };

    // Used for both "pause" and "ended": either way playback has stopped.
    const handleStop = () => {
      setIsPlaying(false);
      onPlayingChange(false);
    };

    audio.addEventListener("timeupdate", handleTimeUpdate);
    audio.addEventListener("play", handlePlay);
    audio.addEventListener("pause", handleStop);
    audio.addEventListener("ended", handleStop);

    return () => {
      audio.removeEventListener("timeupdate", handleTimeUpdate);
      audio.removeEventListener("play", handlePlay);
      audio.removeEventListener("pause", handleStop);
      audio.removeEventListener("ended", handleStop);
    };
  }, [onTimeUpdate, onPlayingChange]);

  // Seek to currentTime when it changes externally
  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;

    // Only seek if the difference is significant (user clicked timeline);
    // small differences are just the element's own progress echoed back.
    if (Math.abs(audio.currentTime - currentTime) > 0.5) {
      audio.currentTime = currentTime;
    }
  }, [currentTime]);

  const togglePlay = useCallback(() => {
    const audio = audioRef.current;
    if (!audio) return;

    // Decide from the element itself rather than React state, which can
    // lag behind the element by a render.
    if (!audio.paused) {
      audio.pause();
      return;
    }

    // play() returns a promise that rejects when playback cannot start.
    // Handle it so it does not surface as an unhandled rejection.
    audio.play().catch((err: unknown) => {
      // AbortError only means a pause()/reload interrupted the pending play().
      if (err instanceof DOMException && err.name === "AbortError") return;
      console.error("Audio playback failed", err);
    });
  }, []);

  const handleSeek = useCallback(
    (e: React.ChangeEvent<HTMLInputElement>) => {
      const audio = audioRef.current;
      if (!audio) return;

      const newTime = parseFloat(e.target.value);
      audio.currentTime = newTime;
      setInternalTime(newTime);
      onTimeUpdate(newTime);
    },
    [onTimeUpdate]
  );

  return (
    <div className="bg-zinc-900 rounded-lg p-4 space-y-3">
      <audio ref={audioRef} src={audioUrl} preload="metadata" />

      <div className="flex items-center gap-4">
        <button
          type="button"
          aria-label={isPlaying ? "Pause" : "Play"}
          onClick={togglePlay}
          className="w-12 h-12 rounded-full bg-white text-black flex items-center justify-center hover:bg-zinc-200 transition-colors"
        >
          {isPlaying ? (
            <svg className="w-5 h-5" fill="currentColor" viewBox="0 0 20 20">
              <path
                fillRule="evenodd"
                d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zM7 8a1 1 0 012 0v4a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v4a1 1 0 102 0V8a1 1 0 00-1-1z"
                clipRule="evenodd"
              />
            </svg>
          ) : (
            <svg className="w-5 h-5 ml-1" fill="currentColor" viewBox="0 0 20 20">
              <path
                fillRule="evenodd"
                d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z"
                clipRule="evenodd"
              />
            </svg>
          )}
        </button>

        <div className="flex-1 space-y-1">
          <input
            type="range"
            aria-label="Seek"
            min={0}
            max={duration}
            step={0.1}
            value={internalTime}
            onChange={handleSeek}
            className="w-full h-2 bg-zinc-700 rounded-lg appearance-none cursor-pointer accent-white"
          />
          <div className="flex justify-between text-xs text-zinc-400">
            <span>{formatTime(internalTime)}</span>
            <span>{formatTime(duration)}</span>
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||
141
evals/visualizer/src/components/EventList.tsx
Normal file
141
evals/visualizer/src/components/EventList.tsx
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"use client";
|
||||
|
||||
import { useEffect, useRef, useMemo, useState } from "react";
|
||||
import { CapturedEvent } from "@/types";
|
||||
import { DeepgramEventItem, FluxEventItem, SpeechmaticsEventItem } from "./events";
|
||||
|
||||
/** Props accepted by the `EventList` component. */
interface EventListProps {
  /** Events to render, one row each. */
  events: CapturedEvent[];
  /** Playback position used to pick the highlighted ("current") row. */
  currentTime: number;
  /** Called with an event's timestamp when its row is clicked. */
  onSeek: (time: number) => void;
  /** Provider name; selects which event-item renderer is used. */
  provider: string;
}
|
||||
|
||||
/**
 * Format a time in seconds as `m:ss.cc`, where `cc` is hundredths of a second.
 *
 * The value is first rounded to a whole number of hundredths and every
 * field is derived from that integer. This avoids float truncation
 * artefacts (1.15 s used to print as `.14`) and keeps the fields
 * consistent with each other. Non-finite or negative input renders as
 * `0:00.00`.
 *
 * @param seconds Time in seconds.
 * @returns The formatted `m:ss.cc` string.
 */
function formatTime(seconds: number): string {
  const totalCentis =
    Number.isFinite(seconds) && seconds > 0 ? Math.round(seconds * 100) : 0;
  const mins = Math.floor(totalCentis / 6000);
  const secs = Math.floor((totalCentis % 6000) / 100);
  const centis = totalCentis % 100;
  return `${mins}:${secs.toString().padStart(2, "0")}.${centis.toString().padStart(2, "0")}`;
}
|
||||
|
||||
/**
 * Pick the event-item renderer for a provider name.
 *
 * `"deepgram-flux"` and `"speechmatics"` have dedicated renderers; every
 * other value uses the Deepgram (Nova) renderer.
 */
function getEventItemComponent(provider: string) {
  switch (provider) {
    case "deepgram-flux":
      return FluxEventItem;
    case "speechmatics":
      return SpeechmaticsEventItem;
    default:
      // Default to Deepgram Nova
      return DeepgramEventItem;
  }
}
|
||||
|
||||
/**
 * Scrollable list of captured events.
 *
 * The row for the latest event whose timestamp is at or before
 * `currentTime` is highlighted and, while "Auto-scroll" is ticked,
 * scrolled into view. Clicking a row calls `onSeek` with that event's
 * timestamp; each row can also expand to show its raw payload.
 */
export default function EventList({
  events,
  currentTime,
  onSeek,
  provider,
}: EventListProps) {
  const containerRef = useRef<HTMLDivElement>(null);
  const [expandedEvents, setExpandedEvents] = useState<Set<number>>(new Set());
  const [autoScroll, setAutoScroll] = useState(true);

  const EventItemComponent = getEventItemComponent(provider);

  // Walk backwards to the last event at or before the playback position;
  // ends at -1 when no event qualifies.
  const currentEventIndex = useMemo(() => {
    let idx = events.length - 1;
    while (idx >= 0 && !(events[idx].timestamp <= currentTime)) {
      idx -= 1;
    }
    return idx;
  }, [events, currentTime]);

  // Keep the highlighted row centred while auto-scroll is on.
  useEffect(() => {
    if (!autoScroll || currentEventIndex < 0) return;

    const row = containerRef.current?.querySelector(
      `[data-index="${currentEventIndex}"]`
    );
    if (row) {
      row.scrollIntoView({ behavior: "smooth", block: "center" });
    }
  }, [currentEventIndex, autoScroll]);

  // Flip one row's expanded state, producing a new Set so React sees a change.
  const toggleExpand = (index: number) => {
    setExpandedEvents((prev) => {
      const updated = new Set(prev);
      if (!updated.delete(index)) {
        updated.add(index);
      }
      return updated;
    });
  };

  const rows = events.map((event, index) => {
    const isCurrent = index === currentEventIndex;
    const rowClass = `p-3 cursor-pointer transition-colors ${
      isCurrent ? "bg-zinc-700/50" : "hover:bg-zinc-700/30"
    }`;
    const dotClass = isCurrent
      ? "w-2 h-2 rounded-full bg-white"
      : "w-2 h-2 rounded-full bg-zinc-600";

    return (
      <div
        key={index}
        data-index={index}
        className={rowClass}
        onClick={() => onSeek(event.timestamp)}
      >
        <div className="flex items-start gap-2">
          {/* Current indicator */}
          <div className="pt-1">
            <div className={dotClass} />
          </div>

          {/* Timestamp */}
          <span className="text-xs text-zinc-500 font-mono pt-0.5">
            {formatTime(event.timestamp)}
          </span>

          {/* Provider-specific event item */}
          <EventItemComponent
            event={event}
            isExpanded={expandedEvents.has(index)}
            onToggleExpand={() => toggleExpand(index)}
          />
        </div>
      </div>
    );
  });

  return (
    <div className="bg-zinc-800 rounded-lg flex flex-col h-full">
      <div className="flex justify-between items-center px-4 py-2 border-b border-zinc-700">
        <div className="text-sm text-zinc-400 font-medium">
          Events ({events.length})
        </div>
        <label className="flex items-center gap-2 text-xs text-zinc-500 cursor-pointer">
          <input
            type="checkbox"
            checked={autoScroll}
            onChange={(e) => setAutoScroll(e.target.checked)}
            className="rounded"
          />
          Auto-scroll
        </label>
      </div>

      <div
        ref={containerRef}
        className="flex-1 overflow-y-auto divide-y divide-zinc-700/50"
      >
        {rows}
      </div>
    </div>
  );
}
|
||||
119
evals/visualizer/src/components/EventTimeline.tsx
Normal file
119
evals/visualizer/src/components/EventTimeline.tsx
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
"use client";
|
||||
|
||||
import { useMemo } from "react";
|
||||
import { CapturedEvent } from "@/types";
|
||||
|
||||
/** Props accepted by the `EventTimeline` component. */
interface EventTimelineProps {
  /** Events to place on the timeline by timestamp. */
  events: CapturedEvent[];
  /** Total length the timeline spans. */
  duration: number;
  /** Playback position shown by the progress fill and cursor. */
  currentTime: number;
  /** Called with the time that corresponds to a click on the timeline. */
  onSeek: (time: number) => void;
}

/**
 * Marker colour classes keyed by event type, with `default` used for any
 * other type. Key order matters: the legend shows the first six
 * non-default entries in insertion order.
 */
const EVENT_COLORS: Record<string, string> = {
  Results: "bg-blue-500",
  TurnInfo: "bg-green-500",
  AddTranscript: "bg-purple-500",
  Connected: "bg-yellow-500",
  RecognitionStarted: "bg-yellow-500",
  EndOfTranscript: "bg-red-500",
  Metadata: "bg-gray-500",
  Error: "bg-red-600",
  default: "bg-zinc-400",
};
|
||||
|
||||
/**
 * Format a time in seconds as `m:ss`.
 *
 * Fractional seconds are truncated. Non-finite or negative input renders
 * as `0:00` instead of `NaN:NaN` / `-1:-1`.
 *
 * @param seconds Time in seconds.
 * @returns The formatted `m:ss` string.
 */
function formatTime(seconds: number): string {
  const wholeSeconds =
    Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
  const mins = Math.floor(wholeSeconds / 60);
  const secs = wholeSeconds % 60;
  return `${mins}:${secs.toString().padStart(2, "0")}`;
}
|
||||
|
||||
/**
 * Horizontal timeline with one coloured marker per event, a progress fill
 * and cursor for `currentTime`, tick labels underneath, and a colour
 * legend. Clicking the track calls `onSeek` with the matching time.
 *
 * A zero, negative or non-finite `duration` is treated as "no duration":
 * everything sits at 0 %, a single `0:00` tick is shown and clicks are
 * ignored. Previously `duration === 0` produced a tick interval of 0 and
 * the tick loop never terminated.
 */
export default function EventTimeline({
  events,
  duration,
  currentTime,
  onSeek,
}: EventTimelineProps) {
  const hasDuration = Number.isFinite(duration) && duration > 0;

  const timeMarkers = useMemo(() => {
    // Guard first: with duration 0 the interval below would be 0 and the
    // loop would spin forever.
    if (!Number.isFinite(duration) || duration <= 0) {
      return [0];
    }

    const markers: number[] = [];
    const interval = Math.ceil(duration / 6); // >= 1 for any positive duration
    for (let t = 0; t <= duration; t += interval) {
      markers.push(t);
    }
    // Make sure the final label is the (whole-second) end of the audio.
    const lastSecond = Math.floor(duration);
    if (markers[markers.length - 1] !== lastSecond) {
      markers.push(lastSecond);
    }
    return markers;
  }, [duration]);

  // Map a time to a 0-100 % offset along the track.
  const toPercent = (time: number): number => {
    if (!hasDuration || !Number.isFinite(time)) return 0;
    return Math.max(0, Math.min((time / duration) * 100, 100));
  };

  const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
    const rect = e.currentTarget.getBoundingClientRect();
    // Nothing sensible to seek to without a duration or a measurable track;
    // bailing out also prevents handing NaN to onSeek.
    if (!hasDuration || rect.width <= 0) return;

    const fraction = (e.clientX - rect.left) / rect.width;
    onSeek(Math.max(0, Math.min(fraction * duration, duration)));
  };

  const progressPercent = toPercent(currentTime);

  return (
    <div className="bg-zinc-800 rounded-lg p-4 space-y-2">
      <div className="text-sm text-zinc-400 font-medium">Event Timeline</div>

      <div
        className="relative h-16 bg-zinc-900 rounded cursor-pointer overflow-hidden"
        onClick={handleClick}
      >
        {/* Progress indicator */}
        <div
          className="absolute top-0 bottom-0 bg-zinc-700/50 pointer-events-none"
          style={{ width: `${progressPercent}%` }}
        />

        {/* Current time indicator */}
        <div
          className="absolute top-0 bottom-0 w-0.5 bg-white z-10 pointer-events-none"
          style={{ left: `${progressPercent}%` }}
        />

        {/* Event markers */}
        <div className="absolute inset-0 flex items-center">
          {events.map((event, index) => {
            const leftPercent = toPercent(event.timestamp);
            const colorClass =
              EVENT_COLORS[event.event_type] || EVENT_COLORS.default;

            return (
              <div
                key={index}
                className={`absolute w-2 h-8 rounded-sm ${colorClass} opacity-80 hover:opacity-100 transition-opacity`}
                style={{ left: `${leftPercent}%`, transform: "translateX(-50%)" }}
                title={`${formatTime(event.timestamp)} - ${event.event_type}`}
              />
            );
          })}
        </div>
      </div>

      {/* Time markers */}
      <div className="flex justify-between text-xs text-zinc-500">
        {timeMarkers.map((time, index) => (
          <span key={index}>{formatTime(time)}</span>
        ))}
      </div>

      {/* Legend: first six event types, excluding the fallback colour */}
      <div className="flex flex-wrap gap-3 pt-2">
        {Object.entries(EVENT_COLORS)
          .filter(([key]) => key !== "default")
          .slice(0, 6)
          .map(([eventType, colorClass]) => (
            <div key={eventType} className="flex items-center gap-1 text-xs text-zinc-400">
              <div className={`w-2 h-2 rounded-sm ${colorClass}`} />
              <span>{eventType}</span>
            </div>
          ))}
      </div>
    </div>
  );
}
|
||||
98
evals/visualizer/src/components/events/DeepgramEventItem.tsx
Normal file
98
evals/visualizer/src/components/events/DeepgramEventItem.tsx
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
"use client";
|
||||
|
||||
import { CapturedEvent } from "@/types";
|
||||
|
||||
/** Props accepted by the `DeepgramEventItem` component. */
interface DeepgramEventItemProps {
  /** The captured event to render. */
  event: CapturedEvent;
  /** Whether the raw JSON payload is currently shown. */
  isExpanded: boolean;
  /** Called when the "Show details" / "Hide details" button is clicked. */
  onToggleExpand: () => void;
}

/** Badge classes keyed by event type; `default` covers any other type. */
const EVENT_COLORS: Record<string, string> = {
  Results: "text-blue-400 bg-blue-500/10",
  SpeechStarted: "text-yellow-400 bg-yellow-500/10",
  Metadata: "text-gray-400 bg-gray-500/10",
  UtteranceEnd: "text-red-500 bg-red-600/10",
  default: "text-zinc-400 bg-zinc-500/10",
};
|
||||
|
||||
/**
 * Read the transcript text from an event payload.
 *
 * Looks at `data.channel.alternatives[0].transcript` and returns it when
 * it is a non-empty value; returns `""` in every other case.
 */
function getTranscript(event: CapturedEvent): string {
  const channel = event.data.channel as Record<string, unknown> | undefined;
  if (!channel) {
    return "";
  }

  const alternatives = channel.alternatives as
    | Array<{ transcript?: string }>
    | undefined;
  const firstTranscript = alternatives?.[0]?.transcript;
  return firstTranscript ? firstTranscript : "";
}
|
||||
|
||||
/**
 * Renders one event for the default (Deepgram) renderer: an event-type
 * badge, a Final/Partial badge when the payload has `is_final`, a
 * "Speech Final" badge when `speech_final` is truthy, a one-line summary,
 * and an expandable raw-JSON view.
 *
 * The summary is the transcript when there is one, otherwise the event
 * type in square brackets, so rows without text are not rendered blank.
 */
export default function DeepgramEventItem({
  event,
  isExpanded,
  onToggleExpand,
}: DeepgramEventItemProps) {
  const colorClass = EVENT_COLORS[event.event_type] || EVENT_COLORS.default;
  const data = event.data;

  const transcript = getTranscript(event);
  const isFinal = data.is_final as boolean | undefined;
  const speechFinal = data.speech_final as boolean | undefined;

  // Fallback label for events that carry no transcript text.
  const summary = transcript || `[${event.event_type}]`;

  return (
    <div className="flex-1 min-w-0 space-y-1">
      <div className="flex items-center gap-2 flex-wrap">
        <span className={`text-xs px-2 py-0.5 rounded ${colorClass}`}>
          {event.event_type}
        </span>

        {/* Final/Partial indicator, only for payloads that carry is_final */}
        {isFinal !== undefined && (
          <span
            className={`text-xs px-2 py-0.5 rounded ${
              isFinal
                ? "text-emerald-400 bg-emerald-500/10"
                : "text-amber-400 bg-amber-500/10"
            }`}
          >
            {isFinal ? "Final" : "Partial"}
          </span>
        )}

        {/* Speech Final indicator */}
        {speechFinal && (
          <span className="text-xs px-2 py-0.5 rounded text-cyan-400 bg-cyan-500/10">
            Speech Final
          </span>
        )}
      </div>

      {/* Transcript or status message */}
      <div className="text-sm text-zinc-300 truncate">{summary}</div>

      {/* Expand/collapse button */}
      <button
        onClick={(e) => {
          // Keep the click from also triggering the parent row's handler.
          e.stopPropagation();
          onToggleExpand();
        }}
        className="text-xs text-zinc-500 hover:text-zinc-300"
      >
        {isExpanded ? "Hide details" : "Show details"}
      </button>

      {/* Expanded JSON view */}
      {isExpanded && (
        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
          {JSON.stringify(event.data, null, 2)}
        </pre>
      )}
    </div>
  );
}
|
||||
115
evals/visualizer/src/components/events/FluxEventItem.tsx
Normal file
115
evals/visualizer/src/components/events/FluxEventItem.tsx
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
"use client";
|
||||
|
||||
import { CapturedEvent } from "@/types";
|
||||
|
||||
/** Props accepted by the `FluxEventItem` component. */
interface FluxEventItemProps {
  /** The captured event to render. */
  event: CapturedEvent;
  /** Whether the raw JSON payload is currently shown. */
  isExpanded: boolean;
  /** Called when the "Show details" / "Hide details" button is clicked. */
  onToggleExpand: () => void;
}

/** Badge classes keyed by top-level event type; `default` covers any other type. */
const EVENT_COLORS: Record<string, string> = {
  TurnInfo: "text-green-400 bg-green-500/10",
  Connected: "text-yellow-400 bg-yellow-500/10",
  Error: "text-red-500 bg-red-600/10",
  default: "text-zinc-400 bg-zinc-500/10",
};

/** Badge classes keyed by the payload's `event` field; `default` covers any other value. */
const FLUX_EVENT_COLORS: Record<string, string> = {
  Update: "text-amber-300 bg-amber-500/20",
  EndOfTurn: "text-emerald-300 bg-emerald-500/20",
  EagerEndOfTurn: "text-cyan-300 bg-cyan-500/20",
  StartOfTurn: "text-blue-300 bg-blue-500/20",
  TurnResumed: "text-purple-300 bg-purple-500/20",
  default: "text-zinc-300 bg-zinc-500/20",
};
|
||||
|
||||
/**
 * Renders one event for the Flux renderer: an event-type badge, then
 * (when the payload has an `event` field) a sub-event badge and a
 * Final/Partial badge, plus optional turn index and end-of-turn
 * confidence, a one-line summary, and an expandable raw-JSON view.
 *
 * Only the `EndOfTurn` sub-event is labelled "Final".
 */
export default function FluxEventItem({
  event,
  isExpanded,
  onToggleExpand,
}: FluxEventItemProps) {
  const eventType = event.event_type;
  const payload = event.data;
  const typeBadgeClass = EVENT_COLORS[eventType] || EVENT_COLORS.default;

  // Fields read from the payload
  const fluxEvent = payload.event as string | undefined;
  const transcript = payload.transcript as string | undefined;
  const endOfTurnConfidence = payload.end_of_turn_confidence as number | undefined;
  const turnIndex = payload.turn_index as number | undefined;

  const isFinal = fluxEvent === "EndOfTurn";

  let fluxEventColor = "";
  if (fluxEvent) {
    fluxEventColor = FLUX_EVENT_COLORS[fluxEvent] || FLUX_EVENT_COLORS.default;
  }

  const finalityClass = `text-xs px-2 py-0.5 rounded ${
    isFinal
      ? "text-emerald-400 bg-emerald-500/10"
      : "text-amber-400 bg-amber-500/10"
  }`;

  // Summary line: transcript if any, else a bracketed status label.
  let summary: string;
  if (transcript) {
    summary = transcript;
  } else if (eventType === "Connected") {
    summary = "[Connected]";
  } else {
    summary = `[${fluxEvent || eventType}]`;
  }

  const handleToggleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
    e.stopPropagation();
    onToggleExpand();
  };

  return (
    <div className="flex-1 min-w-0 space-y-1">
      <div className="flex items-center gap-2 flex-wrap">
        <span className={`text-xs px-2 py-0.5 rounded ${typeBadgeClass}`}>
          {eventType}
        </span>

        {/* Flux sub-event type */}
        {fluxEvent && (
          <span className={`text-xs px-2 py-0.5 rounded ${fluxEventColor}`}>
            {fluxEvent}
          </span>
        )}

        {/* Final/Partial indicator */}
        {fluxEvent && (
          <span className={finalityClass}>{isFinal ? "Final" : "Partial"}</span>
        )}

        {/* Turn index */}
        {turnIndex !== undefined && (
          <span className="text-xs text-zinc-500">
            Turn {turnIndex}
          </span>
        )}

        {/* EOT confidence */}
        {endOfTurnConfidence !== undefined && (
          <span className="text-xs text-zinc-500 font-mono">
            EOT: {(endOfTurnConfidence * 100).toFixed(1)}%
          </span>
        )}
      </div>

      {/* Transcript or status message */}
      <div className="text-sm text-zinc-300 truncate">{summary}</div>

      {/* Expand/collapse button */}
      <button
        onClick={handleToggleClick}
        className="text-xs text-zinc-500 hover:text-zinc-300"
      >
        {isExpanded ? "Hide details" : "Show details"}
      </button>

      {/* Expanded JSON view */}
      {isExpanded && (
        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
          {JSON.stringify(event.data, null, 2)}
        </pre>
      )}
    </div>
  );
}
|
||||
101
evals/visualizer/src/components/events/SpeechmaticsEventItem.tsx
Normal file
101
evals/visualizer/src/components/events/SpeechmaticsEventItem.tsx
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
"use client";
|
||||
|
||||
import { CapturedEvent } from "@/types";
|
||||
|
||||
/** Props accepted by the `SpeechmaticsEventItem` component. */
interface SpeechmaticsEventItemProps {
  /** The captured event to render. */
  event: CapturedEvent;
  /** Whether the raw JSON payload is currently shown. */
  isExpanded: boolean;
  /** Called when the "Show details" / "Hide details" button is clicked. */
  onToggleExpand: () => void;
}

/** Badge classes keyed by event type; `default` covers any other type. */
const EVENT_COLORS: Record<string, string> = {
  AddTranscript: "text-purple-400 bg-purple-500/10",
  RecognitionStarted: "text-yellow-400 bg-yellow-500/10",
  EndOfTranscript: "text-red-400 bg-red-500/10",
  Warning: "text-orange-400 bg-orange-500/10",
  Error: "text-red-500 bg-red-600/10",
  default: "text-zinc-400 bg-zinc-500/10",
};
|
||||
|
||||
/**
 * Build the transcript text from an event payload.
 *
 * Takes each entry of `data.results` whose `type` is `"word"` and whose
 * first alternative has non-empty `content`, and joins those contents
 * with single spaces. Returns `""` when `data.results` is absent.
 */
function getTranscript(event: CapturedEvent): string {
  type ResultEntry = {
    type?: string;
    alternatives?: Array<{ content?: string }>;
  };

  const entries = event.data.results as ResultEntry[] | undefined;
  if (!entries) {
    return "";
  }

  const words: Array<string | undefined> = [];
  for (const entry of entries) {
    const content = entry.alternatives?.[0]?.content;
    // Entries that are not words, or have no content, are skipped.
    if (entry.type === "word" && content) {
      words.push(content);
    }
  }
  return words.join(" ");
}
|
||||
|
||||
/**
 * Renders one event for the Speechmatics renderer: an event-type badge,
 * a "Final" badge on `AddTranscript` events, a one-line summary, and an
 * expandable raw-JSON view.
 *
 * The summary is the transcript when there is one; otherwise a bracketed
 * status label, with dedicated wording for `RecognitionStarted`,
 * `EndOfTranscript` and `Warning`.
 */
export default function SpeechmaticsEventItem({
  event,
  isExpanded,
  onToggleExpand,
}: SpeechmaticsEventItemProps) {
  const eventType = event.event_type;
  const badgeClass = EVENT_COLORS[eventType] || EVENT_COLORS.default;
  const transcript = getTranscript(event);

  // Label shown when the event carries no transcript text.
  const statusLabel = (): string => {
    if (eventType === "RecognitionStarted") {
      return "[Recognition Started]";
    }
    if (eventType === "EndOfTranscript") {
      return "[End of Transcript]";
    }
    if (eventType === "Warning") {
      const warningReason = event.data.reason as string | undefined;
      return `[Warning: ${warningReason || "unknown"}]`;
    }
    return `[${eventType}]`;
  };

  const handleToggleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
    e.stopPropagation();
    onToggleExpand();
  };

  return (
    <div className="flex-1 min-w-0 space-y-1">
      <div className="flex items-center gap-2 flex-wrap">
        <span className={`text-xs px-2 py-0.5 rounded ${badgeClass}`}>
          {eventType}
        </span>

        {/* AddTranscript is always final in Speechmatics */}
        {eventType === "AddTranscript" && (
          <span className="text-xs px-2 py-0.5 rounded text-emerald-400 bg-emerald-500/10">
            Final
          </span>
        )}
      </div>

      {/* Transcript or status message */}
      <div className="text-sm text-zinc-300 truncate">
        {transcript || statusLabel()}
      </div>

      {/* Expand/collapse button */}
      <button
        onClick={handleToggleClick}
        className="text-xs text-zinc-500 hover:text-zinc-300"
      >
        {isExpanded ? "Hide details" : "Show details"}
      </button>

      {/* Expanded JSON view */}
      {isExpanded && (
        <pre className="mt-2 p-2 bg-zinc-900 rounded text-xs text-zinc-400 overflow-x-auto max-h-64">
          {JSON.stringify(event.data, null, 2)}
        </pre>
      )}
    </div>
  );
}
|
||||
3
evals/visualizer/src/components/events/index.ts
Normal file
3
evals/visualizer/src/components/events/index.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
export { default as DeepgramEventItem } from "./DeepgramEventItem";
|
||||
export { default as FluxEventItem } from "./FluxEventItem";
|
||||
export { default as SpeechmaticsEventItem } from "./SpeechmaticsEventItem";
|
||||
24
evals/visualizer/src/types/index.ts
Normal file
24
evals/visualizer/src/types/index.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
/**
 * A single provider event recorded during a capture.
 */
export interface CapturedEvent {
  /**
   * When the event was recorded.
   * NOTE(review): unit and reference point (epoch vs. offset into the audio)
   * are not visible here — confirm against the capture script.
   */
  timestamp: number;

  /** Provider-specific event name, e.g. `"AddTranscript"`. */
  event_type: string;

  /**
   * Raw event payload. Its shape depends on the provider and event type, so
   * consumers must narrow individual fields before using them.
   */
  data: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Full result of one capture: metadata about the audio and provider plus
 * every recorded event.
 */
export interface EventCaptureResult {
  /** Name of the audio file. NOTE(review): presumably the base name — confirm. */
  audio_file: string;

  /** Location of the audio file. NOTE(review): presumably a filesystem path — confirm. */
  audio_path: string;

  /** Identifier of the provider that produced the events. */
  provider: string;

  /** Duration value for the capture. NOTE(review): unit not visible here — confirm. */
  duration: number;

  /** Creation time as a string. NOTE(review): format not visible here — confirm. */
  created_at: string;

  /** Events recorded during the capture. */
  events: CapturedEvent[];

  /** Transcript text associated with the capture. */
  transcript: string;
}
|
||||
|
||||
/**
 * Condensed description of a capture result: identifying metadata and an
 * event count, without the events themselves.
 */
export interface ResultSummary {
  /** Identifier of the result. */
  id: string;

  /** Name of the audio file. */
  audio_file: string;

  /** Identifier of the provider that produced the events. */
  provider: string;

  /** Duration value for the capture. NOTE(review): unit not visible here — confirm. */
  duration: number;

  /** Creation time as a string. NOTE(review): format not visible here — confirm. */
  created_at: string;

  /** Number of events. NOTE(review): presumably the length of the result's event list — confirm. */
  event_count: number;
}
|
||||
34
evals/visualizer/tsconfig.json
Normal file
34
evals/visualizer/tsconfig.json
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2017",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "react-jsx",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": [
|
||||
"next-env.d.ts",
|
||||
"**/*.ts",
|
||||
"**/*.tsx",
|
||||
".next/types/**/*.ts",
|
||||
".next/dev/types/**/*.ts",
|
||||
"**/*.mts"
|
||||
],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
2
pipecat
2
pipecat
|
|
@ -1 +1 @@
|
|||
Subproject commit a1d3062446240b6b27ebc787d28578e4561e7441
|
||||
Subproject commit f11fad8f3e90e06b1625b9dc49c13e26f3c9e716
|
||||
|
|
@ -16,6 +16,11 @@
|
|||
"type": "json",
|
||||
"path": "ui/package.json",
|
||||
"jsonpath": "$.version"
|
||||
},
|
||||
{
|
||||
"type": "toml",
|
||||
"path": "api/pyproject.toml",
|
||||
"key": "project.version"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { redirect } from "next/navigation";
|
||||
|
||||
import { getWorkflowsApiV1WorkflowFetchGet } from "@/client/sdk.gen";
|
||||
import { getWorkflowCountApiV1WorkflowCountGet } from "@/client/sdk.gen";
|
||||
import { getServerAccessToken,getServerAuthProvider, getServerUser } from "@/lib/auth/server";
|
||||
import logger from '@/lib/logger';
|
||||
import { getRedirectUrl } from "@/lib/utils";
|
||||
|
|
@ -34,21 +34,18 @@ export default async function AfterSignInPage() {
|
|||
try {
|
||||
const accessToken = await getServerAccessToken();
|
||||
if (accessToken) {
|
||||
const workflowsResponse = await getWorkflowsApiV1WorkflowFetchGet({
|
||||
const countResponse = await getWorkflowCountApiV1WorkflowCountGet({
|
||||
headers: {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
},
|
||||
});
|
||||
|
||||
const workflows = workflowsResponse.data ? (Array.isArray(workflowsResponse.data) ? workflowsResponse.data : [workflowsResponse.data]) : [];
|
||||
const activeWorkflows = workflows.filter(w => w.status === 'active');
|
||||
|
||||
logger.debug('[AfterSignInPage] Found workflows:', {
|
||||
total: workflows.length,
|
||||
active: activeWorkflows.length
|
||||
total: countResponse.data?.total,
|
||||
active: countResponse.data?.active
|
||||
});
|
||||
|
||||
if (activeWorkflows.length > 0) {
|
||||
if (countResponse.data && countResponse.data.active > 0) {
|
||||
logger.debug('[AfterSignInPage] Redirecting to /workflow - user has workflows');
|
||||
redirect('/workflow');
|
||||
} else {
|
||||
|
|
|
|||
33
ui/src/app/api/config/version/route.ts
Normal file
33
ui/src/app/api/config/version/route.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import { NextResponse } from "next/server";
|
||||
|
||||
import { healthApiV1HealthGet } from "@/client/sdk.gen";
|
||||
import type { HealthResponse } from "@/client/types.gen";
|
||||
|
||||
// Import version from package.json at build time
|
||||
import packageJson from "../../../../../package.json";
|
||||
|
||||
/**
 * Reports the UI version, the backend API version and the backend API
 * endpoint as JSON.
 *
 * - `ui` comes from `package.json` (`"dev"` when the version is empty).
 * - `api` is the version returned by the backend health endpoint;
 *   `"unknown"` when the call resolves without data, `"unavailable"` when
 *   the call throws.
 * - `backendApiEndpoint` is the endpoint reported by the health response, or
 *   `null` when no health data was obtained.
 *
 * NOTE(review): depending on the Next.js version, a GET handler with no
 * request-dependent input may be evaluated once at build time and cached —
 * confirm this route is evaluated per request in the deployed setup.
 */
export async function GET() {
  const payload: {
    ui: string;
    api: string;
    backendApiEndpoint: string | null;
  } = {
    ui: packageJson.version || "dev",
    api: "unknown",
    backendApiEndpoint: null,
  };

  try {
    const { data } = await healthApiV1HealthGet();
    if (data) {
      const health = data as HealthResponse;
      payload.api = health.version;
      payload.backendApiEndpoint = health.backend_api_endpoint;
    }
  } catch {
    // Backend might not be reachable during build or in some deployments
    payload.api = "unavailable";
  }

  return NextResponse.json(payload);
}
|
||||
|
|
@ -9,6 +9,7 @@ import AppLayout from "@/components/layout/AppLayout";
|
|||
import PostHogIdentify from "@/components/PostHogIdentify";
|
||||
import SpinLoader from "@/components/SpinLoader";
|
||||
import { Toaster } from "@/components/ui/sonner";
|
||||
import { AppConfigProvider } from "@/context/AppConfigContext";
|
||||
import { OnboardingProvider } from "@/context/OnboardingContext";
|
||||
import { UserConfigProvider } from "@/context/UserConfigContext";
|
||||
import { AuthProvider } from "@/lib/auth";
|
||||
|
|
@ -59,18 +60,20 @@ export default function RootLayout({
|
|||
<body
|
||||
className={`${geistSans.variable} ${geistMono.variable} antialiased`}>
|
||||
<AuthProvider>
|
||||
<Suspense fallback={<SpinLoader />}>
|
||||
<UserConfigProvider>
|
||||
<OnboardingProvider>
|
||||
<PostHogIdentify />
|
||||
<AppLayout>
|
||||
{children}
|
||||
</AppLayout>
|
||||
<Toaster />
|
||||
<ChatwootWidget />
|
||||
</OnboardingProvider>
|
||||
</UserConfigProvider>
|
||||
</Suspense>
|
||||
<AppConfigProvider>
|
||||
<Suspense fallback={<SpinLoader />}>
|
||||
<UserConfigProvider>
|
||||
<OnboardingProvider>
|
||||
<PostHogIdentify />
|
||||
<AppLayout>
|
||||
{children}
|
||||
</AppLayout>
|
||||
<Toaster />
|
||||
<ChatwootWidget />
|
||||
</OnboardingProvider>
|
||||
</UserConfigProvider>
|
||||
</Suspense>
|
||||
</AppConfigProvider>
|
||||
</AuthProvider>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { isNextRouterError } from "next/dist/client/components/is-next-router-error";
|
||||
import { redirect } from "next/navigation";
|
||||
|
||||
import { getWorkflowsApiV1WorkflowFetchGet } from "@/client/sdk.gen";
|
||||
import { getWorkflowCountApiV1WorkflowCountGet } from "@/client/sdk.gen";
|
||||
import SignInClient from "@/components/SignInClient";
|
||||
import { getServerAccessToken,getServerAuthProvider,getServerUser } from "@/lib/auth/server";
|
||||
import logger from '@/lib/logger';
|
||||
|
|
@ -21,21 +21,18 @@ export default async function Home() {
|
|||
try {
|
||||
const accessToken = await getServerAccessToken();
|
||||
if (accessToken) {
|
||||
const workflowsResponse = await getWorkflowsApiV1WorkflowFetchGet({
|
||||
const countResponse = await getWorkflowCountApiV1WorkflowCountGet({
|
||||
headers: {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
},
|
||||
});
|
||||
|
||||
const workflows = workflowsResponse.data ? (Array.isArray(workflowsResponse.data) ? workflowsResponse.data : [workflowsResponse.data]) : [];
|
||||
const activeWorkflows = workflows.filter(w => w.status === 'active');
|
||||
|
||||
logger.debug('[HomePage] Found workflows for local provider:', {
|
||||
total: workflows.length,
|
||||
active: activeWorkflows.length
|
||||
total: countResponse.data?.total,
|
||||
active: countResponse.data?.active
|
||||
});
|
||||
|
||||
if (activeWorkflows.length > 0) {
|
||||
if (countResponse.data && countResponse.data.active > 0) {
|
||||
logger.debug('[HomePage] Redirecting to /workflow - user has workflows');
|
||||
redirect('/workflow');
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -326,14 +326,64 @@ export default function UsagePage() {
|
|||
isDisabled={savingTimezone || userConfigLoading}
|
||||
placeholder={userConfigLoading ? "Loading..." : "Select timezone"}
|
||||
styles={{
|
||||
control: (base) => ({
|
||||
control: (base, state) => ({
|
||||
...base,
|
||||
minHeight: '36px',
|
||||
fontSize: '14px',
|
||||
backgroundColor: 'var(--background)',
|
||||
borderColor: state.isFocused ? 'var(--ring)' : 'var(--border)',
|
||||
boxShadow: state.isFocused ? '0 0 0 2px color-mix(in srgb, var(--ring) 20%, transparent)' : 'none',
|
||||
'&:hover': {
|
||||
borderColor: 'var(--border)',
|
||||
},
|
||||
}),
|
||||
menu: (base) => ({
|
||||
...base,
|
||||
zIndex: 9999,
|
||||
backgroundColor: 'var(--popover)',
|
||||
border: '1px solid var(--border)',
|
||||
boxShadow: '0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
|
||||
}),
|
||||
menuList: (base) => ({
|
||||
...base,
|
||||
backgroundColor: 'var(--popover)',
|
||||
padding: 0,
|
||||
}),
|
||||
option: (base, state) => ({
|
||||
...base,
|
||||
backgroundColor: state.isSelected
|
||||
? 'var(--accent)'
|
||||
: state.isFocused
|
||||
? 'var(--accent)'
|
||||
: 'var(--popover)',
|
||||
color: 'var(--foreground)',
|
||||
cursor: 'pointer',
|
||||
'&:active': {
|
||||
backgroundColor: 'var(--accent)',
|
||||
},
|
||||
}),
|
||||
singleValue: (base) => ({
|
||||
...base,
|
||||
color: 'var(--foreground)',
|
||||
}),
|
||||
input: (base) => ({
|
||||
...base,
|
||||
color: 'var(--foreground)',
|
||||
}),
|
||||
placeholder: (base) => ({
|
||||
...base,
|
||||
color: 'var(--muted-foreground)',
|
||||
}),
|
||||
indicatorSeparator: (base) => ({
|
||||
...base,
|
||||
backgroundColor: 'var(--border)',
|
||||
}),
|
||||
dropdownIndicator: (base) => ({
|
||||
...base,
|
||||
color: 'var(--muted-foreground)',
|
||||
'&:hover': {
|
||||
color: 'var(--foreground)',
|
||||
},
|
||||
}),
|
||||
}}
|
||||
/>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -524,6 +524,12 @@ export type HttpValidationError = {
|
|||
detail?: Array<ValidationError>;
|
||||
};
|
||||
|
||||
export type HealthResponse = {
|
||||
status: string;
|
||||
version: string;
|
||||
backend_api_endpoint: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Configuration for HTTP API tools.
|
||||
*/
|
||||
|
|
@ -1042,6 +1048,15 @@ export type VonageConfigurationResponse = {
|
|||
*/
|
||||
export type WebhookCredentialType = 'none' | 'api_key' | 'bearer_token' | 'basic_auth' | 'custom_header';
|
||||
|
||||
/**
|
||||
* Response for workflow count endpoint.
|
||||
*/
|
||||
export type WorkflowCountResponse = {
|
||||
total: number;
|
||||
active: number;
|
||||
archived: number;
|
||||
};
|
||||
|
||||
export type WorkflowError = {
|
||||
kind: ItemKind;
|
||||
id: string | null;
|
||||
|
|
@ -1049,6 +1064,17 @@ export type WorkflowError = {
|
|||
message: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Lightweight response for workflow listings (excludes large fields).
|
||||
*/
|
||||
export type WorkflowListResponse = {
|
||||
id: number;
|
||||
name: string;
|
||||
status: string;
|
||||
created_at: string;
|
||||
total_runs: number;
|
||||
};
|
||||
|
||||
export type WorkflowOption = {
|
||||
id: number;
|
||||
name: string;
|
||||
|
|
@ -1391,6 +1417,7 @@ export type HandleInboundTelephonyApiV1TelephonyInboundWorkflowIdPostData = {
|
|||
'x-twilio-signature'?: string | null;
|
||||
'x-vobiz-signature'?: string | null;
|
||||
'x-vobiz-timestamp'?: string | null;
|
||||
'x-cx-apikey'?: string | null;
|
||||
};
|
||||
path: {
|
||||
workflow_id: number;
|
||||
|
|
@ -1655,6 +1682,39 @@ export type CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses =
|
|||
|
||||
export type CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponse = CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses[keyof CreateWorkflowFromTemplateApiV1WorkflowCreateTemplatePostResponses];
|
||||
|
||||
export type GetWorkflowCountApiV1WorkflowCountGetData = {
|
||||
body?: never;
|
||||
headers?: {
|
||||
authorization?: string | null;
|
||||
'X-API-Key'?: string | null;
|
||||
};
|
||||
path?: never;
|
||||
query?: never;
|
||||
url: '/api/v1/workflow/count';
|
||||
};
|
||||
|
||||
export type GetWorkflowCountApiV1WorkflowCountGetErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type GetWorkflowCountApiV1WorkflowCountGetError = GetWorkflowCountApiV1WorkflowCountGetErrors[keyof GetWorkflowCountApiV1WorkflowCountGetErrors];
|
||||
|
||||
export type GetWorkflowCountApiV1WorkflowCountGetResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: WorkflowCountResponse;
|
||||
};
|
||||
|
||||
export type GetWorkflowCountApiV1WorkflowCountGetResponse = GetWorkflowCountApiV1WorkflowCountGetResponses[keyof GetWorkflowCountApiV1WorkflowCountGetResponses];
|
||||
|
||||
export type GetWorkflowsApiV1WorkflowFetchGetData = {
|
||||
body?: never;
|
||||
headers?: {
|
||||
|
|
@ -1688,7 +1748,7 @@ export type GetWorkflowsApiV1WorkflowFetchGetResponses = {
|
|||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: Array<WorkflowResponse>;
|
||||
200: Array<WorkflowListResponse>;
|
||||
};
|
||||
|
||||
export type GetWorkflowsApiV1WorkflowFetchGetResponse = GetWorkflowsApiV1WorkflowFetchGetResponses[keyof GetWorkflowsApiV1WorkflowFetchGetResponses];
|
||||
|
|
@ -4168,6 +4228,41 @@ export type InitiateCallApiV1PublicAgentUuidPostResponses = {
|
|||
|
||||
export type InitiateCallApiV1PublicAgentUuidPostResponse = InitiateCallApiV1PublicAgentUuidPostResponses[keyof InitiateCallApiV1PublicAgentUuidPostResponses];
|
||||
|
||||
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetData = {
|
||||
body?: never;
|
||||
path: {
|
||||
token: string;
|
||||
artifact_type: 'recording' | 'transcript';
|
||||
};
|
||||
query?: {
|
||||
/**
|
||||
* Display inline in browser instead of download
|
||||
*/
|
||||
inline?: boolean;
|
||||
};
|
||||
url: '/api/v1/public/download/workflow/{token}/{artifact_type}';
|
||||
};
|
||||
|
||||
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors = {
|
||||
/**
|
||||
* Not found
|
||||
*/
|
||||
404: unknown;
|
||||
/**
|
||||
* Validation Error
|
||||
*/
|
||||
422: HttpValidationError;
|
||||
};
|
||||
|
||||
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetError = DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors[keyof DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetErrors];
|
||||
|
||||
export type DownloadWorkflowArtifactApiV1PublicDownloadWorkflowTokenArtifactTypeGetResponses = {
|
||||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: unknown;
|
||||
};
|
||||
|
||||
export type DeactivateEmbedTokenApiV1WorkflowWorkflowIdEmbedTokenDeleteData = {
|
||||
body?: never;
|
||||
headers?: {
|
||||
|
|
@ -4500,9 +4595,11 @@ export type HealthApiV1HealthGetResponses = {
|
|||
/**
|
||||
* Successful Response
|
||||
*/
|
||||
200: unknown;
|
||||
200: HealthResponse;
|
||||
};
|
||||
|
||||
export type HealthApiV1HealthGetResponse = HealthApiV1HealthGetResponses[keyof HealthApiV1HealthGetResponses];
|
||||
|
||||
export type ClientOptions = {
|
||||
baseUrl: 'http://127.0.0.1:8000' | (string & {});
|
||||
};
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
|
|||
const [isOpen, setIsOpen] = useState(false);
|
||||
const [mediaType, setMediaType] = useState<'audio' | 'transcript' | null>(null);
|
||||
const [mediaSignedUrl, setMediaSignedUrl] = useState<string | null>(null);
|
||||
const [transcriptContent, setTranscriptContent] = useState<string | null>(null);
|
||||
const [selectedRunId, setSelectedRunId] = useState<number | null>(null);
|
||||
const [mediaDownloadKey, setMediaDownloadKey] = useState<string | null>(null);
|
||||
const [mediaLoading, setMediaLoading] = useState(false);
|
||||
|
|
@ -47,6 +48,7 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
|
|||
async (fileKey: string | null, runId: number) => {
|
||||
if (!fileKey || !accessToken) return;
|
||||
setMediaLoading(true);
|
||||
setTranscriptContent(null);
|
||||
const signed = await getSignedUrl(fileKey, accessToken, true);
|
||||
if (signed) {
|
||||
setMediaType('transcript');
|
||||
|
|
@ -54,6 +56,14 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
|
|||
setMediaDownloadKey(fileKey);
|
||||
setSelectedRunId(runId);
|
||||
setIsOpen(true);
|
||||
// Fetch transcript content with proper UTF-8 encoding
|
||||
try {
|
||||
const response = await fetch(signed);
|
||||
const text = await response.text();
|
||||
setTranscriptContent(text);
|
||||
} catch (error) {
|
||||
console.error('Error fetching transcript:', error);
|
||||
}
|
||||
}
|
||||
setMediaLoading(false);
|
||||
},
|
||||
|
|
@ -84,12 +94,10 @@ export function MediaPreviewDialog({ accessToken }: MediaPreviewDialogProps) {
|
|||
<audio src={mediaSignedUrl} controls autoPlay className="w-full mt-4" />
|
||||
)}
|
||||
|
||||
{!mediaLoading && mediaType === 'transcript' && mediaSignedUrl && (
|
||||
<iframe
|
||||
src={mediaSignedUrl}
|
||||
title="Transcript"
|
||||
className="w-full h-[60vh] border rounded-md mt-4"
|
||||
/>
|
||||
{!mediaLoading && mediaType === 'transcript' && transcriptContent && (
|
||||
<pre className="w-full h-[60vh] overflow-auto border rounded-md mt-4 p-4 bg-muted text-sm whitespace-pre-wrap font-mono">
|
||||
{transcriptContent}
|
||||
</pre>
|
||||
)}
|
||||
|
||||
<DialogFooter className="pt-4">
|
||||
|
|
|
|||
|
|
@ -321,9 +321,20 @@ export default function ServiceConfiguration() {
|
|||
if (!providerSchema) return [];
|
||||
|
||||
// Find all config fields (not provider, not api_key)
|
||||
return Object.keys(providerSchema.properties).filter(
|
||||
const fields = Object.keys(providerSchema.properties).filter(
|
||||
field => field !== "provider" && field !== "api_key"
|
||||
);
|
||||
|
||||
// For Deepgram STT, hide language field when flux-general-en model is selected
|
||||
// Flux model is English-only and doesn't support language selection
|
||||
if (service === "stt" && currentProvider === "deepgram") {
|
||||
const currentModel = watch("stt_model") as string;
|
||||
if (currentModel === "flux-general-en") {
|
||||
return fields.filter(field => field !== "language");
|
||||
}
|
||||
}
|
||||
|
||||
return fields;
|
||||
};
|
||||
|
||||
const renderServiceFields = (service: ServiceSegment) => {
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ interface EndCallNodeProps extends NodeProps {
|
|||
}
|
||||
|
||||
export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
|
||||
const { open, setOpen, handleSaveNodeData } = useNodeHandlers({
|
||||
const { open, setOpen, handleSaveNodeData, handleDeleteNode } = useNodeHandlers({
|
||||
id,
|
||||
additionalData: { is_end: true }
|
||||
});
|
||||
|
|
@ -122,9 +122,14 @@ export const EndCall = memo(({ data, selected, id }: EndCallNodeProps) => {
|
|||
</NodeContent>
|
||||
|
||||
<NodeToolbar isVisible={selected} position={Position.Right}>
|
||||
<Button onClick={() => setOpen(true)} variant="outline" size="icon">
|
||||
<Edit />
|
||||
</Button>
|
||||
<div className="flex flex-col gap-1">
|
||||
<Button onClick={() => setOpen(true)} variant="outline" size="icon">
|
||||
<Edit />
|
||||
</Button>
|
||||
<Button onClick={handleDeleteNode} variant="outline" size="icon">
|
||||
<Trash2Icon />
|
||||
</Button>
|
||||
</div>
|
||||
</NodeToolbar>
|
||||
|
||||
<NodeEditDialog
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue