dograh/api/services/voice_prompting_guide/topics/speech_handling.py

"""Topic: handle noisy audio, bad transcripts, and silence gracefully."""

from __future__ import annotations

from api.services.voice_prompting_guide._base import (
    AuditCheck,
    Stage,
    StageLens,
    VoicePromptingTopic,
)

TOPIC = VoicePromptingTopic(
    id="speech_handling",
    title="Handle noisy audio and bad transcripts without guessing",
    severity="medium",
    applies_to_node_types=("globalNode",),
    stages={
        Stage.create: StageLens(
            relevant=True,
            lens=(
                "Tell the global prompt that audio is noisy and transcripts may be "
                "wrong. When a response doesn't make coherent sense, the agent "
                "should ask the caller to repeat rather than guess."
            ),
        ),
        Stage.review: StageLens(
            relevant=True,
            lens=(
                "Confirm the prompt acknowledges noisy transcripts and gives a "
                "recovery move ('Sorry, can you repeat that?'). Agents that guess at "
                "garbled input compound the error."
            ),
        ),
    },
    content="""\
Voice transcripts are noisy. Transcripts arrive partially wrong, callers talk
over the agent, lines drop, and accents confuse the STT — and you can't ask the
caller to "scroll up". The prompt has to handle this without breaking flow.

Put in the global prompt:
- Tell the model the audio can be noisy and the transcript may contain errors.
- When the user's response doesn't make coherent sense — likely a transcript
  error — the agent should say something like "Sorry, can you repeat that?" or
  "The line's a bit patchy, I didn't catch you" rather than guessing at what
  was said.

This is the input-side complement to reading back critical information: speech
handling covers what to do when you didn't catch something; readback covers
confirming the things you did catch but can't afford to get wrong.

Examples:
- Good: "Audio may be noisy and transcripts imperfect. If a reply doesn't make
  sense, ask the caller to repeat instead of assuming."
- Bad:  Agent receives a garbled order ID and proceeds to a tool call with its
  best guess, producing a wrong-order lookup.
""",
    audit_checks=(
        AuditCheck(
            id="handles_unclear_input",
            judge_question=(
                "Does the prompt tell the agent what to do when the caller's input "
                "is unclear or incoherent — ask them to repeat — rather than "
                "guessing at the meaning?"
            ),
            expected="yes",
            quote=(
                "No recovery for unclear input — tell the agent to ask the caller to "
                "repeat instead of guessing at a bad transcript."
            ),
        ),
    ),
    cross_refs=("readback_and_extraction", "language_and_format"),
)
feat: add mcp guides for various topic and stages for bot building (#380) 2026-05-31 16:07:32 +05:30			`"""Topic: handle noisy audio, bad transcripts, and silence gracefully."""`

			`from __future__ import annotations`

			`from api.services.voice_prompting_guide._base import (`
			`AuditCheck,`
			`Stage,`
			`StageLens,`
			`VoicePromptingTopic,`
			`)`

			`TOPIC = VoicePromptingTopic(`
			`id="speech_handling",`
			`title="Handle noisy audio and bad transcripts without guessing",`
			`severity="medium",`
			`applies_to_node_types=("globalNode",),`
			`stages={`
			`Stage.create: StageLens(`
			`relevant=True,`
			`lens=(`
			`"Tell the global prompt that audio is noisy and transcripts may be "`
			`"wrong. When a response doesn't make coherent sense, the agent "`
			`"should ask the caller to repeat rather than guess."`
			`),`
			`),`
			`Stage.review: StageLens(`
			`relevant=True,`
			`lens=(`
			`"Confirm the prompt acknowledges noisy transcripts and gives a "`
			`"recovery move ('Sorry, can you repeat that?'). Agents that guess at "`
			`"garbled input compound the error."`
			`),`
			`),`
			`},`
			`content="""\`
			`Voice transcripts are noisy. Transcripts arrive partially wrong, callers talk`
			`over the agent, lines drop, and accents confuse the STT — and you can't ask the`
			`caller to "scroll up". The prompt has to handle this without breaking flow.`

			`Put in the global prompt:`
			`- Tell the model the audio can be noisy and the transcript may contain errors.`
			`- When the user's response doesn't make coherent sense — likely a transcript`
			`error — the agent should say something like "Sorry, can you repeat that?" or`
			`"The line's a bit patchy, I didn't catch you" rather than guessing at what`
			`was said.`

			`This is the input-side complement to reading back critical information: speech`
			`handling covers what to do when you didn't catch something; readback covers`
			`confirming the things you did catch but can't afford to get wrong.`

			`Examples:`
			`- Good: "Audio may be noisy and transcripts imperfect. If a reply doesn't make`
			`sense, ask the caller to repeat instead of assuming."`
			`- Bad: Agent receives a garbled order ID and proceeds to a tool call with its`
			`best guess, producing a wrong-order lookup.`
			`""",`
			`audit_checks=(`
			`AuditCheck(`
			`id="handles_unclear_input",`
			`judge_question=(`
			`"Does the prompt tell the agent what to do when the caller's input "`
			`"is unclear or incoherent — ask them to repeat — rather than "`
			`"guessing at the meaning?"`
			`),`
			`expected="yes",`
			`quote=(`
			`"No recovery for unclear input — tell the agent to ask the caller to "`
			`"repeat instead of guessing at a bad transcript."`
			`),`
			`),`
			`),`
			`cross_refs=("readback_and_extraction", "language_and_format"),`
			`)`