dograh/api/services/configuration/options/azure.py
Vishal Dhateria 7ba95c0fbe
feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime) (#381)
* feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime)

Enables Azure AI services across all model layers so users with Azure
credits can consolidate billing on a single provider.

- Voice (TTS): AzureSpeechTTSConfiguration via azure_speech provider
- Transcriber (STT): AzureSpeechSTTConfiguration via azure_speech provider
- Embedding: AzureOpenAIEmbeddingsConfiguration via azure provider
- Realtime: AzureRealtimeLLMConfiguration via azure_realtime provider

New files:
- api/services/pipecat/realtime/azure_realtime.py
- api/services/gen_ai/embedding/azure_openai_service.py
- api/tests/test_azure_speech_service_factory.py

The UI picks up all four providers automatically from the schema —
no frontend changes required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: add validation for URL and params

---------

Co-authored-by: Vishal Dhateria <vishal@finela.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
2026-06-02 12:50:00 +05:30

125 lines
2 KiB
Python

# Model names offered as options for the Azure provider.
# NOTE(review): presumably these must match what the Azure resource actually
# serves; the consuming schema is not visible from this file — confirm.
AZURE_MODELS = [
    "gpt-4.1-mini",
]

# Model names offered as options for the Azure realtime provider.
AZURE_REALTIME_MODELS = [
    "gpt-4o-realtime-preview",
]
# Voice names selectable alongside the Azure realtime models.
# Entry order is kept exactly as listed; callers may rely on it.
AZURE_REALTIME_VOICES = [
    "alloy", "ash", "ballad", "coral",
    "echo", "sage", "shimmer", "verse",
]
# API version strings selectable for the Azure realtime provider.
# NOTE(review): the entries are not in chronological order. The order is
# preserved on purpose in case a consumer treats the first entry as the
# default — confirm before sorting.
AZURE_REALTIME_API_VERSIONS = ["2025-04-01-preview", "2024-10-01-preview", "2024-12-17"]
# Region identifiers selectable for the Azure Speech options.
# Grouped by geography for readability only; the resulting list order is
# unchanged.
AZURE_SPEECH_REGIONS = [
    # United States
    "eastus", "eastus2", "westus", "westus2", "westus3",
    "centralus", "northcentralus", "southcentralus", "westcentralus",
    # Europe
    "westeurope", "northeurope", "uksouth", "ukwest",
    "francecentral", "switzerlandnorth", "germanywestcentral", "norwayeast",
    # Asia-Pacific
    "australiaeast", "eastasia", "southeastasia",
    "japaneast", "japanwest", "koreacentral",
    "centralindia", "southindia",
    # South America
    "brazilsouth",
]
# Locale codes selectable for Azure Speech text-to-speech.
# Variants of the same language share a row; list order is unchanged.
AZURE_SPEECH_TTS_LANGUAGES = [
    "en-US", "en-GB", "en-AU", "en-CA", "en-IN",
    "es-ES", "es-MX",
    "fr-FR", "fr-CA",
    "de-DE", "it-IT", "ja-JP", "ko-KR",
    "zh-CN", "zh-HK", "zh-TW",
    "pt-BR", "pt-PT",
    "ru-RU", "ar-SA", "nl-NL", "pl-PL", "sv-SE", "hi-IN",
]
# Voice names selectable for Azure Speech text-to-speech.
# Every entry listed here carries the en-US locale prefix. The names are
# kept as full literals so they stay searchable; list order is unchanged.
AZURE_SPEECH_TTS_VOICES = [
    "en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural",
    "en-US-DavisNeural", "en-US-AmberNeural", "en-US-AnaNeural",
    "en-US-AshleyNeural", "en-US-BrandonNeural", "en-US-ChristopherNeural",
    "en-US-ElizabethNeural", "en-US-EricNeural", "en-US-JacobNeural",
    "en-US-MichelleNeural", "en-US-MonicaNeural", "en-US-NancyNeural",
    "en-US-RogerNeural", "en-US-SaraNeural", "en-US-SteffanNeural",
    "en-US-TonyNeural",
]
# Locale codes selectable for Azure Speech speech-to-text.
# Variants of the same language share a row; list order is unchanged.
# NOTE(review): unlike the TTS locale list in this file, this list has no
# zh-HK, zh-TW or sv-SE entries — confirm whether that is intentional.
AZURE_SPEECH_STT_LANGUAGES = [
    "en-US", "en-GB", "en-AU", "en-CA", "en-IN",
    "es-ES", "es-MX",
    "fr-FR", "fr-CA",
    "de-DE", "it-IT", "ja-JP", "ko-KR",
    "zh-CN",
    "pt-BR", "pt-PT",
    "ru-RU", "ar-SA", "nl-NL", "pl-PL", "hi-IN",
]
# Embedding model names selectable for the Azure embeddings options.
AZURE_EMBEDDING_MODELS = ["text-embedding-3-small", "text-embedding-ada-002"]