fix: add validation for URL and params

This commit is contained in:
Abhishek Kumar 2026-06-02 12:45:13 +05:30
parent 858c474139
commit 7a3b1c4a4b
11 changed files with 327 additions and 129 deletions

View file

@ -370,7 +370,10 @@ async def search_chunks(
try:
# Import here to avoid circular dependency
from api.services.configuration.registry import ServiceProviders
from api.services.gen_ai import AzureOpenAIEmbeddingService, OpenAIEmbeddingService
from api.services.gen_ai import (
AzureOpenAIEmbeddingService,
OpenAIEmbeddingService,
)
# Try to get user's embeddings configuration
user_config = await db_client.get_user_configurations(user.id)
@ -385,7 +388,9 @@ async def search_chunks(
embeddings_model = user_config.embeddings.model
embeddings_provider = getattr(user_config.embeddings, "provider", None)
embeddings_endpoint = getattr(user_config.embeddings, "endpoint", None)
embeddings_api_version = getattr(user_config.embeddings, "api_version", None)
embeddings_api_version = getattr(
user_config.embeddings, "api_version", None
)
# Initialize embedding service based on provider
if embeddings_provider == ServiceProviders.AZURE.value and embeddings_endpoint:

View file

@ -41,6 +41,7 @@ class UserConfigurationValidator:
ServiceProviders.ELEVENLABS.value: self._validate_elevenlabs_api_key,
ServiceProviders.GOOGLE.value: self._check_google_api_key,
ServiceProviders.AZURE.value: self._check_azure_api_key,
ServiceProviders.AZURE_SPEECH.value: self._check_azure_speech_api_key,
ServiceProviders.CARTESIA.value: self._check_cartesia_api_key,
ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
@ -54,6 +55,7 @@ class UserConfigurationValidator:
ServiceProviders.ULTRAVOX_REALTIME.value: self._check_ultravox_realtime_api_key,
ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
ServiceProviders.GOOGLE_VERTEX_REALTIME.value: self._check_google_vertex_realtime_api_key,
ServiceProviders.AZURE_REALTIME.value: self._check_azure_realtime_api_key,
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
ServiceProviders.RIME.value: self._check_rime_api_key,
@ -313,6 +315,12 @@ class UserConfigurationValidator:
def _check_azure_api_key(self, model: str, api_key: str) -> bool:
return True
def _check_azure_speech_api_key(self, model: str, api_key: str) -> bool:
return True
def _check_azure_realtime_api_key(self, model: str, api_key: str) -> bool:
return True
def _check_cartesia_api_key(self, model: str, api_key: str) -> bool:
return True

View file

@ -1,3 +1,14 @@
from .azure import (
AZURE_EMBEDDING_MODELS,
AZURE_MODELS,
AZURE_REALTIME_API_VERSIONS,
AZURE_REALTIME_MODELS,
AZURE_REALTIME_VOICES,
AZURE_SPEECH_REGIONS,
AZURE_SPEECH_STT_LANGUAGES,
AZURE_SPEECH_TTS_LANGUAGES,
AZURE_SPEECH_TTS_VOICES,
)
from .deepgram import DEEPGRAM_LANGUAGES, DEEPGRAM_STT_MODELS
from .gladia import GLADIA_STT_LANGUAGES, GLADIA_STT_MODELS
from .google import (
@ -27,6 +38,15 @@ from .sarvam import (
from .speechmatics import SPEECHMATICS_STT_LANGUAGES
__all__ = [
"AZURE_EMBEDDING_MODELS",
"AZURE_MODELS",
"AZURE_REALTIME_API_VERSIONS",
"AZURE_REALTIME_MODELS",
"AZURE_REALTIME_VOICES",
"AZURE_SPEECH_REGIONS",
"AZURE_SPEECH_STT_LANGUAGES",
"AZURE_SPEECH_TTS_LANGUAGES",
"AZURE_SPEECH_TTS_VOICES",
"DEEPGRAM_LANGUAGES",
"DEEPGRAM_STT_MODELS",
"GLADIA_STT_LANGUAGES",

View file

@ -0,0 +1,125 @@
# Option tables for the Azure-backed providers (OpenAI, Realtime, Speech).
# Every public name below is a plain ``list[str]``; element order is preserved
# exactly because consumers may present these lists as-is.

# Model names listed for the Azure provider.
AZURE_MODELS = ["gpt-4.1-mini"]

# Azure Realtime: model names, voice names and API versions.
AZURE_REALTIME_MODELS = ["gpt-4o-realtime-preview"]
AZURE_REALTIME_VOICES = [
    "alloy",
    "ash",
    "ballad",
    "coral",
    "echo",
    "sage",
    "shimmer",
    "verse",
]
AZURE_REALTIME_API_VERSIONS = [
    "2025-04-01-preview",
    "2024-10-01-preview",
    "2024-12-17",
]

# Azure Speech region identifiers.
AZURE_SPEECH_REGIONS = [
    "eastus",
    "eastus2",
    "westus",
    "westus2",
    "westus3",
    "centralus",
    "northcentralus",
    "southcentralus",
    "westcentralus",
    "westeurope",
    "northeurope",
    "uksouth",
    "ukwest",
    "francecentral",
    "switzerlandnorth",
    "germanywestcentral",
    "norwayeast",
    "australiaeast",
    "eastasia",
    "southeastasia",
    "japaneast",
    "japanwest",
    "koreacentral",
    "centralindia",
    "southindia",
    "brazilsouth",
]

# Locale codes listed for Azure Speech text-to-speech.
AZURE_SPEECH_TTS_LANGUAGES = [
    "en-US",
    "en-GB",
    "en-AU",
    "en-CA",
    "en-IN",
    "es-ES",
    "es-MX",
    "fr-FR",
    "fr-CA",
    "de-DE",
    "it-IT",
    "ja-JP",
    "ko-KR",
    "zh-CN",
    "zh-HK",
    "zh-TW",
    "pt-BR",
    "pt-PT",
    "ru-RU",
    "ar-SA",
    "nl-NL",
    "pl-PL",
    "sv-SE",
    "hi-IN",
]

# Short names of the en-US neural voices; the full voice id is
# "en-US-<Name>Neural".
_EN_US_NEURAL_VOICE_NAMES = (
    "Aria",
    "Guy",
    "Jenny",
    "Davis",
    "Amber",
    "Ana",
    "Ashley",
    "Brandon",
    "Christopher",
    "Elizabeth",
    "Eric",
    "Jacob",
    "Michelle",
    "Monica",
    "Nancy",
    "Roger",
    "Sara",
    "Steffan",
    "Tony",
)
AZURE_SPEECH_TTS_VOICES = [
    f"en-US-{voice_name}Neural" for voice_name in _EN_US_NEURAL_VOICE_NAMES
]

# The speech-to-text list is the text-to-speech list, in the same order,
# without the locales that are only listed for TTS.
_TTS_ONLY_LOCALES = frozenset({"zh-HK", "zh-TW", "sv-SE"})
AZURE_SPEECH_STT_LANGUAGES = [
    locale
    for locale in AZURE_SPEECH_TTS_LANGUAGES
    if locale not in _TTS_ONLY_LOCALES
]

# Embedding model names listed for Azure OpenAI.
AZURE_EMBEDDING_MODELS = [
    "text-embedding-3-small",
    "text-embedding-ada-002",
]

View file

@ -5,6 +5,15 @@ from typing import Annotated, Dict, Literal, Type, TypeVar, Union
from pydantic import BaseModel, ConfigDict, Field, computed_field, field_validator
from api.services.configuration.options import (
AZURE_EMBEDDING_MODELS,
AZURE_MODELS,
AZURE_REALTIME_API_VERSIONS,
AZURE_REALTIME_MODELS,
AZURE_REALTIME_VOICES,
AZURE_SPEECH_REGIONS,
AZURE_SPEECH_STT_LANGUAGES,
AZURE_SPEECH_TTS_LANGUAGES,
AZURE_SPEECH_TTS_VOICES,
DEEPGRAM_LANGUAGES,
DEEPGRAM_STT_MODELS,
GLADIA_STT_LANGUAGES,
@ -286,7 +295,6 @@ OPENROUTER_MODELS = [
"meta-llama/llama-3.3-70b-instruct",
"deepseek/deepseek-chat-v3-0324",
]
AZURE_MODELS = ["gpt-4.1-mini"]
DOGRAH_LLM_MODELS = ["default", "accurate", "fast", "lite", "zen"]
AWS_BEDROCK_MODELS = [
"us.amazon.nova-pro-v1:0",
@ -680,24 +688,6 @@ class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
)
AZURE_REALTIME_MODELS = ["gpt-4o-realtime-preview"]
AZURE_REALTIME_VOICES = [
"alloy",
"ash",
"ballad",
"coral",
"echo",
"sage",
"shimmer",
"verse",
]
AZURE_REALTIME_API_VERSIONS = [
"2025-04-01-preview",
"2024-10-01-preview",
"2024-12-17",
]
@register_service(ServiceType.REALTIME)
class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
model_config = AZURE_REALTIME_PROVIDER_MODEL_CONFIG
@ -1090,76 +1080,6 @@ class MiniMaxTTSConfiguration(BaseTTSConfiguration):
)
AZURE_SPEECH_REGIONS = [
"eastus",
"eastus2",
"westus",
"westus2",
"westus3",
"centralus",
"northcentralus",
"southcentralus",
"westcentralus",
"westeurope",
"northeurope",
"uksouth",
"ukwest",
"francecentral",
"switzerlandnorth",
"germanywestcentral",
"norwayeast",
"australiaeast",
"eastasia",
"southeastasia",
"japaneast",
"japanwest",
"koreacentral",
"centralindia",
"southindia",
"brazilsouth",
]
AZURE_SPEECH_TTS_LANGUAGES = [
"en-US", "en-GB", "en-AU", "en-CA", "en-IN",
"es-ES", "es-MX",
"fr-FR", "fr-CA",
"de-DE",
"it-IT",
"ja-JP",
"ko-KR",
"zh-CN", "zh-HK", "zh-TW",
"pt-BR", "pt-PT",
"ru-RU",
"ar-SA",
"nl-NL",
"pl-PL",
"sv-SE",
"hi-IN",
]
AZURE_SPEECH_TTS_VOICES = [
"en-US-AriaNeural",
"en-US-GuyNeural",
"en-US-JennyNeural",
"en-US-DavisNeural",
"en-US-AmberNeural",
"en-US-AnaNeural",
"en-US-AshleyNeural",
"en-US-BrandonNeural",
"en-US-ChristopherNeural",
"en-US-ElizabethNeural",
"en-US-EricNeural",
"en-US-JacobNeural",
"en-US-MichelleNeural",
"en-US-MonicaNeural",
"en-US-NancyNeural",
"en-US-RogerNeural",
"en-US-SaraNeural",
"en-US-SteffanNeural",
"en-US-TonyNeural",
]
@register_tts
class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
model_config = AZURE_SPEECH_PROVIDER_MODEL_CONFIG
@ -1450,24 +1370,6 @@ class GladiaSTTConfiguration(BaseSTTConfiguration):
)
AZURE_SPEECH_STT_LANGUAGES = [
"en-US", "en-GB", "en-AU", "en-CA", "en-IN",
"es-ES", "es-MX",
"fr-FR", "fr-CA",
"de-DE",
"it-IT",
"ja-JP",
"ko-KR",
"zh-CN",
"pt-BR", "pt-PT",
"ru-RU",
"ar-SA",
"nl-NL",
"pl-PL",
"hi-IN",
]
@register_stt
class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
model_config = AZURE_SPEECH_PROVIDER_MODEL_CONFIG
@ -1546,17 +1448,20 @@ class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
)
AZURE_EMBEDDING_MODELS = ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]
@register_embeddings
class AzureOpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
model_config = AZURE_OPENAI_PROVIDER_MODEL_CONFIG
provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
model: str = Field(
default="text-embedding-3-small",
description="Azure OpenAI embedding deployment name (must match the deployed model).",
json_schema_extra={"examples": AZURE_EMBEDDING_MODELS, "allow_custom_input": True},
description=(
"Azure OpenAI embedding deployment name. The deployment must return "
"1536-dimensional embeddings."
),
json_schema_extra={
"examples": AZURE_EMBEDDING_MODELS,
"allow_custom_input": True,
},
)
endpoint: str = Field(
description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",

View file

@ -1,6 +1,9 @@
"""Embedding services for document processing and retrieval."""
from .azure_openai_service import AzureEmbeddingAPIKeyNotConfiguredError, AzureOpenAIEmbeddingService
from .azure_openai_service import (
AzureEmbeddingAPIKeyNotConfiguredError,
AzureOpenAIEmbeddingService,
)
from .base import BaseEmbeddingService
from .openai_service import EmbeddingAPIKeyNotConfiguredError, OpenAIEmbeddingService

View file

@ -1,8 +1,8 @@
"""Azure OpenAI embedding service.
Uses the Azure OpenAI REST API for text embeddings, compatible with
text-embedding-3-small, text-embedding-3-large, and text-embedding-ada-002
deployments.
1536-dimensional embedding deployments such as text-embedding-3-small and
text-embedding-ada-002.
"""
from typing import Any, Dict, List, Optional
@ -89,11 +89,23 @@ class AzureOpenAIEmbeddingService(BaseEmbeddingService):
input=texts,
model=self.model_id,
)
return [item.embedding for item in response.data]
embeddings = [item.embedding for item in response.data]
self._validate_embedding_dimensions(embeddings)
return embeddings
except Exception as e:
logger.error(f"Error generating Azure OpenAI embeddings: {e}")
raise
def _validate_embedding_dimensions(self, embeddings: List[List[float]]) -> None:
    """Reject any embedding whose length differs from ``EMBEDDING_DIMENSION``.

    Args:
        embeddings: Embedding vectors to check.

    Raises:
        ValueError: For the first vector whose length is not
            ``EMBEDDING_DIMENSION``; the message names ``self.model_id`` and
            the length that was actually returned.
    """
    for vector in embeddings:
        actual_size = len(vector)
        if actual_size == EMBEDDING_DIMENSION:
            continue
        raise ValueError(
            f"Azure OpenAI embedding deployment {self.model_id!r} returned "
            f"{actual_size} dimensions; Dograh knowledge base storage currently "
            f"supports {EMBEDDING_DIMENSION}-dimensional embeddings."
        )
async def embed_query(self, query: str) -> List[float]:
"""Embed a single query text using Azure OpenAI API."""
self._ensure_configured()

View file

@ -1,4 +1,5 @@
from typing import TYPE_CHECKING
from urllib.parse import urlencode, urlparse, urlunparse
import aiohttp
from fastapi import HTTPException
@ -269,11 +270,10 @@ def create_stt_service(
language_code = getattr(user_config.stt, "language", None) or "en-US"
region = getattr(user_config.stt, "region", None) or "eastus"
# Try to map BCP-47 string to pipecat Language enum; fall back to string
try:
pipecat_language = PipecatLanguage(language_code)
except ValueError:
pipecat_language = PipecatLanguage.EN_US
pipecat_language = language_code
return AzureSTTService(
api_key=user_config.stt.api_key,
region=region,
@ -806,13 +806,27 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
)
endpoint = getattr(realtime_config, "endpoint", None) or ""
api_version = getattr(realtime_config, "api_version", None) or "2025-04-01-preview"
if not endpoint:
raise HTTPException(
status_code=400,
detail="Azure Realtime requires an endpoint.",
)
_validate_runtime_service_url(endpoint, "endpoint")
api_version = (
getattr(realtime_config, "api_version", None) or "2025-04-01-preview"
)
# Construct the Azure Realtime WebSocket URL
# https://<resource>.openai.azure.com/openai/realtime?api-version=<ver>&deployment=<model>
base_host = endpoint.rstrip("/").replace("https://", "").replace("http://", "")
wss_url = (
f"wss://{base_host}/openai/realtime"
f"?api-version={api_version}&deployment={model}"
parsed_endpoint = urlparse(endpoint)
wss_url = urlunparse(
(
"wss",
parsed_endpoint.netloc,
"/openai/realtime",
"",
urlencode({"api-version": api_version, "deployment": model}),
"",
)
)
return DograhAzureRealtimeLLMService(
api_key=api_key,

View file

@ -260,7 +260,10 @@ async def _perform_retrieval(
"Model Configurations > Embedding."
)
if embeddings_provider == ServiceProviders.AZURE.value and embeddings_endpoint:
if (
embeddings_provider == ServiceProviders.AZURE.value
and embeddings_endpoint
):
embedding_service = AzureOpenAIEmbeddingService(
db_client=db_client,
api_key=embeddings_api_key,

View file

@ -164,7 +164,9 @@ async def process_knowledge_base_document(
embeddings_model = user_config.embeddings.model
embeddings_base_url = getattr(user_config.embeddings, "base_url", None)
embeddings_endpoint = getattr(user_config.embeddings, "endpoint", None)
embeddings_api_version = getattr(user_config.embeddings, "api_version", None)
embeddings_api_version = getattr(
user_config.embeddings, "api_version", None
)
logger.info(
f"Using user embeddings config: provider={embeddings_provider}, "
f"model={embeddings_model}"

View file

@ -3,8 +3,24 @@
from types import SimpleNamespace
from unittest.mock import patch
from api.services.configuration.registry import ServiceProviders
from api.services.pipecat.service_factory import create_stt_service, create_tts_service
import pytest
from fastapi import HTTPException
from api.services.configuration.check_validity import UserConfigurationValidator
from api.services.configuration.registry import (
AzureRealtimeLLMConfiguration,
AzureSpeechSTTConfiguration,
AzureSpeechTTSConfiguration,
ServiceProviders,
)
from api.services.gen_ai.embedding.azure_openai_service import (
AzureOpenAIEmbeddingService,
)
from api.services.pipecat.service_factory import (
create_realtime_llm_service,
create_stt_service,
create_tts_service,
)
def _audio_config():
@ -79,3 +95,88 @@ def test_create_azure_speech_stt_service():
assert kwargs["api_key"] == "test-subscription-key"
assert kwargs["region"] == "eastus"
assert kwargs["sample_rate"] == 16000
def test_create_azure_speech_stt_service_preserves_custom_language():
    """The configured STT language string is forwarded in ``settings.language``."""
    stt_settings = SimpleNamespace(
        provider=ServiceProviders.AZURE_SPEECH.value,
        api_key="test-subscription-key",
        region="eastus",
        language="custom-locale",
        model="latest_long",
    )
    user_config = SimpleNamespace(stt=stt_settings)

    # Replace the real service class so the constructor arguments can be read.
    patch_target = "api.services.pipecat.service_factory.AzureSTTService"
    with patch(patch_target) as stt_service_cls:
        create_stt_service(user_config, _audio_config())

    passed_kwargs = stt_service_cls.call_args.kwargs
    assert passed_kwargs["settings"].language == "custom-locale"
def test_validator_accepts_azure_speech_services():
    """Azure Speech TTS and STT configurations validate with no reported errors."""
    validator = UserConfigurationValidator()
    # Each case: (configuration class, service name handed to the validator).
    cases = (
        (AzureSpeechTTSConfiguration, "tts"),
        (AzureSpeechSTTConfiguration, "stt"),
    )

    for config_cls, service_name in cases:
        config = config_cls(api_key="test-key")
        assert validator._validate_service(config, service_name) == []
def test_validator_accepts_azure_realtime_service(monkeypatch):
    """An Azure Realtime config with an https endpoint validates cleanly in oss mode."""
    monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "oss")

    validator = UserConfigurationValidator()
    realtime_config = AzureRealtimeLLMConfiguration(
        api_key="test-key",
        endpoint="https://example.openai.azure.com",
    )

    reported = validator._validate_service(realtime_config, "realtime")
    assert reported == []
def test_create_azure_realtime_blocks_private_endpoint_in_saas(monkeypatch):
    """In saas mode, an endpoint of ``http://10.0.0.10`` is refused with HTTP 400."""
    monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas")

    realtime_settings = SimpleNamespace(
        provider=ServiceProviders.AZURE_REALTIME.value,
        api_key="test-key",
        endpoint="http://10.0.0.10",
        api_version="2025-04-01-preview",
        model="gpt-4o-realtime-preview",
        voice="alloy",
    )
    user_config = SimpleNamespace(realtime=realtime_settings)

    with pytest.raises(HTTPException) as raised:
        create_realtime_llm_service(user_config, _audio_config())

    error = raised.value
    assert error.status_code == 400
    assert "public IP" in error.detail
def test_azure_embedding_service_rejects_wrong_dimension():
    """A 3072-element embedding makes the dimension check raise ``ValueError``."""
    service = AzureOpenAIEmbeddingService(
        db_client=SimpleNamespace(),
        api_key=None,
        endpoint=None,
        model_id="text-embedding-3-large",
    )
    oversized_vector = [0.0] * 3072

    with pytest.raises(ValueError, match="1536-dimensional"):
        service._validate_embedding_dimensions([oversized_vector])