mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
fix: add validation for URL and params
This commit is contained in:
parent
858c474139
commit
7a3b1c4a4b
11 changed files with 327 additions and 129 deletions
|
|
@ -370,7 +370,10 @@ async def search_chunks(
|
|||
try:
|
||||
# Import here to avoid circular dependency
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from api.services.gen_ai import AzureOpenAIEmbeddingService, OpenAIEmbeddingService
|
||||
from api.services.gen_ai import (
|
||||
AzureOpenAIEmbeddingService,
|
||||
OpenAIEmbeddingService,
|
||||
)
|
||||
|
||||
# Try to get user's embeddings configuration
|
||||
user_config = await db_client.get_user_configurations(user.id)
|
||||
|
|
@ -385,7 +388,9 @@ async def search_chunks(
|
|||
embeddings_model = user_config.embeddings.model
|
||||
embeddings_provider = getattr(user_config.embeddings, "provider", None)
|
||||
embeddings_endpoint = getattr(user_config.embeddings, "endpoint", None)
|
||||
embeddings_api_version = getattr(user_config.embeddings, "api_version", None)
|
||||
embeddings_api_version = getattr(
|
||||
user_config.embeddings, "api_version", None
|
||||
)
|
||||
|
||||
# Initialize embedding service based on provider
|
||||
if embeddings_provider == ServiceProviders.AZURE.value and embeddings_endpoint:
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.ELEVENLABS.value: self._validate_elevenlabs_api_key,
|
||||
ServiceProviders.GOOGLE.value: self._check_google_api_key,
|
||||
ServiceProviders.AZURE.value: self._check_azure_api_key,
|
||||
ServiceProviders.AZURE_SPEECH.value: self._check_azure_speech_api_key,
|
||||
ServiceProviders.CARTESIA.value: self._check_cartesia_api_key,
|
||||
ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
|
||||
ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
|
||||
|
|
@ -54,6 +55,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.ULTRAVOX_REALTIME.value: self._check_ultravox_realtime_api_key,
|
||||
ServiceProviders.GOOGLE_REALTIME.value: self._check_google_api_key,
|
||||
ServiceProviders.GOOGLE_VERTEX_REALTIME.value: self._check_google_vertex_realtime_api_key,
|
||||
ServiceProviders.AZURE_REALTIME.value: self._check_azure_realtime_api_key,
|
||||
ServiceProviders.ASSEMBLYAI.value: self._check_assemblyai_api_key,
|
||||
ServiceProviders.GLADIA.value: self._check_gladia_api_key,
|
||||
ServiceProviders.RIME.value: self._check_rime_api_key,
|
||||
|
|
@ -313,6 +315,12 @@ class UserConfigurationValidator:
|
|||
def _check_azure_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
def _check_azure_speech_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
def _check_azure_realtime_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
def _check_cartesia_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,14 @@
|
|||
from .azure import (
|
||||
AZURE_EMBEDDING_MODELS,
|
||||
AZURE_MODELS,
|
||||
AZURE_REALTIME_API_VERSIONS,
|
||||
AZURE_REALTIME_MODELS,
|
||||
AZURE_REALTIME_VOICES,
|
||||
AZURE_SPEECH_REGIONS,
|
||||
AZURE_SPEECH_STT_LANGUAGES,
|
||||
AZURE_SPEECH_TTS_LANGUAGES,
|
||||
AZURE_SPEECH_TTS_VOICES,
|
||||
)
|
||||
from .deepgram import DEEPGRAM_LANGUAGES, DEEPGRAM_STT_MODELS
|
||||
from .gladia import GLADIA_STT_LANGUAGES, GLADIA_STT_MODELS
|
||||
from .google import (
|
||||
|
|
@ -27,6 +38,15 @@ from .sarvam import (
|
|||
from .speechmatics import SPEECHMATICS_STT_LANGUAGES
|
||||
|
||||
__all__ = [
|
||||
"AZURE_EMBEDDING_MODELS",
|
||||
"AZURE_MODELS",
|
||||
"AZURE_REALTIME_API_VERSIONS",
|
||||
"AZURE_REALTIME_MODELS",
|
||||
"AZURE_REALTIME_VOICES",
|
||||
"AZURE_SPEECH_REGIONS",
|
||||
"AZURE_SPEECH_STT_LANGUAGES",
|
||||
"AZURE_SPEECH_TTS_LANGUAGES",
|
||||
"AZURE_SPEECH_TTS_VOICES",
|
||||
"DEEPGRAM_LANGUAGES",
|
||||
"DEEPGRAM_STT_MODELS",
|
||||
"GLADIA_STT_LANGUAGES",
|
||||
|
|
|
|||
125
api/services/configuration/options/azure.py
Normal file
125
api/services/configuration/options/azure.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
AZURE_MODELS = ["gpt-4.1-mini"]
|
||||
|
||||
AZURE_REALTIME_MODELS = ["gpt-4o-realtime-preview"]
|
||||
AZURE_REALTIME_VOICES = [
|
||||
"alloy",
|
||||
"ash",
|
||||
"ballad",
|
||||
"coral",
|
||||
"echo",
|
||||
"sage",
|
||||
"shimmer",
|
||||
"verse",
|
||||
]
|
||||
AZURE_REALTIME_API_VERSIONS = [
|
||||
"2025-04-01-preview",
|
||||
"2024-10-01-preview",
|
||||
"2024-12-17",
|
||||
]
|
||||
|
||||
AZURE_SPEECH_REGIONS = [
|
||||
"eastus",
|
||||
"eastus2",
|
||||
"westus",
|
||||
"westus2",
|
||||
"westus3",
|
||||
"centralus",
|
||||
"northcentralus",
|
||||
"southcentralus",
|
||||
"westcentralus",
|
||||
"westeurope",
|
||||
"northeurope",
|
||||
"uksouth",
|
||||
"ukwest",
|
||||
"francecentral",
|
||||
"switzerlandnorth",
|
||||
"germanywestcentral",
|
||||
"norwayeast",
|
||||
"australiaeast",
|
||||
"eastasia",
|
||||
"southeastasia",
|
||||
"japaneast",
|
||||
"japanwest",
|
||||
"koreacentral",
|
||||
"centralindia",
|
||||
"southindia",
|
||||
"brazilsouth",
|
||||
]
|
||||
|
||||
AZURE_SPEECH_TTS_LANGUAGES = [
|
||||
"en-US",
|
||||
"en-GB",
|
||||
"en-AU",
|
||||
"en-CA",
|
||||
"en-IN",
|
||||
"es-ES",
|
||||
"es-MX",
|
||||
"fr-FR",
|
||||
"fr-CA",
|
||||
"de-DE",
|
||||
"it-IT",
|
||||
"ja-JP",
|
||||
"ko-KR",
|
||||
"zh-CN",
|
||||
"zh-HK",
|
||||
"zh-TW",
|
||||
"pt-BR",
|
||||
"pt-PT",
|
||||
"ru-RU",
|
||||
"ar-SA",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"sv-SE",
|
||||
"hi-IN",
|
||||
]
|
||||
|
||||
AZURE_SPEECH_TTS_VOICES = [
|
||||
"en-US-AriaNeural",
|
||||
"en-US-GuyNeural",
|
||||
"en-US-JennyNeural",
|
||||
"en-US-DavisNeural",
|
||||
"en-US-AmberNeural",
|
||||
"en-US-AnaNeural",
|
||||
"en-US-AshleyNeural",
|
||||
"en-US-BrandonNeural",
|
||||
"en-US-ChristopherNeural",
|
||||
"en-US-ElizabethNeural",
|
||||
"en-US-EricNeural",
|
||||
"en-US-JacobNeural",
|
||||
"en-US-MichelleNeural",
|
||||
"en-US-MonicaNeural",
|
||||
"en-US-NancyNeural",
|
||||
"en-US-RogerNeural",
|
||||
"en-US-SaraNeural",
|
||||
"en-US-SteffanNeural",
|
||||
"en-US-TonyNeural",
|
||||
]
|
||||
|
||||
AZURE_SPEECH_STT_LANGUAGES = [
|
||||
"en-US",
|
||||
"en-GB",
|
||||
"en-AU",
|
||||
"en-CA",
|
||||
"en-IN",
|
||||
"es-ES",
|
||||
"es-MX",
|
||||
"fr-FR",
|
||||
"fr-CA",
|
||||
"de-DE",
|
||||
"it-IT",
|
||||
"ja-JP",
|
||||
"ko-KR",
|
||||
"zh-CN",
|
||||
"pt-BR",
|
||||
"pt-PT",
|
||||
"ru-RU",
|
||||
"ar-SA",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"hi-IN",
|
||||
]
|
||||
|
||||
AZURE_EMBEDDING_MODELS = [
|
||||
"text-embedding-3-small",
|
||||
"text-embedding-ada-002",
|
||||
]
|
||||
|
|
@ -5,6 +5,15 @@ from typing import Annotated, Dict, Literal, Type, TypeVar, Union
|
|||
from pydantic import BaseModel, ConfigDict, Field, computed_field, field_validator
|
||||
|
||||
from api.services.configuration.options import (
|
||||
AZURE_EMBEDDING_MODELS,
|
||||
AZURE_MODELS,
|
||||
AZURE_REALTIME_API_VERSIONS,
|
||||
AZURE_REALTIME_MODELS,
|
||||
AZURE_REALTIME_VOICES,
|
||||
AZURE_SPEECH_REGIONS,
|
||||
AZURE_SPEECH_STT_LANGUAGES,
|
||||
AZURE_SPEECH_TTS_LANGUAGES,
|
||||
AZURE_SPEECH_TTS_VOICES,
|
||||
DEEPGRAM_LANGUAGES,
|
||||
DEEPGRAM_STT_MODELS,
|
||||
GLADIA_STT_LANGUAGES,
|
||||
|
|
@ -286,7 +295,6 @@ OPENROUTER_MODELS = [
|
|||
"meta-llama/llama-3.3-70b-instruct",
|
||||
"deepseek/deepseek-chat-v3-0324",
|
||||
]
|
||||
AZURE_MODELS = ["gpt-4.1-mini"]
|
||||
DOGRAH_LLM_MODELS = ["default", "accurate", "fast", "lite", "zen"]
|
||||
AWS_BEDROCK_MODELS = [
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
|
|
@ -680,24 +688,6 @@ class GoogleVertexRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|||
)
|
||||
|
||||
|
||||
AZURE_REALTIME_MODELS = ["gpt-4o-realtime-preview"]
|
||||
AZURE_REALTIME_VOICES = [
|
||||
"alloy",
|
||||
"ash",
|
||||
"ballad",
|
||||
"coral",
|
||||
"echo",
|
||||
"sage",
|
||||
"shimmer",
|
||||
"verse",
|
||||
]
|
||||
AZURE_REALTIME_API_VERSIONS = [
|
||||
"2025-04-01-preview",
|
||||
"2024-10-01-preview",
|
||||
"2024-12-17",
|
||||
]
|
||||
|
||||
|
||||
@register_service(ServiceType.REALTIME)
|
||||
class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
|
||||
model_config = AZURE_REALTIME_PROVIDER_MODEL_CONFIG
|
||||
|
|
@ -1090,76 +1080,6 @@ class MiniMaxTTSConfiguration(BaseTTSConfiguration):
|
|||
)
|
||||
|
||||
|
||||
AZURE_SPEECH_REGIONS = [
|
||||
"eastus",
|
||||
"eastus2",
|
||||
"westus",
|
||||
"westus2",
|
||||
"westus3",
|
||||
"centralus",
|
||||
"northcentralus",
|
||||
"southcentralus",
|
||||
"westcentralus",
|
||||
"westeurope",
|
||||
"northeurope",
|
||||
"uksouth",
|
||||
"ukwest",
|
||||
"francecentral",
|
||||
"switzerlandnorth",
|
||||
"germanywestcentral",
|
||||
"norwayeast",
|
||||
"australiaeast",
|
||||
"eastasia",
|
||||
"southeastasia",
|
||||
"japaneast",
|
||||
"japanwest",
|
||||
"koreacentral",
|
||||
"centralindia",
|
||||
"southindia",
|
||||
"brazilsouth",
|
||||
]
|
||||
|
||||
AZURE_SPEECH_TTS_LANGUAGES = [
|
||||
"en-US", "en-GB", "en-AU", "en-CA", "en-IN",
|
||||
"es-ES", "es-MX",
|
||||
"fr-FR", "fr-CA",
|
||||
"de-DE",
|
||||
"it-IT",
|
||||
"ja-JP",
|
||||
"ko-KR",
|
||||
"zh-CN", "zh-HK", "zh-TW",
|
||||
"pt-BR", "pt-PT",
|
||||
"ru-RU",
|
||||
"ar-SA",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"sv-SE",
|
||||
"hi-IN",
|
||||
]
|
||||
|
||||
AZURE_SPEECH_TTS_VOICES = [
|
||||
"en-US-AriaNeural",
|
||||
"en-US-GuyNeural",
|
||||
"en-US-JennyNeural",
|
||||
"en-US-DavisNeural",
|
||||
"en-US-AmberNeural",
|
||||
"en-US-AnaNeural",
|
||||
"en-US-AshleyNeural",
|
||||
"en-US-BrandonNeural",
|
||||
"en-US-ChristopherNeural",
|
||||
"en-US-ElizabethNeural",
|
||||
"en-US-EricNeural",
|
||||
"en-US-JacobNeural",
|
||||
"en-US-MichelleNeural",
|
||||
"en-US-MonicaNeural",
|
||||
"en-US-NancyNeural",
|
||||
"en-US-RogerNeural",
|
||||
"en-US-SaraNeural",
|
||||
"en-US-SteffanNeural",
|
||||
"en-US-TonyNeural",
|
||||
]
|
||||
|
||||
|
||||
@register_tts
|
||||
class AzureSpeechTTSConfiguration(BaseTTSConfiguration):
|
||||
model_config = AZURE_SPEECH_PROVIDER_MODEL_CONFIG
|
||||
|
|
@ -1450,24 +1370,6 @@ class GladiaSTTConfiguration(BaseSTTConfiguration):
|
|||
)
|
||||
|
||||
|
||||
AZURE_SPEECH_STT_LANGUAGES = [
|
||||
"en-US", "en-GB", "en-AU", "en-CA", "en-IN",
|
||||
"es-ES", "es-MX",
|
||||
"fr-FR", "fr-CA",
|
||||
"de-DE",
|
||||
"it-IT",
|
||||
"ja-JP",
|
||||
"ko-KR",
|
||||
"zh-CN",
|
||||
"pt-BR", "pt-PT",
|
||||
"ru-RU",
|
||||
"ar-SA",
|
||||
"nl-NL",
|
||||
"pl-PL",
|
||||
"hi-IN",
|
||||
]
|
||||
|
||||
|
||||
@register_stt
|
||||
class AzureSpeechSTTConfiguration(BaseSTTConfiguration):
|
||||
model_config = AZURE_SPEECH_PROVIDER_MODEL_CONFIG
|
||||
|
|
@ -1546,17 +1448,20 @@ class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
|
|||
)
|
||||
|
||||
|
||||
AZURE_EMBEDDING_MODELS = ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]
|
||||
|
||||
|
||||
@register_embeddings
|
||||
class AzureOpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration):
|
||||
model_config = AZURE_OPENAI_PROVIDER_MODEL_CONFIG
|
||||
provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE
|
||||
model: str = Field(
|
||||
default="text-embedding-3-small",
|
||||
description="Azure OpenAI embedding deployment name (must match the deployed model).",
|
||||
json_schema_extra={"examples": AZURE_EMBEDDING_MODELS, "allow_custom_input": True},
|
||||
description=(
|
||||
"Azure OpenAI embedding deployment name. The deployment must return "
|
||||
"1536-dimensional embeddings."
|
||||
),
|
||||
json_schema_extra={
|
||||
"examples": AZURE_EMBEDDING_MODELS,
|
||||
"allow_custom_input": True,
|
||||
},
|
||||
)
|
||||
endpoint: str = Field(
|
||||
description="Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
"""Embedding services for document processing and retrieval."""
|
||||
|
||||
from .azure_openai_service import AzureEmbeddingAPIKeyNotConfiguredError, AzureOpenAIEmbeddingService
|
||||
from .azure_openai_service import (
|
||||
AzureEmbeddingAPIKeyNotConfiguredError,
|
||||
AzureOpenAIEmbeddingService,
|
||||
)
|
||||
from .base import BaseEmbeddingService
|
||||
from .openai_service import EmbeddingAPIKeyNotConfiguredError, OpenAIEmbeddingService
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""Azure OpenAI embedding service.
|
||||
|
||||
Uses the Azure OpenAI REST API for text embeddings, compatible with
|
||||
text-embedding-3-small, text-embedding-3-large, and text-embedding-ada-002
|
||||
deployments.
|
||||
1536-dimensional embedding deployments such as text-embedding-3-small and
|
||||
text-embedding-ada-002.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
|
@ -89,11 +89,23 @@ class AzureOpenAIEmbeddingService(BaseEmbeddingService):
|
|||
input=texts,
|
||||
model=self.model_id,
|
||||
)
|
||||
return [item.embedding for item in response.data]
|
||||
embeddings = [item.embedding for item in response.data]
|
||||
self._validate_embedding_dimensions(embeddings)
|
||||
return embeddings
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating Azure OpenAI embeddings: {e}")
|
||||
raise
|
||||
|
||||
def _validate_embedding_dimensions(self, embeddings: List[List[float]]) -> None:
|
||||
for embedding in embeddings:
|
||||
if len(embedding) != EMBEDDING_DIMENSION:
|
||||
raise ValueError(
|
||||
"Azure OpenAI embedding deployment "
|
||||
f"{self.model_id!r} returned {len(embedding)} dimensions; "
|
||||
"Dograh knowledge base storage currently supports "
|
||||
f"{EMBEDDING_DIMENSION}-dimensional embeddings."
|
||||
)
|
||||
|
||||
async def embed_query(self, query: str) -> List[float]:
|
||||
"""Embed a single query text using Azure OpenAI API."""
|
||||
self._ensure_configured()
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import urlencode, urlparse, urlunparse
|
||||
|
||||
import aiohttp
|
||||
from fastapi import HTTPException
|
||||
|
|
@ -269,11 +270,10 @@ def create_stt_service(
|
|||
|
||||
language_code = getattr(user_config.stt, "language", None) or "en-US"
|
||||
region = getattr(user_config.stt, "region", None) or "eastus"
|
||||
# Try to map BCP-47 string to pipecat Language enum; fall back to string
|
||||
try:
|
||||
pipecat_language = PipecatLanguage(language_code)
|
||||
except ValueError:
|
||||
pipecat_language = PipecatLanguage.EN_US
|
||||
pipecat_language = language_code
|
||||
return AzureSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
region=region,
|
||||
|
|
@ -806,13 +806,27 @@ def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
|
|||
)
|
||||
|
||||
endpoint = getattr(realtime_config, "endpoint", None) or ""
|
||||
api_version = getattr(realtime_config, "api_version", None) or "2025-04-01-preview"
|
||||
if not endpoint:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Azure Realtime requires an endpoint.",
|
||||
)
|
||||
_validate_runtime_service_url(endpoint, "endpoint")
|
||||
api_version = (
|
||||
getattr(realtime_config, "api_version", None) or "2025-04-01-preview"
|
||||
)
|
||||
# Construct the Azure Realtime WebSocket URL
|
||||
# https://<resource>.openai.azure.com/openai/realtime?api-version=<ver>&deployment=<model>
|
||||
base_host = endpoint.rstrip("/").replace("https://", "").replace("http://", "")
|
||||
wss_url = (
|
||||
f"wss://{base_host}/openai/realtime"
|
||||
f"?api-version={api_version}&deployment={model}"
|
||||
parsed_endpoint = urlparse(endpoint)
|
||||
wss_url = urlunparse(
|
||||
(
|
||||
"wss",
|
||||
parsed_endpoint.netloc,
|
||||
"/openai/realtime",
|
||||
"",
|
||||
urlencode({"api-version": api_version, "deployment": model}),
|
||||
"",
|
||||
)
|
||||
)
|
||||
return DograhAzureRealtimeLLMService(
|
||||
api_key=api_key,
|
||||
|
|
|
|||
|
|
@ -260,7 +260,10 @@ async def _perform_retrieval(
|
|||
"Model Configurations > Embedding."
|
||||
)
|
||||
|
||||
if embeddings_provider == ServiceProviders.AZURE.value and embeddings_endpoint:
|
||||
if (
|
||||
embeddings_provider == ServiceProviders.AZURE.value
|
||||
and embeddings_endpoint
|
||||
):
|
||||
embedding_service = AzureOpenAIEmbeddingService(
|
||||
db_client=db_client,
|
||||
api_key=embeddings_api_key,
|
||||
|
|
|
|||
|
|
@ -164,7 +164,9 @@ async def process_knowledge_base_document(
|
|||
embeddings_model = user_config.embeddings.model
|
||||
embeddings_base_url = getattr(user_config.embeddings, "base_url", None)
|
||||
embeddings_endpoint = getattr(user_config.embeddings, "endpoint", None)
|
||||
embeddings_api_version = getattr(user_config.embeddings, "api_version", None)
|
||||
embeddings_api_version = getattr(
|
||||
user_config.embeddings, "api_version", None
|
||||
)
|
||||
logger.info(
|
||||
f"Using user embeddings config: provider={embeddings_provider}, "
|
||||
f"model={embeddings_model}"
|
||||
|
|
|
|||
|
|
@ -3,8 +3,24 @@
|
|||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from api.services.pipecat.service_factory import create_stt_service, create_tts_service
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from api.services.configuration.check_validity import UserConfigurationValidator
|
||||
from api.services.configuration.registry import (
|
||||
AzureRealtimeLLMConfiguration,
|
||||
AzureSpeechSTTConfiguration,
|
||||
AzureSpeechTTSConfiguration,
|
||||
ServiceProviders,
|
||||
)
|
||||
from api.services.gen_ai.embedding.azure_openai_service import (
|
||||
AzureOpenAIEmbeddingService,
|
||||
)
|
||||
from api.services.pipecat.service_factory import (
|
||||
create_realtime_llm_service,
|
||||
create_stt_service,
|
||||
create_tts_service,
|
||||
)
|
||||
|
||||
|
||||
def _audio_config():
|
||||
|
|
@ -79,3 +95,88 @@ def test_create_azure_speech_stt_service():
|
|||
assert kwargs["api_key"] == "test-subscription-key"
|
||||
assert kwargs["region"] == "eastus"
|
||||
assert kwargs["sample_rate"] == 16000
|
||||
|
||||
|
||||
def test_create_azure_speech_stt_service_preserves_custom_language():
|
||||
user_config = SimpleNamespace(
|
||||
stt=SimpleNamespace(
|
||||
provider=ServiceProviders.AZURE_SPEECH.value,
|
||||
api_key="test-subscription-key",
|
||||
region="eastus",
|
||||
language="custom-locale",
|
||||
model="latest_long",
|
||||
)
|
||||
)
|
||||
|
||||
with patch("api.services.pipecat.service_factory.AzureSTTService") as mock_service:
|
||||
create_stt_service(user_config, _audio_config())
|
||||
|
||||
kwargs = mock_service.call_args.kwargs
|
||||
assert kwargs["settings"].language == "custom-locale"
|
||||
|
||||
|
||||
def test_validator_accepts_azure_speech_services():
|
||||
validator = UserConfigurationValidator()
|
||||
|
||||
assert (
|
||||
validator._validate_service(
|
||||
AzureSpeechTTSConfiguration(api_key="test-key"),
|
||||
"tts",
|
||||
)
|
||||
== []
|
||||
)
|
||||
assert (
|
||||
validator._validate_service(
|
||||
AzureSpeechSTTConfiguration(api_key="test-key"),
|
||||
"stt",
|
||||
)
|
||||
== []
|
||||
)
|
||||
|
||||
|
||||
def test_validator_accepts_azure_realtime_service(monkeypatch):
|
||||
monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "oss")
|
||||
validator = UserConfigurationValidator()
|
||||
|
||||
assert (
|
||||
validator._validate_service(
|
||||
AzureRealtimeLLMConfiguration(
|
||||
api_key="test-key",
|
||||
endpoint="https://example.openai.azure.com",
|
||||
),
|
||||
"realtime",
|
||||
)
|
||||
== []
|
||||
)
|
||||
|
||||
|
||||
def test_create_azure_realtime_blocks_private_endpoint_in_saas(monkeypatch):
|
||||
monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas")
|
||||
user_config = SimpleNamespace(
|
||||
realtime=SimpleNamespace(
|
||||
provider=ServiceProviders.AZURE_REALTIME.value,
|
||||
api_key="test-key",
|
||||
endpoint="http://10.0.0.10",
|
||||
api_version="2025-04-01-preview",
|
||||
model="gpt-4o-realtime-preview",
|
||||
voice="alloy",
|
||||
)
|
||||
)
|
||||
|
||||
with pytest.raises(HTTPException) as exc_info:
|
||||
create_realtime_llm_service(user_config, _audio_config())
|
||||
|
||||
assert exc_info.value.status_code == 400
|
||||
assert "public IP" in exc_info.value.detail
|
||||
|
||||
|
||||
def test_azure_embedding_service_rejects_wrong_dimension():
|
||||
service = AzureOpenAIEmbeddingService(
|
||||
db_client=SimpleNamespace(),
|
||||
api_key=None,
|
||||
endpoint=None,
|
||||
model_id="text-embedding-3-large",
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="1536-dimensional"):
|
||||
service._validate_embedding_dimensions([[0.0] * 3072])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue