# Patch summary (commentary placed ahead of the first `diff --git` header).
#
# Adds an optional `base_url` override for the OpenAI speech services:
#   * api/services/configuration/registry.py: a `base_url: str` field with default
#     "https://api.openai.com/v1" is added in the hunks headed by
#     `class OpenAITTSService(BaseTTSConfiguration)` and
#     `class OpenAISTTConfiguration(BaseSTTConfiguration)`.
#   * api/services/pipecat/service_factory.py: in the OpenAI branches of
#     `create_stt_service` and `create_tts_service`, `base_url` is read with
#     `getattr(..., "base_url", None)`, checked with
#     `_validate_runtime_service_url(base_url, "base_url")` when truthy, and passed to
#     `OpenAISTTService` / `OpenAITTSService` through `**kwargs`.
#   * api/tests/test_user_configured_service_url_security.py: imports
#     `create_stt_service` and adds four tests. With
#     `api.utils.url_security.DEPLOYMENT_MODE` monkeypatched to "saas", each expects an
#     `HTTPException` with status 400 whose detail contains "public IP" for
#     `http://10.0.0.10/v1` or "localhost" for `http://localhost:8000/v1`.
#
# NOTE(review): this copy of the patch has lost its original line breaks and
# indentation, so it presumably will not apply as-is, and whether the
# `kwargs["base_url"] = base_url` assignment sits inside the `if base_url:` guard
# cannot be confirmed from here — check against the original patch.
diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index fe8bb9f..a497e3f 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -830,6 +830,10 @@ class OpenAITTSService(BaseTTSConfiguration): default="alloy", description="OpenAI TTS voice name.", ) + base_url: str = Field( + default="https://api.openai.com/v1", + description="Override only if using an OpenAI-compatible API (e.g. local TTS, proxy).", + ) DOGRAH_TTS_MODELS = ["default"] @@ -1088,6 +1092,10 @@ class OpenAISTTConfiguration(BaseSTTConfiguration): description="OpenAI transcription model.", json_schema_extra={"examples": OPENAI_STT_MODELS}, ) + base_url: str = Field( + default="https://api.openai.com/v1", + description="Override only if using an OpenAI-compatible API (e.g. local STT, proxy).", + ) @register_stt diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py index f04422e..217a593 100644 --- a/api/services/pipecat/service_factory.py +++ b/api/services/pipecat/service_factory.py @@ -119,9 +119,15 @@ def create_stt_service( sample_rate=audio_config.transport_in_sample_rate, ) elif user_config.stt.provider == ServiceProviders.OPENAI.value: + kwargs = {} + base_url = getattr(user_config.stt, "base_url", None) + if base_url: + _validate_runtime_service_url(base_url, "base_url") + kwargs["base_url"] = base_url return OpenAISTTService( api_key=user_config.stt.api_key, settings=OpenAISTTSettings(model=user_config.stt.model), + **kwargs, ) elif user_config.stt.provider == ServiceProviders.GOOGLE.value: language = getattr(user_config.stt, "language", None) or "en-US" @@ -283,12 +289,18 @@ def create_tts_service(user_config, audio_config: "AudioConfig"): silence_time_s=1.0, ) elif user_config.tts.provider == ServiceProviders.OPENAI.value: + kwargs = {} + base_url = getattr(user_config.tts, "base_url", None) + if base_url: + _validate_runtime_service_url(base_url, "base_url") + 
kwargs["base_url"] = base_url return OpenAITTSService( api_key=user_config.tts.api_key, settings=OpenAITTSSettings(model=user_config.tts.model), text_filters=[xml_function_tag_filter], skip_aggregator_types=["recording_router", "recording"], silence_time_s=1.0, + **kwargs, ) elif user_config.tts.provider == ServiceProviders.GOOGLE.value: model = getattr(user_config.tts, "model", None) or "chirp_3_hd" diff --git a/api/tests/test_user_configured_service_url_security.py b/api/tests/test_user_configured_service_url_security.py index 1585652..03c6755 100644 --- a/api/tests/test_user_configured_service_url_security.py +++ b/api/tests/test_user_configured_service_url_security.py @@ -11,6 +11,7 @@ from api.services.configuration.registry import ( from api.services.gen_ai.embedding.openai_service import OpenAIEmbeddingService from api.services.pipecat.service_factory import ( create_llm_service_from_provider, + create_stt_service, create_tts_service, ) from api.utils.url_security import validate_user_configured_service_url @@ -214,6 +215,80 @@ def test_runtime_blocks_elevenlabs_local_tts_base_url_in_saas(monkeypatch): assert "localhost" in exc_info.value.detail +def test_runtime_blocks_openai_stt_private_base_url_in_saas(monkeypatch): + monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas") + user_config = SimpleNamespace( + stt=SimpleNamespace( + provider=ServiceProviders.OPENAI.value, + api_key="test-key", + model="gpt-4o-transcribe", + base_url="http://10.0.0.10/v1", + ) + ) + + with pytest.raises(HTTPException) as exc_info: + create_stt_service(user_config, audio_config=None) + + assert exc_info.value.status_code == 400 + assert "public IP" in exc_info.value.detail + + +def test_runtime_blocks_openai_stt_localhost_base_url_in_saas(monkeypatch): + monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas") + user_config = SimpleNamespace( + stt=SimpleNamespace( + provider=ServiceProviders.OPENAI.value, + api_key="test-key", + 
model="gpt-4o-transcribe", + base_url="http://localhost:8000/v1", + ) + ) + + with pytest.raises(HTTPException) as exc_info: + create_stt_service(user_config, audio_config=None) + + assert exc_info.value.status_code == 400 + assert "localhost" in exc_info.value.detail + + +def test_runtime_blocks_openai_tts_private_base_url_in_saas(monkeypatch): + monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas") + user_config = SimpleNamespace( + tts=SimpleNamespace( + provider=ServiceProviders.OPENAI.value, + api_key="test-key", + model="gpt-4o-mini-tts", + voice="alloy", + base_url="http://10.0.0.10/v1", + ) + ) + + with pytest.raises(HTTPException) as exc_info: + create_tts_service(user_config, audio_config=None) + + assert exc_info.value.status_code == 400 + assert "public IP" in exc_info.value.detail + + +def test_runtime_blocks_openai_tts_localhost_base_url_in_saas(monkeypatch): + monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas") + user_config = SimpleNamespace( + tts=SimpleNamespace( + provider=ServiceProviders.OPENAI.value, + api_key="test-key", + model="gpt-4o-mini-tts", + voice="alloy", + base_url="http://localhost:8000/v1", + ) + ) + + with pytest.raises(HTTPException) as exc_info: + create_tts_service(user_config, audio_config=None) + + assert exc_info.value.status_code == 400 + assert "localhost" in exc_info.value.detail + + def test_embedding_service_blocks_private_base_url_in_saas(monkeypatch): monkeypatch.setattr("api.utils.url_security.DEPLOYMENT_MODE", "saas")