mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-04 10:52:17 +02:00
feat: add gemini realtime and speaches integration
- Add Gemini realtime support
- Add Speaches support for locally hosted LLMs
This commit is contained in:
parent
2eaaabd936
commit
ee2028eb2d
19 changed files with 531 additions and 185 deletions
|
|
@ -35,6 +35,7 @@ from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
|
|||
from pipecat.services.openrouter.llm import OpenRouterLLMService, OpenRouterLLMSettings
|
||||
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
|
||||
from pipecat.services.sarvam.tts import SarvamTTSService, SarvamTTSSettings
|
||||
from pipecat.services.speaches.llm import SpeachesLLMService, SpeachesLLMSettings
|
||||
from pipecat.services.speaches.stt import SpeachesSTTService, SpeachesSTTSettings
|
||||
from pipecat.services.speaches.tts import SpeachesTTSService, SpeachesTTSSettings
|
||||
from pipecat.services.speechmatics.stt import (
|
||||
|
|
@ -63,7 +64,6 @@ def create_stt_service(
|
|||
if user_config.stt.provider == ServiceProviders.DEEPGRAM.value:
|
||||
# Check if using Flux model (English-only, no language selection)
|
||||
if user_config.stt.model == "flux-general-en":
|
||||
logger.debug("Using DeepGram Flux Model")
|
||||
return DeepgramFluxSTTService(
|
||||
api_key=user_config.stt.api_key,
|
||||
settings=DeepgramFluxSTTSettings(
|
||||
|
|
@ -395,15 +395,75 @@ def create_llm_service_from_provider(
|
|||
settings=AWSBedrockLLMSettings(model=model),
|
||||
)
|
||||
elif provider == ServiceProviders.SPEACHES.value:
|
||||
return OpenAILLMService(
|
||||
return SpeachesLLMService(
|
||||
base_url=base_url or "http://localhost:11434/v1",
|
||||
api_key=api_key or "none",
|
||||
settings=OpenAILLMSettings(model=model),
|
||||
settings=SpeachesLLMSettings(model=model),
|
||||
)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
|
||||
|
||||
|
||||
def create_realtime_llm_service(user_config, audio_config: "AudioConfig"):
    """Create a realtime (speech-to-speech) LLM service that handles STT+LLM+TTS.

    These services bypass separate STT/TTS and handle audio directly via
    a bidirectional WebSocket connection. Reads from user_config.realtime.

    Args:
        user_config: User configuration. Its ``realtime`` attribute must
            expose ``provider``, ``model`` and ``api_key``; an optional
            ``voice`` attribute selects the output voice.
        audio_config: Audio configuration. Not read by this function.

    Returns:
        An ``OpenAIRealtimeLLMService`` for the OpenAI realtime provider or
        a ``GeminiLiveLLMService`` for the Google realtime provider.

    Raises:
        HTTPException: With status 400 when the realtime configuration is
            missing or names an unsupported provider.
    """
    # Fail with a client error instead of an AttributeError when the
    # realtime section is absent, matching the invalid-provider path below.
    realtime_config = getattr(user_config, "realtime", None)
    if realtime_config is None:
        raise HTTPException(
            status_code=400, detail="Realtime LLM configuration is missing"
        )

    provider = realtime_config.provider
    model = realtime_config.model
    api_key = realtime_config.api_key
    # ``voice`` is optional on the config object; each provider branch
    # supplies its own fallback when it is unset or empty.
    voice = getattr(realtime_config, "voice", None)

    logger.info(
        f"Creating realtime LLM service: provider={provider}, model={model}, voice={voice}"
    )

    if provider == ServiceProviders.OPENAI_REALTIME.value:
        # Imported lazily so the provider module is only loaded when used.
        from pipecat.services.openai.realtime.events import (
            AudioConfiguration,
            AudioInput,
            AudioOutput,
            InputAudioTranscription,
            SessionProperties,
        )
        from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService

        return OpenAIRealtimeLLMService(
            api_key=api_key,
            settings=OpenAIRealtimeLLMService.Settings(
                model=model,
                session_properties=SessionProperties(
                    audio=AudioConfiguration(
                        input=AudioInput(
                            transcription=InputAudioTranscription(),
                        ),
                        output=AudioOutput(
                            voice=voice or "alloy",
                        ),
                    ),
                ),
            ),
        )

    if provider == ServiceProviders.GOOGLE_REALTIME.value:
        # Imported lazily so the provider module is only loaded when used.
        from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService

        # Gemini Live enables input/output audio transcription by default
        # in its _connect() method — no need to configure it explicitly.
        return GeminiLiveLLMService(
            api_key=api_key,
            settings=GeminiLiveLLMService.Settings(
                model=model,
                voice=voice or "Puck",
            ),
        )

    raise HTTPException(
        status_code=400, detail=f"Invalid realtime LLM provider {provider}"
    )
|
||||
|
||||
|
||||
def create_llm_service(user_config):
|
||||
"""Create and return appropriate LLM service based on user configuration."""
|
||||
provider = user_config.llm.provider
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue