feat: add google stt and tts. add folders to organize agents

This commit is contained in:
Abhishek Kumar 2026-05-22 14:36:50 +05:30
parent 21951eca18
commit ad2fa07058
52 changed files with 3412 additions and 621 deletions

View file

@ -26,6 +26,8 @@ from pipecat.services.dograh.tts import DograhTTSService, DograhTTSSettings
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings
from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings
from pipecat.services.google.llm import GoogleLLMService, GoogleLLMSettings
from pipecat.services.google.stt import GoogleSTTService, GoogleSTTSettings
from pipecat.services.google.tts import GoogleTTSService, GoogleTTSSettings
from pipecat.services.groq.llm import GroqLLMService, GroqLLMSettings
from pipecat.services.openai.base_llm import OpenAILLMSettings
from pipecat.services.openai.llm import OpenAILLMService
@ -101,6 +103,23 @@ def create_stt_service(
api_key=user_config.stt.api_key,
settings=OpenAISTTSettings(model=user_config.stt.model),
)
elif user_config.stt.provider == ServiceProviders.GOOGLE.value:
language = getattr(user_config.stt, "language", None) or "en-US"
location = getattr(user_config.stt, "location", None) or "global"
credentials = getattr(user_config.stt, "credentials", None)
settings_kwargs = {"model": user_config.stt.model}
try:
settings_kwargs["languages"] = [Language(language)]
except ValueError:
settings_kwargs["language_codes"] = [language]
return GoogleSTTService(
credentials=credentials,
location=location,
settings=GoogleSTTSettings(**settings_kwargs),
sample_rate=audio_config.transport_in_sample_rate,
)
elif user_config.stt.provider == ServiceProviders.CARTESIA.value:
return CartesiaSTTService(
api_key=user_config.stt.api_key,
@ -241,6 +260,30 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.GOOGLE.value:
model = getattr(user_config.tts, "model", None) or "chirp_3_hd"
language = getattr(user_config.tts, "language", None) or "en-US"
voice = getattr(user_config.tts, "voice", None) or "en-US-Chirp3-HD-Charon"
speed = getattr(user_config.tts, "speed", None)
location = getattr(user_config.tts, "location", None) or None
credentials = getattr(user_config.tts, "credentials", None)
settings_kwargs = {
"model": model,
"voice": voice,
"language": language,
}
if speed is not None and speed != 1.0:
settings_kwargs["speaking_rate"] = speed
return GoogleTTSService(
credentials=credentials,
location=location,
settings=GoogleTTSSettings(**settings_kwargs),
text_filters=[xml_function_tag_filter],
skip_aggregator_types=["recording_router", "recording"],
silence_time_s=1.0,
)
elif user_config.tts.provider == ServiceProviders.ELEVENLABS.value:
# Backward compatible with older configuration "Name - voice_id"
try: