From 34777145547ded621ce664232a17fcae07f21660 Mon Sep 17 00:00:00 2001
From: Hridayesh Gupta
Date: Tue, 19 May 2026 12:26:59 +0530
Subject: [PATCH] Add Cerebras as an LLM provider

Register Cerebras in the service configuration registry, construct a
CerebrasLLMService in the pipecat service factory, and add per-token
pricing entries for the listed Cerebras models.

---
Review notes (not part of the commit message):

- service_factory.py: the new `elif` branch is inserted after the closing
  parenthesis of the preceding provider's call; placing it before that
  parenthesis leaves the module unparseable.
- service_factory.py: the Cerebras branch uses the caller-supplied
  temperature and only falls back to 0.1 when it is None, mirroring the
  preceding branch (which falls back to 0.5).
- registry.py: dropped the commented-out `# ServiceProviders.SARVAM,` line
  and separated CEREBRAS_MODELS from the preceding class with two blank
  lines.
- NOTE(review): every model except llama3.1-8b is priced at a flat $0.60
  per 1M tokens for both prompt and completion; confirm against Cerebras'
  published rates before relying on these for billing.

 api/services/configuration/registry.py  | 26 +++++++++++++++++++++++++
 api/services/pipecat/service_factory.py | 10 ++++++++++
 api/services/pricing/llm.py             | 26 +++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index f05c5f71..0cbf86b3 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -79,6 +79,7 @@ class ServiceProviders(str, Enum):
     GOOGLE_REALTIME = "google_realtime"
     GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
     AZURE_REALTIME = "azure_realtime"
+    CEREBRAS = "cerebras"
 
 
 class BaseServiceConfiguration(BaseModel):
@@ -106,6 +107,7 @@ class BaseServiceConfiguration(BaseModel):
         ServiceProviders.GOOGLE_VERTEX_REALTIME,
         ServiceProviders.AZURE_REALTIME,
         ServiceProviders.SARVAM,
+        ServiceProviders.CEREBRAS,
     ]
     api_key: str | list[str]
 
@@ -718,6 +720,29 @@ class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
             "examples": AZURE_REALTIME_API_VERSIONS,
         },
     )
+
+
+# Example model IDs exposed through the config schema; custom values are allowed.
+CEREBRAS_MODELS = [
+    "llama3.1-8b",
+    "llama3.1-70b",
+    "llama-3.3-70b",
+    "gpt-oss-120b",
+    "qwen-3-235b-a22b-instruct-2507",
+    "zai-glm-4.7",
+]
+
+
+@register_llm
+class CerebrasLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.CEREBRAS] = ServiceProviders.CEREBRAS
+    model: str = Field(
+        default="llama3.1-8b",
+        json_schema_extra={
+            "examples": CEREBRAS_MODELS,
+            "allow_custom_input": True,
+        },
+    )
 
 
 REALTIME_PROVIDERS = {
@@ -743,6 +768,7 @@ LLMConfig = Annotated[
         SpeachesLLMConfiguration,
         MiniMaxLLMConfiguration,
         SarvamLLMConfiguration,
+        CerebrasLLMConfiguration,
     ],
     Field(discriminator="provider"),
 ]
diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py
index 8ed96e40..bad659a0 100644
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -672,6 +672,16 @@ def create_llm_service_from_provider(
                 model=model,
                 temperature=temperature if temperature is not None else 0.5,
             ),
         )
+    elif provider == ServiceProviders.CEREBRAS.value:
+        from pipecat.services.cerebras.llm import CerebrasLLMService, CerebrasLLMSettings
+        return CerebrasLLMService(
+            api_key=api_key,
+            settings=CerebrasLLMSettings(
+                model=model,
+                # Honour the caller's temperature; 0.1 is only the fallback.
+                temperature=temperature if temperature is not None else 0.1,
+            ),
+        )
     else:
         raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
diff --git a/api/services/pricing/llm.py b/api/services/pricing/llm.py
index addb59bc..5339575c 100644
--- a/api/services/pricing/llm.py
+++ b/api/services/pricing/llm.py
@@ -140,4 +140,30 @@ LLM_PRICING: Dict[str, Dict[str, TokenPricingModel]] = {
             / 1000000,  # $1.60 per 1M tokens if using data zone
         )
     },
+    ServiceProviders.CEREBRAS: {
+        "llama3.1-8b": TokenPricingModel(
+            prompt_token_price=Decimal("0.10") / 1000000,
+            completion_token_price=Decimal("0.10") / 1000000,
+        ),
+        "llama3.1-70b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "llama-3.3-70b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "gpt-oss-120b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "qwen-3-235b-a22b-instruct-2507": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "zai-glm-4.7": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+    },
 }