From 34777145547ded621ce664232a17fcae07f21660 Mon Sep 17 00:00:00 2001
From: Hridayesh Gupta <hridayesh.gupta@intelliticks.com>
Date: Tue, 19 May 2026 12:26:59 +0530
Subject: [PATCH] Add Cerebras LLM provider support

---
 api/services/configuration/registry.py  | 25 ++++++++++++++++++++++++
 api/services/pipecat/service_factory.py |  5 +++++
 api/services/pricing/llm.py             | 26 +++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py
index f05c5f71..0cbf86b3 100644
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@@ -79,6 +79,7 @@ class ServiceProviders(str, Enum):
     GOOGLE_REALTIME = "google_realtime"
     GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
     AZURE_REALTIME = "azure_realtime"
+    CEREBRAS = "cerebras"
 
 
 class BaseServiceConfiguration(BaseModel):
@@ -106,6 +107,7 @@ class BaseServiceConfiguration(BaseModel):
         ServiceProviders.GOOGLE_VERTEX_REALTIME,
         ServiceProviders.AZURE_REALTIME,
         ServiceProviders.SARVAM,
+        ServiceProviders.CEREBRAS,
     ]
     api_key: str | list[str]
 
@@ -718,6 +721,33 @@ class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
             "examples": AZURE_REALTIME_API_VERSIONS,
         },
     )
 
 
+# Model identifiers published as JSON-schema examples for the Cerebras
+# ``model`` field.
+CEREBRAS_MODELS = [
+    "llama3.1-8b",
+    "llama3.1-70b",
+    "llama-3.3-70b",
+    "gpt-oss-120b",
+    "qwen-3-235b-a22b-instruct-2507",
+    "zai-glm-4.7",
+]
+
+
+# Cerebras LLM settings: ``model`` defaults to "llama3.1-8b", and
+# ``allow_custom_input`` is set so ids outside CEREBRAS_MODELS can presumably
+# still be entered (confirm against the schema consumer).
+@register_llm
+class CerebrasLLMConfiguration(BaseLLMConfiguration):
+    provider: Literal[ServiceProviders.CEREBRAS] = ServiceProviders.CEREBRAS
+    model: str = Field(
+        default="llama3.1-8b",
+        json_schema_extra={
+            "examples": CEREBRAS_MODELS,
+            "allow_custom_input": True,
+        },
+    )
+
+
 REALTIME_PROVIDERS = {
@@ -743,6 +767,7 @@ LLMConfig = Annotated[
         SpeachesLLMConfiguration,
         MiniMaxLLMConfiguration,
         SarvamLLMConfiguration,
+        CerebrasLLMConfiguration,
     ],
     Field(discriminator="provider"),
 ]
diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py
index 8ed96e40..bad659a0 100644
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@@ -672,6 +672,21 @@ def create_llm_service_from_provider(
                 model=model,
                 temperature=temperature if temperature is not None else 0.5,
             ),
         )
+    elif provider == ServiceProviders.CEREBRAS.value:
+        from pipecat.services.cerebras.llm import (
+            CerebrasLLMService,
+            CerebrasLLMSettings,
+        )
+
+        return CerebrasLLMService(
+            api_key=api_key,
+            settings=CerebrasLLMSettings(
+                model=model,
+                # Honor the configured temperature like the preceding branch;
+                # 0.1 remains the Cerebras fallback when none is supplied.
+                temperature=temperature if temperature is not None else 0.1,
+            ),
+        )
     else:
         raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
diff --git a/api/services/pricing/llm.py b/api/services/pricing/llm.py
index addb59bc..5339575c 100644
--- a/api/services/pricing/llm.py
+++ b/api/services/pricing/llm.py
@@ -140,4 +140,33 @@ LLM_PRICING: Dict[str, Dict[str, TokenPricingModel]] = {
             / 1000000,  # $1.60 per 1M tokens if using data zone
         )
     },
+    ServiceProviders.CEREBRAS: {
+        # NOTE(review): every model below except llama3.1-8b carries the same
+        # flat $0.60 per 1M tokens for both prompt and completion - confirm each
+        # entry against Cerebras' published per-model pricing before relying on it.
+        "llama3.1-8b": TokenPricingModel(
+            prompt_token_price=Decimal("0.10") / 1000000,
+            completion_token_price=Decimal("0.10") / 1000000,
+        ),
+        "llama3.1-70b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "llama-3.3-70b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "gpt-oss-120b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "qwen-3-235b-a22b-instruct-2507": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "zai-glm-4.7": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+    },
 }