Add support for Cerebras as an LLM provider

2026-06-13 08:15:21 +02:00 · 2026-05-19 12:26:59 +05:30 · 2026-05-19 12:26:59 +05:30 · 3477714554
commit 3477714554
parent 97777e9ccf
3 changed files with 56 additions and 0 deletions
--- a/api/services/configuration/registry.py
+++ b/api/services/configuration/registry.py
@ -79,6 +79,7 @@ class ServiceProviders(str, Enum):
    GOOGLE_REALTIME = "google_realtime"
    GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
    AZURE_REALTIME = "azure_realtime"
+    CEREBRAS = "cerebras"


 class BaseServiceConfiguration(BaseModel):
@ -106,6 +107,8 @@ class BaseServiceConfiguration(BaseModel):
        ServiceProviders.GOOGLE_VERTEX_REALTIME,
        ServiceProviders.AZURE_REALTIME,
        ServiceProviders.SARVAM,
+        ServiceProviders.CEREBRAS,
+        # ServiceProviders.SARVAM,
    ]
    api_key: str | list[str]

@ -718,6 +721,27 @@ class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
            "examples": AZURE_REALTIME_API_VERSIONS,
        },
    )
+CEREBRAS_MODELS = [  # model ids listed as "examples" for CerebrasLLMConfiguration.model
+    "llama3.1-8b",  # same id is the default of CerebrasLLMConfiguration.model
+    "llama3.1-70b",
+    "llama-3.3-70b",
+    "gpt-oss-120b",
+    "qwen-3-235b-a22b-instruct-2507",
+    "zai-glm-4.7",
+]
+
+
+@register_llm
+class CerebrasLLMConfiguration(BaseLLMConfiguration):  # LLM configuration for the Cerebras provider
+    provider: Literal[ServiceProviders.CEREBRAS] = ServiceProviders.CEREBRAS  # fixed tag; LLMConfig discriminates on "provider"
+    model: str = Field(  # typed as plain str, so the value is not restricted to CEREBRAS_MODELS
+        default="llama3.1-8b",
+        json_schema_extra={
+            "examples": CEREBRAS_MODELS,
+            "allow_custom_input": True,  # NOTE(review): presumably lets the UI accept ids outside "examples" — confirm
+        },
+    )
+


 REALTIME_PROVIDERS = {
@ -743,6 +767,7 @@ LLMConfig = Annotated[
        SpeachesLLMConfiguration,
        MiniMaxLLMConfiguration,
        SarvamLLMConfiguration,
+        CerebrasLLMConfiguration,
    ],
    Field(discriminator="provider"),
 ]
--- a/api/services/pipecat/service_factory.py
+++ b/api/services/pipecat/service_factory.py
@ -672,6 +672,11 @@ def create_llm_service_from_provider(
                model=model,
                temperature=temperature if temperature is not None else 0.5,
            ),
+    elif provider == ServiceProviders.CEREBRAS.value:
+        from pipecat.services.cerebras.llm import CerebrasLLMService, CerebrasLLMSettings
+        return CerebrasLLMService(
+            api_key=api_key,
+            settings=CerebrasLLMSettings(model=model, temperature=0.1),
        )
    else:
        raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
--- a/api/services/pricing/llm.py
+++ b/api/services/pricing/llm.py
@ -140,4 +140,30 @@ LLM_PRICING: Dict[str, Dict[str, TokenPricingModel]] = {
            / 1000000,  # $1.60 per 1M tokens if using data zone
        )
    },
+    ServiceProviders.CEREBRAS: {
+        "llama3.1-8b": TokenPricingModel(
+            prompt_token_price=Decimal("0.10") / 1000000,
+            completion_token_price=Decimal("0.10") / 1000000,
+        ),
+        "llama3.1-70b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "llama-3.3-70b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "gpt-oss-120b": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "qwen-3-235b-a22b-instruct-2507": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+        "zai-glm-4.7": TokenPricingModel(
+            prompt_token_price=Decimal("0.60") / 1000000,
+            completion_token_price=Decimal("0.60") / 1000000,
+        ),
+    },
 }