Add support for Cerebras

This commit is contained in:
Hridayesh Gupta 2026-05-19 12:26:59 +05:30
parent 97777e9ccf
commit 3477714554
3 changed files with 56 additions and 0 deletions

View file

@ -79,6 +79,7 @@ class ServiceProviders(str, Enum):
GOOGLE_REALTIME = "google_realtime"
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
AZURE_REALTIME = "azure_realtime"
CEREBRAS = "cerebras"
class BaseServiceConfiguration(BaseModel):
@ -106,6 +107,8 @@ class BaseServiceConfiguration(BaseModel):
ServiceProviders.GOOGLE_VERTEX_REALTIME,
ServiceProviders.AZURE_REALTIME,
ServiceProviders.SARVAM,
ServiceProviders.CEREBRAS,
# ServiceProviders.SARVAM,
]
api_key: str | list[str]
@ -718,6 +721,27 @@ class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
"examples": AZURE_REALTIME_API_VERSIONS,
},
)
# Model identifiers suggested for the Cerebras LLM provider. The list is
# surfaced as the schema "examples" of CerebrasLLMConfiguration.model.
# NOTE(review): availability of each id on the Cerebras API is not verified
# here -- confirm against the provider's current model catalogue.
CEREBRAS_MODELS = [
    # Llama family
    "llama3.1-8b",
    "llama3.1-70b",
    "llama-3.3-70b",
    # Other hosted models
    "gpt-oss-120b",
    "qwen-3-235b-a22b-instruct-2507",
    "zai-glm-4.7",
]
# LLM configuration for the Cerebras provider.
# (Kept as a comment rather than a class docstring so the generated JSON
# schema for this model is not given a new "description" entry.)
@register_llm
class CerebrasLLMConfiguration(BaseLLMConfiguration):
    # Fixed to the CEREBRAS provider value; the LLMConfig union in this file
    # discriminates its members on this "provider" field.
    provider: Literal[ServiceProviders.CEREBRAS] = ServiceProviders.CEREBRAS

    # Model identifier; defaults to "llama3.1-8b". CEREBRAS_MODELS is exposed
    # as schema examples.
    # NOTE(review): "allow_custom_input" presumably lets clients submit a
    # model id outside the examples list -- confirm with the schema consumer.
    model: str = Field(
        default="llama3.1-8b",
        json_schema_extra={
            "examples": CEREBRAS_MODELS,
            "allow_custom_input": True,
        },
    )
REALTIME_PROVIDERS = {
@ -743,6 +767,7 @@ LLMConfig = Annotated[
SpeachesLLMConfiguration,
MiniMaxLLMConfiguration,
SarvamLLMConfiguration,
CerebrasLLMConfiguration,
],
Field(discriminator="provider"),
]

View file

@ -672,6 +672,11 @@ def create_llm_service_from_provider(
model=model,
temperature=temperature if temperature is not None else 0.5,
),
elif provider == ServiceProviders.CEREBRAS.value:
from pipecat.services.cerebras.llm import CerebrasLLMService, CerebrasLLMSettings
return CerebrasLLMService(
api_key=api_key,
settings=CerebrasLLMSettings(model=model, temperature=0.1),
)
else:
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")

View file

@ -140,4 +140,30 @@ LLM_PRICING: Dict[str, Dict[str, TokenPricingModel]] = {
/ 1000000, # $1.60 per 1M tokens if using data zone
)
},
ServiceProviders.CEREBRAS: {
"llama3.1-8b": TokenPricingModel(
prompt_token_price=Decimal("0.10") / 1000000,
completion_token_price=Decimal("0.10") / 1000000,
),
"llama3.1-70b": TokenPricingModel(
prompt_token_price=Decimal("0.60") / 1000000,
completion_token_price=Decimal("0.60") / 1000000,
),
"llama-3.3-70b": TokenPricingModel(
prompt_token_price=Decimal("0.60") / 1000000,
completion_token_price=Decimal("0.60") / 1000000,
),
"gpt-oss-120b": TokenPricingModel(
prompt_token_price=Decimal("0.60") / 1000000,
completion_token_price=Decimal("0.60") / 1000000,
),
"qwen-3-235b-a22b-instruct-2507": TokenPricingModel(
prompt_token_price=Decimal("0.60") / 1000000,
completion_token_price=Decimal("0.60") / 1000000,
),
"zai-glm-4.7": TokenPricingModel(
prompt_token_price=Decimal("0.60") / 1000000,
completion_token_price=Decimal("0.60") / 1000000,
),
},
}