mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-13 08:15:21 +02:00
support for cerebras
This commit is contained in:
parent
97777e9ccf
commit
3477714554
3 changed files with 56 additions and 0 deletions
|
|
@ -79,6 +79,7 @@ class ServiceProviders(str, Enum):
|
|||
GOOGLE_REALTIME = "google_realtime"
|
||||
GOOGLE_VERTEX_REALTIME = "google_vertex_realtime"
|
||||
AZURE_REALTIME = "azure_realtime"
|
||||
CEREBRAS = "cerebras"
|
||||
|
||||
|
||||
class BaseServiceConfiguration(BaseModel):
|
||||
|
|
@ -106,6 +107,8 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.GOOGLE_VERTEX_REALTIME,
|
||||
ServiceProviders.AZURE_REALTIME,
|
||||
ServiceProviders.SARVAM,
|
||||
ServiceProviders.CEREBRAS,
|
||||
# ServiceProviders.SARVAM,
|
||||
]
|
||||
api_key: str | list[str]
|
||||
|
||||
|
|
@ -718,6 +721,27 @@ class AzureRealtimeLLMConfiguration(BaseLLMConfiguration):
|
|||
"examples": AZURE_REALTIME_API_VERSIONS,
|
||||
},
|
||||
)
|
||||
# Model identifiers surfaced as the "examples" of the Cerebras `model` field.
# Order is preserved as-is because the list is exposed verbatim in the schema.
CEREBRAS_MODELS: list[str] = [
    # Llama family
    "llama3.1-8b",
    "llama3.1-70b",
    "llama-3.3-70b",
    # Other hosted models
    "gpt-oss-120b",
    "qwen-3-235b-a22b-instruct-2507",
    "zai-glm-4.7",
]
|
||||
|
||||
|
||||
# LLM configuration for the Cerebras provider, registered via `register_llm`.
# Documented with comments rather than a docstring so the generated model
# schema is left exactly as it was.
@register_llm
class CerebrasLLMConfiguration(BaseLLMConfiguration):
    # Fixed discriminator value identifying this configuration as Cerebras.
    provider: Literal[ServiceProviders.CEREBRAS] = ServiceProviders.CEREBRAS

    # Model identifier; defaults to "llama3.1-8b". CEREBRAS_MODELS is attached
    # as schema examples. NOTE(review): "allow_custom_input" presumably tells
    # the UI to accept values outside the examples list — confirm with the
    # schema consumer.
    model: str = Field(
        default="llama3.1-8b",
        json_schema_extra={"examples": CEREBRAS_MODELS, "allow_custom_input": True},
    )
|
||||
|
||||
|
||||
|
||||
REALTIME_PROVIDERS = {
|
||||
|
|
@ -743,6 +767,7 @@ LLMConfig = Annotated[
|
|||
SpeachesLLMConfiguration,
|
||||
MiniMaxLLMConfiguration,
|
||||
SarvamLLMConfiguration,
|
||||
CerebrasLLMConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -672,6 +672,11 @@ def create_llm_service_from_provider(
|
|||
model=model,
|
||||
temperature=temperature if temperature is not None else 0.5,
|
||||
),
|
||||
elif provider == ServiceProviders.CEREBRAS.value:
|
||||
from pipecat.services.cerebras.llm import CerebrasLLMService, CerebrasLLMSettings
|
||||
return CerebrasLLMService(
|
||||
api_key=api_key,
|
||||
settings=CerebrasLLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
|
||||
|
|
|
|||
|
|
@ -140,4 +140,30 @@ LLM_PRICING: Dict[str, Dict[str, TokenPricingModel]] = {
|
|||
/ 1000000, # $1.60 per 1M tokens if using data zone
|
||||
)
|
||||
},
|
||||
ServiceProviders.CEREBRAS: {
|
||||
"llama3.1-8b": TokenPricingModel(
|
||||
prompt_token_price=Decimal("0.10") / 1000000,
|
||||
completion_token_price=Decimal("0.10") / 1000000,
|
||||
),
|
||||
"llama3.1-70b": TokenPricingModel(
|
||||
prompt_token_price=Decimal("0.60") / 1000000,
|
||||
completion_token_price=Decimal("0.60") / 1000000,
|
||||
),
|
||||
"llama-3.3-70b": TokenPricingModel(
|
||||
prompt_token_price=Decimal("0.60") / 1000000,
|
||||
completion_token_price=Decimal("0.60") / 1000000,
|
||||
),
|
||||
"gpt-oss-120b": TokenPricingModel(
|
||||
prompt_token_price=Decimal("0.60") / 1000000,
|
||||
completion_token_price=Decimal("0.60") / 1000000,
|
||||
),
|
||||
"qwen-3-235b-a22b-instruct-2507": TokenPricingModel(
|
||||
prompt_token_price=Decimal("0.60") / 1000000,
|
||||
completion_token_price=Decimal("0.60") / 1000000,
|
||||
),
|
||||
"zai-glm-4.7": TokenPricingModel(
|
||||
prompt_token_price=Decimal("0.60") / 1000000,
|
||||
completion_token_price=Decimal("0.60") / 1000000,
|
||||
),
|
||||
},
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue