Set the default reasoning_max_token to 4000 (the suggested value)

This commit is contained in:
better629 2025-02-28 10:46:17 +08:00
parent ff477066c5
commit 119385c5ce
5 changed files with 5 additions and 4 deletions

View file

@ -103,7 +103,7 @@ class LLMConfig(YamlModel):
# reasoning / thinking switch
reasoning: bool = False
reasoning_max_token: int = 1024 # reasoning budget tokens to generate, usually smaller than max_token
reasoning_max_token: int = 4000 # reasoning budget tokens to generate, usually smaller than max_token
@field_validator("api_key")
@classmethod

View file

@ -42,7 +42,7 @@ class AnthropicLLM(BaseLLM):
super()._update_costs(usage, model)
def get_choice_text(self, resp: Message) -> str:
if len(resp.content) > 0:
if len(resp.content) > 1:
self.reasoning_content = resp.content[0].thinking
text = resp.content[1].text
else:

View file

@ -7,7 +7,7 @@ class BaseBedrockProvider(ABC):
# to handle different generation kwargs
max_tokens_field_name = "max_tokens"
def __init__(self, reasoning: bool = False, reasoning_max_token: int = 1024):
def __init__(self, reasoning: bool = False, reasoning_max_token: int = 4000):
self.reasoning = reasoning
self.reasoning_max_token = reasoning_max_token

View file

@ -191,7 +191,7 @@ PROVIDERS = {
}
def get_provider(model_id: str, reasoning: bool = False, reasoning_max_token: int = 1024):
def get_provider(model_id: str, reasoning: bool = False, reasoning_max_token: int = 4000):
arr = model_id.split(".")
if len(arr) == 2:
provider, model_name = arr # meta、mistral……

View file

@ -90,6 +90,7 @@ TOKEN_COSTS = {
"anthropic/claude-3.7-sonnet": {"prompt": 0.003, "completion": 0.015},
"anthropic/claude-3.7-sonnet:beta": {"prompt": 0.003, "completion": 0.015},
"anthropic/claude-3.7-sonnet:thinking": {"prompt": 0.003, "completion": 0.015},
"anthropic.claude-3-7-sonnet-20250219-v1:0": {"prompt": 0.003, "completion": 0.015},
"us.anthropic.claude-3-7-sonnet-20250219-v1:0": {"prompt": 0.003, "completion": 0.015},
"google/gemini-pro-1.5": {"prompt": 0.0025, "completion": 0.0075}, # for openrouter, end
"deepseek-chat": {"prompt": 0.00027, "completion": 0.0011},