diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index a41c8b0a6..47d7df6f1 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -105,6 +105,9 @@ class OpenAILLM(BaseLLM): elif hasattr(chunk.choices[0], "usage"): # The usage of some services is an attribute of chunk.choices[0], such as Moonshot usage = CompletionUsage(**chunk.choices[0].usage) + if "openrouter.ai" in self.config.base_url and hasattr(chunk, "usage") and chunk.usage is not None: + # due to it get token cost from api + usage = chunk.usage log_llm_stream("\n") full_reply_content = "".join(collected_messages) diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 1019a17af..5be6b5f61 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -13,6 +13,7 @@ ref5: https://ai.google.dev/models/gemini import tiktoken TOKEN_COSTS = { + "anthropic/claude-3.5-sonnet": {"prompt": 0.003, "completion": 0.015}, "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002}, "gpt-3.5-turbo-0301": {"prompt": 0.0015, "completion": 0.002}, "gpt-3.5-turbo-0613": {"prompt": 0.0015, "completion": 0.002}, @@ -31,9 +32,11 @@ TOKEN_COSTS = { "gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03}, "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03}, "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator + "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, "gpt-4o": {"prompt": 0.005, "completion": 0.015}, "gpt-4o-2024-05-13": {"prompt": 0.005, "completion": 0.015}, - "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, + "gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006}, + "gpt-4o-mini-2024-07-18": {"prompt": 0.00015, "completion": 0.0006}, "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens @@ -53,6 +56,18 @@ TOKEN_COSTS = { "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075}, "yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003}, "yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017}, + "openai/gpt-4": {"prompt": 0.03, "completion": 0.06}, # start, for openrouter + "openai/gpt-4-turbo": {"prompt": 0.01, "completion": 0.03}, + "openai/gpt-4o": {"prompt": 0.005, "completion": 0.015}, + "openai/gpt-4o-2024-05-13": {"prompt": 0.005, "completion": 0.015}, + "openai/gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006}, + "openai/gpt-4o-mini-2024-07-18": {"prompt": 0.00015, "completion": 0.0006}, + "google/gemini-pro-1.5": {"prompt": 0.0025, "completion": 0.0075}, + "google/gemini-flash-1.5": {"prompt": 0.00025, "completion": 0.00075}, + "deepseek/deepseek-coder": {"prompt": 0.00014, "completion": 0.00028}, + "deepseek/deepseek-chat": {"prompt": 0.00014, "completion": 0.00028}, # end, for openrouter + "deepseek-chat": {"prompt": 0.00014, "completion": 0.00028}, + "deepseek-coder": {"prompt": 0.00014, "completion": 0.00028}, } @@ -149,6 +164,8 @@ FIREWORKS_GRADE_TOKEN_COSTS = { TOKEN_MAX = { "gpt-4o-2024-05-13": 128000, "gpt-4o": 128000, + "gpt-4o-mini": 128000, + "gpt-4o-mini-2024-07-18": 128000, "gpt-4-0125-preview": 128000, "gpt-4-turbo-preview": 128000, "gpt-4-1106-preview": 128000, @@ -185,6 +202,19 @@ TOKEN_MAX = { "claude-3-opus-20240229": 200000, "yi-34b-chat-0205": 4000, "yi-34b-chat-200k": 200000, + "openai/gpt-4": 8192, # start, for openrouter + "openai/gpt-4-turbo": 128000, + "openai/gpt-4o": 128000, + "openai/gpt-4o-2024-05-13": 128000, + "openai/gpt-4o-mini": 128000, + "openai/gpt-4o-mini-2024-07-18": 128000, + "anthropic/claude-3.5-sonnet": 200000, + "google/gemini-pro-1.5": 2800000, + "google/gemini-flash-1.5": 2800000, + "deepseek/deepseek-coder": 128000, + "deepseek/deepseek-chat": 128000, # end, for openrouter + "deepseek-chat": 128000, + "deepseek-coder": 128000, } @@ -214,6 +244,8 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0125"): "gpt-4-1106-vision-preview", "gpt-4o-2024-05-13", "gpt-4o", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", }: tokens_per_message = 3 # # every reply is primed with <|start|>assistant<|message|> tokens_per_name = 1