diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py
index 31907d9e8..85d5ef50e 100644
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@@ -37,7 +37,6 @@ from metagpt.utils.token_counter import (
     count_input_tokens,
     count_output_tokens,
     get_max_completion_tokens,
-    get_openrouter_tokens,
 )
@@ -92,6 +91,7 @@ class OpenAILLM(BaseLLM):
         )
         usage = None
         collected_messages = []
+        has_finished = False
         async for chunk in response:
             chunk_message = chunk.choices[0].delta.content or "" if chunk.choices else ""  # extract the message
             finish_reason = (
@@ -99,6 +99,10 @@
             )
             log_llm_stream(chunk_message)
             collected_messages.append(chunk_message)
+            if has_finished:
+                # for oneapi, a usage chunk is sent after the chunk whose finish_reason is not None
+                if hasattr(chunk, "usage"):
+                    usage = CompletionUsage(**chunk.usage)
             if finish_reason:
                 if hasattr(chunk, "usage") and chunk.usage is not None:
                     # Some services have usage as an attribute of the chunk, such as Fireworks
@@ -109,9 +113,10 @@
                 elif hasattr(chunk.choices[0], "usage"):
                     # The usage of some services is an attribute of chunk.choices[0], such as Moonshot
                     usage = CompletionUsage(**chunk.choices[0].usage)
-                elif "openrouter.ai" in self.config.base_url:
+                elif "openrouter.ai" in self.config.base_url and hasattr(chunk, "usage") and chunk.usage is not None:
                     # due to it get token cost from api
-                    usage = await get_openrouter_tokens(chunk)
+                    usage = chunk.usage
+                has_finished = True
         log_llm_stream("\n")

         full_reply_content = "".join(collected_messages)
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index 49d62b69a..90fd9e960 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -41,11 +41,19 @@ TOKEN_COSTS = {
     "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
     "gpt-4o": {"prompt": 0.005, "completion": 0.015},
     "gpt-4o-mini": {"prompt": 0.00015, "completion": 0.0006},
+    "gpt-4o-mini-2024-07-18": {"prompt": 0.00015, "completion": 0.0006},
     "gpt-4o-2024-05-13": {"prompt": 0.005, "completion": 0.015},
+    "gpt-4o-2024-08-06": {"prompt": 0.0025, "completion": 0.01},
+    "o1-preview": {"prompt": 0.015, "completion": 0.06},
+    "o1-preview-2024-09-12": {"prompt": 0.015, "completion": 0.06},
+    "o1-mini": {"prompt": 0.003, "completion": 0.012},
+    "o1-mini-2024-09-12": {"prompt": 0.003, "completion": 0.012},
     "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
     "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005¥/k-tokens
     "glm-4": {"prompt": 0.014, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1¥/k-tokens
-    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
+    "gemini-1.5-flash": {"prompt": 0.000075, "completion": 0.0003},
+    "gemini-1.5-pro": {"prompt": 0.0035, "completion": 0.0105},
+    "gemini-1.0-pro": {"prompt": 0.0005, "completion": 0.0015},
     "moonshot-v1-8k": {"prompt": 0.012, "completion": 0.012},  # prompt + completion tokens=0.012¥/k-tokens
     "moonshot-v1-32k": {"prompt": 0.024, "completion": 0.024},
     "moonshot-v1-128k": {"prompt": 0.06, "completion": 0.06},
@@ -69,15 +77,20 @@ TOKEN_COSTS = {
     "llama3-70b-8192": {"prompt": 0.0059, "completion": 0.0079},
     "openai/gpt-3.5-turbo-0125": {"prompt": 0.0005, "completion": 0.0015},
     "openai/gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
+    "openai/o1-preview": {"prompt": 0.015, "completion": 0.06},
+    "openai/o1-mini": {"prompt": 0.003, "completion": 0.012},
+    "anthropic/claude-3-opus": {"prompt": 0.015, "completion": 0.075},
+    "anthropic/claude-3.5-sonnet": {"prompt": 0.003, "completion": 0.015},
+    "google/gemini-pro-1.5": {"prompt": 0.0025, "completion": 0.0075},  # for openrouter, end
     "deepseek-chat": {"prompt": 0.00014, "completion": 0.00028},
     "deepseek-coder": {"prompt": 0.00014, "completion": 0.00028},
     # For ark model https://www.volcengine.com/docs/82379/1099320
-    "doubao-lite-4k-240515": {"prompt": 0.000042, "completion": 0.000084},
-    "doubao-lite-32k-240515": {"prompt": 0.000042, "completion": 0.000084},
-    "doubao-lite-128k-240515": {"prompt": 0.00011, "completion": 0.00013},
-    "doubao-pro-4k-240515": {"prompt": 0.00011, "completion": 0.00028},
-    "doubao-pro-32k-240515": {"prompt": 0.00011, "completion": 0.00028},
-    "doubao-pro-128k-240515": {"prompt": 0.0007, "completion": 0.0012},
+    "doubao-lite-4k-240515": {"prompt": 0.000043, "completion": 0.000086},
+    "doubao-lite-32k-240515": {"prompt": 0.000043, "completion": 0.000086},
+    "doubao-lite-128k-240515": {"prompt": 0.00011, "completion": 0.00014},
+    "doubao-pro-4k-240515": {"prompt": 0.00011, "completion": 0.00029},
+    "doubao-pro-32k-240515": {"prompt": 0.00011, "completion": 0.00029},
+    "doubao-pro-128k-240515": {"prompt": 0.0007, "completion": 0.0013},
     "llama3-70b-llama3-70b-instruct": {"prompt": 0.0, "completion": 0.0},
     "llama3-8b-llama3-8b-instruct": {"prompt": 0.0, "completion": 0.0},
 }
@@ -142,10 +155,10 @@ Some new model published by Alibaba will be prioritized to be released on the Mo
 Token price on Model Studio shows on https://help.aliyun.com/zh/model-studio/getting-started/models#ced16cb6cdfsy
 """
 DASHSCOPE_TOKEN_COSTS = {
-    "qwen2.5-72b-instruct": {"prompt": 0.004, "completion": 0.012},  # per 1k tokens
-    "qwen2.5-32b-instruct": {"prompt": 0.0035, "completion": 0.007},
-    "qwen2.5-14b-instruct": {"prompt": 0.002, "completion": 0.006},
-    "qwen2.5-7b-instruct": {"prompt": 0.001, "completion": 0.002},
+    "qwen2.5-72b-instruct": {"prompt": 0.00057, "completion": 0.0017},  # per 1k tokens
+    "qwen2.5-32b-instruct": {"prompt": 0.0005, "completion": 0.001},
+    "qwen2.5-14b-instruct": {"prompt": 0.00029, "completion": 0.00086},
+    "qwen2.5-7b-instruct": {"prompt": 0.00014, "completion": 0.00029},
     "qwen2.5-3b-instruct": {"prompt": 0.0, "completion": 0.0},
     "qwen2.5-1.5b-instruct": {"prompt": 0.0, "completion": 0.0},
     "qwen2.5-0.5b-instruct": {"prompt": 0.0, "completion": 0.0},
@@ -199,16 +212,24 @@ FIREWORKS_GRADE_TOKEN_COSTS = {

 # https://console.volcengine.com/ark/region:ark+cn-beijing/model
 DOUBAO_TOKEN_COSTS = {
-    "doubao-lite": {"prompt": 0.0003, "completion": 0.0006},
-    "doubao-lite-128k": {"prompt": 0.0008, "completion": 0.0010},
-    "doubao-pro": {"prompt": 0.0008, "completion": 0.0020},
-    "doubao-pro-128k": {"prompt": 0.0050, "completion": 0.0090},
+    "doubao-lite": {"prompt": 0.000043, "completion": 0.000086},
+    "doubao-lite-128k": {"prompt": 0.00011, "completion": 0.00014},
+    "doubao-pro": {"prompt": 0.00011, "completion": 0.00029},
+    "doubao-pro-128k": {"prompt": 0.00071, "completion": 0.0013},
+    "doubao-pro-256k": {"prompt": 0.00071, "completion": 0.0013},
 }

 # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
 TOKEN_MAX = {
-    "gpt-4o-2024-05-13": 128000,
+    "o1-preview": 128000,
+    "o1-preview-2024-09-12": 128000,
+    "o1-mini": 128000,
+    "o1-mini-2024-09-12": 128000,
     "gpt-4o": 128000,
+    "gpt-4o-2024-05-13": 128000,
+    "gpt-4o-2024-08-06": 128000,
+    "gpt-4o-mini-2024-07-18": 128000,
+    "gpt-4o-mini": 128000,
     "gpt-4-turbo-2024-04-09": 128000,
     "gpt-4-0125-preview": 128000,
     "gpt-4-turbo-preview": 128000,
@@ -220,7 +241,6 @@ TOKEN_MAX = {
     "gpt-4-0613": 8192,
     "gpt-4-32k": 32768,
     "gpt-4-32k-0613": 32768,
-    "gpt-4o-mini": 128000,
     "gpt-3.5-turbo-0125": 16385,
     "gpt-3.5-turbo": 16385,
     "gpt-3.5-turbo-1106": 16385,
@@ -231,7 +251,9 @@ TOKEN_MAX = {
     "text-embedding-ada-002": 8192,
     "glm-3-turbo": 128000,
     "glm-4": 128000,
-    "gemini-pro": 32768,
+    "gemini-1.5-flash": 1000000,
+    "gemini-1.5-pro": 2000000,
+    "gemini-1.0-pro": 32000,
     "moonshot-v1-8k": 8192,
     "moonshot-v1-32k": 32768,
     "moonshot-v1-128k": 128000,
@@ -255,6 +277,11 @@ TOKEN_MAX = {
     "llama3-70b-8192": 8192,
     "openai/gpt-3.5-turbo-0125": 16385,
     "openai/gpt-4-turbo-preview": 128000,
+    "openai/o1-preview": 128000,
+    "openai/o1-mini": 128000,
+    "anthropic/claude-3-opus": 200000,
+    "anthropic/claude-3.5-sonnet": 200000,
+    "google/gemini-pro-1.5": 4000000,
     "deepseek-chat": 32768,
     "deepseek-coder": 16385,
     "doubao-lite-4k-240515": 4000,
@@ -375,7 +402,13 @@ def count_input_tokens(messages, model="gpt-3.5-turbo-0125"):
         "gpt-4-1106-vision-preview",
         "gpt-4o",
         "gpt-4o-2024-05-13",
+        "gpt-4o-2024-08-06",
         "gpt-4o-mini",
+        "gpt-4o-mini-2024-07-18",
+        "o1-preview",
+        "o1-preview-2024-09-12",
+        "o1-mini",
+        "o1-mini-2024-09-12",
     }:
         tokens_per_message = 3  # # every reply is primed with <|start|>assistant<|message|>
         tokens_per_name = 1