From fdc758a571132a38e12f20884fa5dd2b33f1c868 Mon Sep 17 00:00:00 2001
From: better629
Date: Tue, 16 Apr 2024 15:10:23 +0800
Subject: [PATCH] add openrouter support

---
 metagpt/configs/llm_config.py  |  1 +
 metagpt/provider/base_llm.py   |  2 +-
 metagpt/provider/openai_api.py | 16 +++++++++++++++-
 metagpt/utils/token_counter.py | 22 ++++++++++++++++++++++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py
index af8f56372..222e116ee 100644
--- a/metagpt/configs/llm_config.py
+++ b/metagpt/configs/llm_config.py
@@ -31,6 +31,7 @@ class LLMType(Enum):
     MOONSHOT = "moonshot"
     MISTRAL = "mistral"
     YI = "yi"  # lingyiwanwu
+    OPENROUTER = "openrouter"
 
     def __missing__(self, key):
         return self.OPENAI
diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index db2757ec3..6387e3936 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -112,7 +112,7 @@ class BaseLLM(ABC):
         model = model or self.pricing_plan
         model = model or self.model
         usage = usage.model_dump() if isinstance(usage, BaseModel) else usage
-        if calc_usage and self.cost_manager:
+        if calc_usage and self.cost_manager and usage:
             try:
                 prompt_tokens = int(usage.get("prompt_tokens", 0))
                 completion_tokens = int(usage.get("completion_tokens", 0))
diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py
index dbfed72df..7957f775c 100644
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@@ -37,10 +37,21 @@ from metagpt.utils.token_counter import (
     count_message_tokens,
     count_string_tokens,
     get_max_completion_tokens,
+    get_openrouter_tokens,
 )
 
 
-@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL, LLMType.YI])
+@register_provider(
+    [
+        LLMType.OPENAI,
+        LLMType.FIREWORKS,
+        LLMType.OPEN_LLM,
+        LLMType.MOONSHOT,
+        LLMType.MISTRAL,
+        LLMType.YI,
+        LLMType.OPENROUTER,
+    ]
+)
 class OpenAILLM(BaseLLM):
     """Check https://platform.openai.com/examples for examples"""
 
@@ -95,6 +106,9 @@
                 elif hasattr(chunk.choices[0], "usage"):
                     # The usage of some services is an attribute of chunk.choices[0], such as Moonshot
                     usage = CompletionUsage(**chunk.choices[0].usage)
+                elif "openrouter.ai" in self.config.base_url:
+                    # OpenRouter reports token usage via a separate generation API
+                    usage = await get_openrouter_tokens(chunk)
 
         log_llm_stream("\n")
         full_reply_content = "".join(collected_messages)
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index 0ca22cf35..98ae079eb 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -11,6 +11,10 @@ ref4: https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/o
 ref5: https://ai.google.dev/models/gemini
 """
 import tiktoken
+from openai.types import CompletionUsage
+from openai.types.chat import ChatCompletionChunk
+
+from metagpt.utils.ahttp_client import apost
 
 TOKEN_COSTS = {
     "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
@@ -52,6 +56,9 @@
     "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075},
     "yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003},
     "yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017},
+    "microsoft/wizardlm-2-8x22b": {"prompt": 0.00108, "completion": 0.00108},  # openrouter models, start
+    "openai/gpt-3.5-turbo-0125": {"prompt": 0.0005, "completion": 0.0015},
+    "openai/gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
 }
 
 
@@ -182,6 +189,9 @@ TOKEN_MAX = {
     "claude-3-opus-20240229": 200000,
     "yi-34b-chat-0205": 4000,
     "yi-34b-chat-200k": 200000,
+    "microsoft/wizardlm-2-8x22b": 65536,
+    "openai/gpt-3.5-turbo-0125": 16385,
+    "openai/gpt-4-turbo-preview": 128000,
 }
 
 
@@ -284,3 +294,15 @@ def get_max_completion_tokens(messages: list[dict], model: str, default: int) ->
     if model not in TOKEN_MAX:
         return default
     return TOKEN_MAX[model] - count_message_tokens(messages) - 1
+
+
+async def get_openrouter_tokens(chunk: ChatCompletionChunk) -> CompletionUsage:
+    """Fetch token usage from OpenRouter; ref: https://openrouter.ai/docs#querying-cost-and-stats"""
+    url = f"https://openrouter.ai/api/v1/generation?id={chunk.id}"
+    resp = await apost(url=url, as_json=True)
+    prompt_tokens = resp.get("tokens_prompt", 0)
+    completion_tokens = resp.get("tokens_completion", 0)
+    usage = CompletionUsage(
+        prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=prompt_tokens + completion_tokens
+    )
+    return usage
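
Usage note (not part of the patch): a minimal sketch of selecting the new provider type once this change is applied. The LLMConfig field names used here (api_type, base_url, api_key, model) follow MetaGPT's existing config schema; the API key value is a placeholder.

    from metagpt.configs.llm_config import LLMConfig, LLMType
    from metagpt.provider.openai_api import OpenAILLM

    # OpenRouter is OpenAI-compatible, so it reuses OpenAILLM. Streaming calls
    # take the "openrouter.ai" branch added above, which queries the
    # /api/v1/generation endpoint for prompt/completion token counts so the
    # cost manager can record usage.
    config = LLMConfig(
        api_type=LLMType.OPENROUTER,
        base_url="https://openrouter.ai/api/v1",
        api_key="sk-or-...",  # placeholder
        model="openai/gpt-3.5-turbo-0125",
    )
    llm = OpenAILLM(config)
    # reply = await llm.aask("hello")  # call from an async context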