From 430fe2961758f1e68aa99978da9f4d8041fa52ea Mon Sep 17 00:00:00 2001
From: YangQianli92 <108046369+YangQianli92@users.noreply.github.com>
Date: Thu, 29 Feb 2024 10:15:32 +0800
Subject: [PATCH] Add files via upload

---
 metagpt/utils/cost_manager.py  | 45 +++++++++++++++++++++++++++++++++-
 metagpt/utils/token_counter.py | 14 +++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py
index efff07ae1..921fbe6cd 100644
--- a/metagpt/utils/cost_manager.py
+++ b/metagpt/utils/cost_manager.py
@@ -6,12 +6,14 @@
 @Desc    : mashenquan, 2023/8/28. Separate the `CostManager` class to support user-level cost accounting.
 """
+import re
 from typing import NamedTuple
 
 from pydantic import BaseModel
 
 from metagpt.logs import logger
-from metagpt.utils.token_counter import TOKEN_COSTS
+from metagpt.utils.token_counter import FIREWORKS_GRADE_TOKEN_COSTS, TOKEN_COSTS
+
 
 
 class Costs(NamedTuple):
@@ -103,3 +105,44 @@ class TokenCostManager(CostManager):
         self.total_prompt_tokens += prompt_tokens
         self.total_completion_tokens += completion_tokens
         logger.info(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
+
+
+class FireworksCostManager(CostManager):
+    def model_grade_token_costs(self, model: str) -> dict[str, float]:
+        def _get_model_size(model: str) -> float:
+            size = re.findall(".*-([0-9.]+)b", model)
+            size = float(size[0]) if len(size) > 0 else -1
+            return size
+
+        if "mixtral-8x7b" in model:
+            token_costs = FIREWORKS_GRADE_TOKEN_COSTS["mixtral-8x7b"]
+        else:
+            model_size = _get_model_size(model)
+            if 0 < model_size <= 16:
+                token_costs = FIREWORKS_GRADE_TOKEN_COSTS["16"]
+            elif 16 < model_size <= 80:
+                token_costs = FIREWORKS_GRADE_TOKEN_COSTS["80"]
+            else:
+                token_costs = FIREWORKS_GRADE_TOKEN_COSTS["-1"]
+        return token_costs
+
+    def update_cost(self, prompt_tokens: int, completion_tokens: int, model: str):
+        """
+        Refer to `https://app.fireworks.ai/pricing` **Developer pricing**.
+        Update the total cost, prompt tokens, and completion tokens.
+
+        Args:
+            prompt_tokens (int): The number of tokens used in the prompt.
+            completion_tokens (int): The number of tokens used in the completion.
+            model (str): The model used for the API call.
+ """ + self.total_prompt_tokens += prompt_tokens + self.total_completion_tokens += completion_tokens + + token_costs = self.model_grade_token_costs(model) + cost = (prompt_tokens * token_costs["prompt"] + completion_tokens * token_costs["completion"]) / 1000000 + self.total_cost += cost + logger.info( + f"Total running cost: ${self.total_cost:.4f}" + f"Current cost: ${cost:.4f}, prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}" + ) \ No newline at end of file diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 167a1d755..9a6fddf34 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -35,6 +35,9 @@ TOKEN_COSTS = { "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens "gemini-pro": {"prompt": 0.00025, "completion": 0.0005}, + "moonshot-v1-8k": {"prompt": 0.012, "completion": 0.012}, # prompt + completion tokens=0.012¥/k-tokens + "moonshot-v1-32k": {"prompt": 0.024, "completion": 0.024}, + "moonshot-v1-128k": {"prompt": 0.06, "completion": 0.06}, } @@ -120,6 +123,14 @@ DASHSCOPE_TOKEN_COSTS = { } +FIREWORKS_GRADE_TOKEN_COSTS = { + "-1": {"prompt": 0.0, "completion": 0.0}, # abnormal condition + "16": {"prompt": 0.2, "completion": 0.8}, # 16 means model size <= 16B; 0.2 means $0.2/1M tokens + "80": {"prompt": 0.7, "completion": 2.8}, # 80 means 16B < model size <= 80B + "mixtral-8x7b": {"prompt": 0.4, "completion": 1.6}, +} + + TOKEN_MAX = { "gpt-3.5-turbo": 4096, "gpt-3.5-turbo-0301": 4096, @@ -143,6 +154,9 @@ TOKEN_MAX = { "glm-3-turbo": 128000, "glm-4": 128000, "gemini-pro": 32768, + "moonshot-v1-8k": 8192, + "moonshot-v1-32k": 32768, + "moonshot-v1-128k": 128000, }