From c7ee54ace108f24571a5434ded2452a102be7c86 Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 7 Feb 2024 22:50:30 +0800 Subject: [PATCH 01/12] add dashscope including QWEN and its ut code --- examples/llm_hello_world.py | 2 +- metagpt/configs/llm_config.py | 1 + metagpt/provider/__init__.py | 2 + metagpt/provider/dashscope_api.py | 246 +++++++++++++++++++ metagpt/utils/token_counter.py | 29 +++ requirements.txt | 1 + tests/metagpt/provider/mock_llm_config.py | 2 + tests/metagpt/provider/req_resp_const.py | 28 +++ tests/metagpt/provider/test_dashscope_api.py | 61 +++++ 9 files changed, 371 insertions(+), 1 deletion(-) create mode 100644 metagpt/provider/dashscope_api.py create mode 100644 tests/metagpt/provider/test_dashscope_api.py diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py index e22edbdf2..9340f7d5a 100644 --- a/examples/llm_hello_world.py +++ b/examples/llm_hello_world.py @@ -21,7 +21,7 @@ async def main(): logger.info( await llm.aask( - "who are you", system_msgs=["act as a robot, answer 'I'am robot' if the question is 'who are you'"] + "who are you", system_msgs=["act as a robot, just answer 'I'am robot' if the question is 'who are you'"] ) ) diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py index 1b05b5270..36f5d7ae7 100644 --- a/metagpt/configs/llm_config.py +++ b/metagpt/configs/llm_config.py @@ -25,6 +25,7 @@ class LLMType(Enum): AZURE = "azure" OLLAMA = "ollama" QIANFAN = "qianfan" # Baidu BCE + DASHSCOPE = "dashscope" # Aliyun LingJi DashScope def __missing__(self, key): return self.OPENAI diff --git a/metagpt/provider/__init__.py b/metagpt/provider/__init__.py index 8c0aab836..44e6d3f3b 100644 --- a/metagpt/provider/__init__.py +++ b/metagpt/provider/__init__.py @@ -17,6 +17,7 @@ from metagpt.provider.metagpt_api import MetaGPTLLM from metagpt.provider.human_provider import HumanProvider from metagpt.provider.spark_api import SparkLLM from metagpt.provider.qianfan_api import QianFanLLM +from metagpt.provider.dashscope_api import DashScopeLLM __all__ = [ "FireworksLLM", @@ -30,4 +31,5 @@ __all__ = [ "HumanProvider", "SparkLLM", "QianFanLLM", + "DashScopeLLM", ] diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py new file mode 100644 index 000000000..58031e452 --- /dev/null +++ b/metagpt/provider/dashscope_api.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : + +import json +from http import HTTPStatus +from typing import Any, AsyncGenerator, Dict, List, Union + +import dashscope +from dashscope.aigc.generation import Generation +from dashscope.api_entities.aiohttp_request import AioHttpRequest +from dashscope.api_entities.api_request_data import ApiRequestData +from dashscope.api_entities.api_request_factory import _get_protocol_params +from dashscope.api_entities.dashscope_response import ( + GenerationOutput, + GenerationResponse, + Message, +) +from dashscope.client.base_api import BaseAioApi +from dashscope.common.constants import SERVICE_API_PATH, ApiProtocol +from dashscope.common.error import ( + InputDataRequired, + InputRequired, + ModelRequired, + UnsupportedApiProtocol, +) +from tenacity import ( + after_log, + retry, + retry_if_exception_type, + stop_after_attempt, + wait_random_exponential, +) + +from metagpt.logs import log_llm_stream, logger +from metagpt.provider.base_llm import BaseLLM, LLMConfig +from metagpt.provider.llm_provider_registry import LLMType, register_provider +from metagpt.provider.openai_api import log_and_reraise +from metagpt.utils.cost_manager import CostManager +from metagpt.utils.token_counter import DashScore_TOKEN_COSTS + + +def build_api_arequest( + model: str, input: object, task_group: str, task: str, function: str, api_key: str, is_service=True, **kwargs +): + ( + api_protocol, + ws_stream_mode, + is_binary_input, + http_method, + stream, + async_request, + query, + headers, + request_timeout, + form, + resources, + ) = _get_protocol_params(kwargs) + task_id = kwargs.pop("task_id", None) + if api_protocol in [ApiProtocol.HTTP, ApiProtocol.HTTPS]: + if not dashscope.base_http_api_url.endswith("/"): + http_url = dashscope.base_http_api_url + "/" + else: + http_url = dashscope.base_http_api_url + + if is_service: + http_url = http_url + SERVICE_API_PATH + "/" + + if task_group: + http_url += "%s/" % task_group + if task: + http_url += "%s/" % task + if function: + http_url += function + request = AioHttpRequest( + url=http_url, + api_key=api_key, + http_method=http_method, + stream=stream, + async_request=async_request, + query=query, + timeout=request_timeout, + task_id=task_id, + ) + else: + raise UnsupportedApiProtocol("Unsupported protocol: %s, support [http, https, websocket]" % api_protocol) + + if headers is not None: + request.add_headers(headers=headers) + + if input is None and form is None: + raise InputDataRequired("There is no input data and form data") + + request_data = ApiRequestData( + model, + task_group=task_group, + task=task, + function=function, + input=input, + form=form, + is_binary_input=is_binary_input, + api_protocol=api_protocol, + ) + request_data.add_resources(resources) + request_data.add_parameters(**kwargs) + request.data = request_data + return request + + +class AGeneration(Generation, BaseAioApi): + @classmethod + async def acall( + cls, + model: str, + prompt: Any = None, + history: list = None, + api_key: str = None, + messages: List[Message] = None, + plugins: Union[str, Dict[str, Any]] = None, + **kwargs, + ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]: + if (prompt is None or not prompt) and (messages is None or not messages): + raise InputRequired("prompt or messages is required!") + if model is None or not model: + raise ModelRequired("Model is required!") + task_group, function = "aigc", "generation" # fixed value + if plugins is not None: + headers = kwargs.pop("headers", {}) + if isinstance(plugins, str): + headers["X-DashScope-Plugin"] = plugins + else: + headers["X-DashScope-Plugin"] = json.dumps(plugins) + kwargs["headers"] = headers + input, parameters = cls._build_input_parameters(model, prompt, history, messages, **kwargs) + + api_key, model = BaseAioApi._validate_params(api_key, model) + request = build_api_arequest( + model=model, + input=input, + task_group=task_group, + task=Generation.task, + function=function, + api_key=api_key, + **kwargs, + ) + response = await request.aio_call() + is_stream = kwargs.get("stream", False) + if is_stream: + + async def aresp_iterator(response): + async for resp in response: + yield GenerationResponse.from_api_response(resp) + + return aresp_iterator(response) + else: + return GenerationResponse.from_api_response(response) + + +@register_provider(LLMType.DASHSCOPE) +class DashScopeLLM(BaseLLM): + def __init__(self, llm_config: LLMConfig): + self.config = llm_config + self.use_system_prompt = False # only some models support system_prompt + self.__init_dashscope() + self.cost_manager = CostManager(token_costs=self.token_costs) + + def __init_dashscope(self): + self.model = self.config.model + self.api_key = self.config.api_key + self.token_costs = DashScore_TOKEN_COSTS + self.aclient: AGeneration = AGeneration + + # check support system_message models + support_system_models = [ + "qwen-", # all support + "llama2-", # all support + "baichuan2-7b-chat-v1", + "chatglm3-6b", + ] + for support_model in support_system_models: + if support_model in self.model: + self.use_system_prompt = True + + def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict: + kwargs = { + "api_key": self.api_key, + "model": self.model, + "messages": messages, + "stream": stream, + "result_format": "message", + } + if self.config.temperature > 0: + # different model has default temperature. only set when it"s specified. + kwargs["temperature"] = self.config.temperature + return kwargs + + def _check_response(self, resp: GenerationResponse): + if resp.status_code != HTTPStatus.OK: + raise RuntimeError(f"code: {resp.code}, request_id: {resp.request_id}, message: {resp.message}") + + def get_choice_text(self, output: GenerationOutput) -> str: + return output.get("choices", [{}])[0].get("message", {}).get("content", "") + + def completion(self, messages: list[dict]) -> GenerationOutput: + resp: GenerationResponse = self.aclient.call(**self._const_kwargs(messages, stream=False)) + self._check_response(resp) + + self._update_costs(dict(resp.usage)) + return resp.output + + async def _achat_completion(self, messages: list[dict]) -> GenerationOutput: + resp: GenerationResponse = await self.aclient.acall(**self._const_kwargs(messages, stream=False)) + self._check_response(resp) + self._update_costs(dict(resp.usage)) + return resp.output + + async def acompletion(self, messages: list[dict], timeout=3) -> GenerationOutput: + return await self._achat_completion(messages) + + async def _achat_completion_stream(self, messages: list[dict]) -> str: + resp = await self.aclient.acall(**self._const_kwargs(messages, stream=True)) + collected_content = [] + usage = {} + async for chunk in resp: + self._check_response(chunk) + content = chunk.output.choices[0]["message"]["content"] + usage = dict(chunk.usage) # each chunk has usage + log_llm_stream(content) + collected_content.append(content) + log_llm_stream("\n") + self._update_costs(usage) + full_content = "".join(collected_content) + return full_content + + @retry( + stop=stop_after_attempt(3), + wait=wait_random_exponential(min=1, max=60), + after=after_log(logger, logger.level("WARNING").name), + retry=retry_if_exception_type(ConnectionError), + retry_error_callback=log_and_reraise, + ) + async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str: + if stream: + return await self._achat_completion_stream(messages) + resp = await self._achat_completion(messages) + return self.get_choice_text(resp) diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index b69ec73d3..cac706f6c 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -90,6 +90,35 @@ QianFan_EndPoint_TOKEN_COSTS = { "yi_34b_chat": QianFan_MODEL_TOKEN_COSTS["Yi-34B-Chat"], } +""" +DashScore Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing +Different model has different detail page. Attention, some model are free for a limited time. +""" +DashScore_TOKEN_COSTS = { + "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011}, + "qwen-plus": {"prompt": 0.0028, "completion": 0.0028}, + "qwen-max": {"prompt": 0.0, "completion": 0.0}, + "qwen-max-1201": {"prompt": 0.0, "completion": 0.0}, + "qwen-max-longcontext": {"prompt": 0.0, "completion": 0.0}, + "llama2-7b-chat-v2": {"prompt": 0.0, "completion": 0.0}, + "llama2-13b-chat-v2": {"prompt": 0.0, "completion": 0.0}, + "qwen-72b-chat": {"prompt": 0.0, "completion": 0.0}, + "qwen-14b-chat": {"prompt": 0.0011, "completion": 0.0011}, + "qwen-7b-chat": {"prompt": 0.00084, "completion": 0.00084}, + "qwen-1.8b-chat": {"prompt": 0.0, "completion": 0.0}, + "baichuan2-13b-chat-v1": {"prompt": 0.0011, "completion": 0.0011}, + "baichuan2-7b-chat-v1": {"prompt": 0.00084, "completion": 0.00084}, + "baichuan-7b-v1": {"prompt": 0.0, "completion": 0.0}, + "chatglm-6b-v2": {"prompt": 0.0011, "completion": 0.0011}, + "chatglm3-6b": {"prompt": 0.0, "completion": 0.0}, + "ziya-llama-13b-v1": {"prompt": 0.0, "completion": 0.0}, # no price page, judge it as free + "dolly-12b-v2": {"prompt": 0.0, "completion": 0.0}, + "belle-llama-13b-2m-v1": {"prompt": 0.0, "completion": 0.0}, + "moss-moon-003-sft-v1": {"prompt": 0.0, "completion": 0.0}, + "chatyuan-large-v2": {"prompt": 0.0, "completion": 0.0}, + "billa-7b-sft-v1": {"prompt": 0.0, "completion": 0.0}, +} + TOKEN_MAX = { "gpt-3.5-turbo": 4096, diff --git a/requirements.txt b/requirements.txt index c893bd713..ff5f97252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -68,3 +68,4 @@ anytree ipywidgets==8.1.1 Pillow qianfan==0.3.1 +dashscope==1.14.1 diff --git a/tests/metagpt/provider/mock_llm_config.py b/tests/metagpt/provider/mock_llm_config.py index e0afaa51e..bb5508d80 100644 --- a/tests/metagpt/provider/mock_llm_config.py +++ b/tests/metagpt/provider/mock_llm_config.py @@ -54,3 +54,5 @@ mock_llm_config_spark = LLMConfig( ) mock_llm_config_qianfan = LLMConfig(api_type="qianfan", access_key="xxx", secret_key="xxx", model="ERNIE-Bot-turbo") + +mock_llm_config_dashscope = LLMConfig(api_type="dashscore", api_key="xxx", model="qwen-max") diff --git a/tests/metagpt/provider/req_resp_const.py b/tests/metagpt/provider/req_resp_const.py index 73939e1c6..802962013 100644 --- a/tests/metagpt/provider/req_resp_const.py +++ b/tests/metagpt/provider/req_resp_const.py @@ -3,6 +3,12 @@ # @Desc : default request & response data for provider unittest +from dashscope.api_entities.dashscope_response import ( + DashScopeAPIResponse, + GenerationOutput, + GenerationResponse, + GenerationUsage, +) from openai.types.chat.chat_completion import ( ChatCompletion, ChatCompletionMessage, @@ -102,6 +108,28 @@ def get_qianfan_response(name: str) -> QfResponse: return QfResponse(code=200, body=qf_jsonbody_dict) +# For DashScope +def get_dashscope_response(name: str) -> GenerationResponse: + return GenerationResponse.from_api_response( + DashScopeAPIResponse( + status_code=200, + output=GenerationOutput( + **{ + "text": "", + "finish_reason": "", + "choices": [ + { + "finish_reason": "stop", + "message": {"role": "assistant", "content": resp_cont_tmpl.format(name=name)}, + } + ], + } + ), + usage=GenerationUsage(**{"input_tokens": 12, "output_tokens": 98, "total_tokens": 110}), + ) + ) + + # For llm general chat functions call async def llm_general_chat_funcs_test(llm: BaseLLM, prompt: str, messages: list[dict], resp_cont: str): resp = await llm.aask(prompt, stream=False) diff --git a/tests/metagpt/provider/test_dashscope_api.py b/tests/metagpt/provider/test_dashscope_api.py new file mode 100644 index 000000000..48a676bc7 --- /dev/null +++ b/tests/metagpt/provider/test_dashscope_api.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : the unittest of DashScopeLLM + +from typing import AsyncGenerator, Union + +import pytest +from dashscope.api_entities.dashscope_response import GenerationResponse + +from metagpt.provider.dashscope_api import DashScopeLLM +from tests.metagpt.provider.mock_llm_config import mock_llm_config_dashscope +from tests.metagpt.provider.req_resp_const import ( + get_dashscope_response, + llm_general_chat_funcs_test, + messages, + prompt, + resp_cont_tmpl, +) + +name = "qwen-max" +resp_cont = resp_cont_tmpl.format(name=name) + + +@classmethod +def mock_dashscope_call( + cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False +) -> GenerationResponse: + return get_dashscope_response(name) + + +@classmethod +async def mock_dashscope_acall( + cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False +) -> Union[AsyncGenerator[GenerationResponse, None], GenerationResponse]: + resps = [get_dashscope_response(name)] + + if stream: + + async def aresp_iterator(resps: list[GenerationResponse]): + for resp in resps: + yield resp + + return aresp_iterator(resps) + else: + return resps[0] + + +@pytest.mark.asyncio +async def test_dashscope_acompletion(mocker): + mocker.patch("dashscope.aigc.generation.Generation.call", mock_dashscope_call) + mocker.patch("metagpt.provider.dashscope_api.AGeneration.acall", mock_dashscope_acall) + + dashscore_llm = DashScopeLLM(mock_llm_config_dashscope) + + resp = dashscore_llm.completion(messages) + assert resp.choices[0]["message"]["content"] == resp_cont + + resp = await dashscore_llm.acompletion(messages) + assert resp.choices[0]["message"]["content"] == resp_cont + + await llm_general_chat_funcs_test(dashscore_llm, prompt, messages, resp_cont) From e22f5c72ec7936b7a8f156208dc5be628a21de13 Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 7 Feb 2024 23:01:40 +0800 Subject: [PATCH 02/12] update spell-mistake --- tests/metagpt/provider/mock_llm_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metagpt/provider/mock_llm_config.py b/tests/metagpt/provider/mock_llm_config.py index bb5508d80..e75acf68f 100644 --- a/tests/metagpt/provider/mock_llm_config.py +++ b/tests/metagpt/provider/mock_llm_config.py @@ -55,4 +55,4 @@ mock_llm_config_spark = LLMConfig( mock_llm_config_qianfan = LLMConfig(api_type="qianfan", access_key="xxx", secret_key="xxx", model="ERNIE-Bot-turbo") -mock_llm_config_dashscope = LLMConfig(api_type="dashscore", api_key="xxx", model="qwen-max") +mock_llm_config_dashscope = LLMConfig(api_type="dashscope", api_key="xxx", model="qwen-max") From 4dde9b71304bd3260ae0f675567fd0a0bfc2b085 Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 7 Feb 2024 23:15:56 +0800 Subject: [PATCH 03/12] stream using log_llm_stream --- metagpt/provider/fireworks_api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/metagpt/provider/fireworks_api.py b/metagpt/provider/fireworks_api.py index e62a7066e..d0394296d 100644 --- a/metagpt/provider/fireworks_api.py +++ b/metagpt/provider/fireworks_api.py @@ -16,7 +16,7 @@ from tenacity import ( ) from metagpt.configs.llm_config import LLMConfig, LLMType -from metagpt.logs import logger +from metagpt.logs import log_llm_stream, logger from metagpt.provider.llm_provider_registry import register_provider from metagpt.provider.openai_api import OpenAILLM, log_and_reraise from metagpt.utils.cost_manager import CostManager @@ -96,10 +96,11 @@ class FireworksLLM(OpenAILLM): finish_reason = choice.finish_reason if hasattr(choice, "finish_reason") else None if choice_delta.content: collected_content.append(choice_delta.content) - print(choice_delta.content, end="") + log_llm_stream(choice_delta.content) if finish_reason: # fireworks api return usage when finish_reason is not None usage = CompletionUsage(**chunk.usage) + log_llm_stream("\n") full_content = "".join(collected_content) self._update_costs(usage.model_dump()) From cec879f71b84667297356b99e49ad68cf1f503db Mon Sep 17 00:00:00 2001 From: better629 Date: Sun, 18 Feb 2024 20:01:27 +0800 Subject: [PATCH 04/12] update repair llm with backslash problem --- metagpt/utils/repair_llm_raw_output.py | 12 ++++++++++++ tests/metagpt/utils/test_repair_llm_raw_output.py | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 06484f71d..b8756e8c6 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -119,6 +119,7 @@ def repair_json_format(output: str) -> str: logger.info(f"repair_json_format: {'}]'}") elif output.startswith("{") and output.endswith("]"): output = output[:-1] + "}" + # remove comments in output json string, after json value content, maybe start with #, maybe start with // arr = output.split("\n") new_arr = [] @@ -208,6 +209,17 @@ def repair_invalid_json(output: str, error: str) -> str: elif (rline[col_no] in ["'", '"']) and (line.startswith('"') or line.startswith("'")) and "," not in line: # problem, `"""` or `'''` without `,` new_line = f",{line}" + elif col_no - 1 >= 0 and rline[col_no - 1] in ['"', "'"]: + # backslash problem like \" in the output + char = rline[col_no - 1] + nearest_char_idx = rline[col_no:].find(char) + new_line = ( + rline[: col_no - 1] + + "\\" + + rline[col_no - 1 : col_no + nearest_char_idx] + + "\\" + + rline[col_no + nearest_char_idx :] + ) elif '",' not in line and "," not in line and '"' not in line: new_line = f'{line}",' elif not line.endswith(","): diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index e28423b91..7a29ea3ee 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -211,6 +211,11 @@ value output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1") assert output == target_output + raw_output = '{"key": "url "http" \\"https\\" "}' + target_output = '{"key": "url \\"http\\" \\"https\\" "}' + output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 1 column 15 (char 14)") + assert output == target_output + def test_retry_parse_json_text(): from metagpt.utils.repair_llm_raw_output import retry_parse_json_text From 41d6d2be1d26518593c30ce8af9b9ddc865eeded Mon Sep 17 00:00:00 2001 From: better629 Date: Sun, 18 Feb 2024 20:02:42 +0800 Subject: [PATCH 05/12] update dashscope stream output --- metagpt/provider/dashscope_api.py | 2 ++ requirements.txt | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py index 58031e452..c02a302e4 100644 --- a/metagpt/provider/dashscope_api.py +++ b/metagpt/provider/dashscope_api.py @@ -192,6 +192,8 @@ class DashScopeLLM(BaseLLM): if self.config.temperature > 0: # different model has default temperature. only set when it"s specified. kwargs["temperature"] = self.config.temperature + if stream: + kwargs["incremental_output"] = True return kwargs def _check_response(self, resp: GenerationResponse): diff --git a/requirements.txt b/requirements.txt index cfac35717..9759248bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ python_docx==0.8.11 PyYAML==6.0.1 # sentence_transformers==2.2.2 setuptools==65.6.3 -tenacity==8.2.2 +tenacity==8.2.3 tiktoken==0.5.2 tqdm==4.65.0 #unstructured[local-inference] @@ -68,4 +68,4 @@ anytree ipywidgets==8.1.1 Pillow qianfan==0.3.1 -dashscope==1.14.1 +dashscope==1.14.1 \ No newline at end of file From 655ba5647b82106bd8fc63035bb3b8c5da4a479c Mon Sep 17 00:00:00 2001 From: betterwang Date: Fri, 23 Feb 2024 12:45:06 +0800 Subject: [PATCH 06/12] update qianfan pypi version --- metagpt/utils/cost_manager.py | 2 +- requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py index 4e6b65b2c..929d0b2e5 100644 --- a/metagpt/utils/cost_manager.py +++ b/metagpt/utils/cost_manager.py @@ -42,7 +42,7 @@ class CostManager(BaseModel): """ self.total_prompt_tokens += prompt_tokens self.total_completion_tokens += completion_tokens - if model not in TOKEN_COSTS: + if model not in self.token_costs: logger.warning(f"Model {model} not found in TOKEN_COSTS.") return diff --git a/requirements.txt b/requirements.txt index 9759248bb..4651cb258 100644 --- a/requirements.txt +++ b/requirements.txt @@ -67,5 +67,5 @@ playwright>=1.26 # used at metagpt/tools/libs/web_scraping.py anytree ipywidgets==8.1.1 Pillow -qianfan==0.3.1 -dashscope==1.14.1 \ No newline at end of file +qianfan==0.3.2 +dashscope==1.14.1 From 6893f78c212a65eeafb746e9fc55372a709aee61 Mon Sep 17 00:00:00 2001 From: betterwang Date: Fri, 23 Feb 2024 12:54:46 +0800 Subject: [PATCH 07/12] update ut of dashscope --- tests/metagpt/provider/test_dashscope_api.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/metagpt/provider/test_dashscope_api.py b/tests/metagpt/provider/test_dashscope_api.py index 48a676bc7..df6e92af0 100644 --- a/tests/metagpt/provider/test_dashscope_api.py +++ b/tests/metagpt/provider/test_dashscope_api.py @@ -23,14 +23,26 @@ resp_cont = resp_cont_tmpl.format(name=name) @classmethod def mock_dashscope_call( - cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False + cls, + messages: list[dict], + model: str, + api_key: str, + result_format: str, + incremental_output: bool = True, + stream: bool = False, ) -> GenerationResponse: return get_dashscope_response(name) @classmethod async def mock_dashscope_acall( - cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False + cls, + messages: list[dict], + model: str, + api_key: str, + result_format: str, + incremental_output: bool = True, + stream: bool = False, ) -> Union[AsyncGenerator[GenerationResponse, None], GenerationResponse]: resps = [get_dashscope_response(name)] From 79c71dc5ad980c32ee8103f8b8a08afb264ba217 Mon Sep 17 00:00:00 2001 From: betterwang Date: Fri, 23 Feb 2024 20:28:35 +0800 Subject: [PATCH 08/12] update ERNIE-Bot token cost --- metagpt/utils/token_counter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 5a1b2f2e7..53dc9e9c7 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -45,7 +45,7 @@ Due to QianFan has multi price strategies, we unify `Tokens post-payment` as a s QianFan_MODEL_TOKEN_COSTS = { "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017}, "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067}, - "ERNIE-Bot": {"prompt": 0.017, "completion": 0.017}, + "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017}, "ERNIE-Bot-turbo": {"prompt": 0.0011, "completion": 0.0011}, "EB-turbo-AppBuilder": {"prompt": 0.0011, "completion": 0.0011}, "ERNIE-Speed": {"prompt": 0.00056, "completion": 0.0011}, From 2d17da28262210a6f03977e7277811fdf3d0bb10 Mon Sep 17 00:00:00 2001 From: betterwang Date: Sat, 24 Feb 2024 15:00:45 +0800 Subject: [PATCH 09/12] fix dashcope spell --- metagpt/provider/dashscope_api.py | 4 ++-- metagpt/utils/token_counter.py | 4 ++-- tests/metagpt/provider/test_dashscope_api.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py index c02a302e4..7d7dcb7b7 100644 --- a/metagpt/provider/dashscope_api.py +++ b/metagpt/provider/dashscope_api.py @@ -37,7 +37,7 @@ from metagpt.provider.base_llm import BaseLLM, LLMConfig from metagpt.provider.llm_provider_registry import LLMType, register_provider from metagpt.provider.openai_api import log_and_reraise from metagpt.utils.cost_manager import CostManager -from metagpt.utils.token_counter import DashScore_TOKEN_COSTS +from metagpt.utils.token_counter import DashScope_TOKEN_COSTS def build_api_arequest( @@ -167,7 +167,7 @@ class DashScopeLLM(BaseLLM): def __init_dashscope(self): self.model = self.config.model self.api_key = self.config.api_key - self.token_costs = DashScore_TOKEN_COSTS + self.token_costs = DashScope_TOKEN_COSTS self.aclient: AGeneration = AGeneration # check support system_message models diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 53dc9e9c7..220014d8f 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -91,10 +91,10 @@ QianFan_EndPoint_TOKEN_COSTS = { } """ -DashScore Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing +DashScope Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing Different model has different detail page. Attention, some model are free for a limited time. """ -DashScore_TOKEN_COSTS = { +DashScope_TOKEN_COSTS = { "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011}, "qwen-plus": {"prompt": 0.0028, "completion": 0.0028}, "qwen-max": {"prompt": 0.0, "completion": 0.0}, diff --git a/tests/metagpt/provider/test_dashscope_api.py b/tests/metagpt/provider/test_dashscope_api.py index df6e92af0..a6dd8f247 100644 --- a/tests/metagpt/provider/test_dashscope_api.py +++ b/tests/metagpt/provider/test_dashscope_api.py @@ -62,12 +62,12 @@ async def test_dashscope_acompletion(mocker): mocker.patch("dashscope.aigc.generation.Generation.call", mock_dashscope_call) mocker.patch("metagpt.provider.dashscope_api.AGeneration.acall", mock_dashscope_acall) - dashscore_llm = DashScopeLLM(mock_llm_config_dashscope) + dashscope_llm = DashScopeLLM(mock_llm_config_dashscope) - resp = dashscore_llm.completion(messages) + resp = dashscope_llm.completion(messages) assert resp.choices[0]["message"]["content"] == resp_cont - resp = await dashscore_llm.acompletion(messages) + resp = await dashscope_llm.acompletion(messages) assert resp.choices[0]["message"]["content"] == resp_cont - await llm_general_chat_funcs_test(dashscore_llm, prompt, messages, resp_cont) + await llm_general_chat_funcs_test(dashscope_llm, prompt, messages, resp_cont) From 51bd8a056e4a3f4ffc4bdda22018429841a793b4 Mon Sep 17 00:00:00 2001 From: betterwang Date: Wed, 28 Feb 2024 09:36:44 +0800 Subject: [PATCH 10/12] update missing content --- metagpt/configs/llm_config.py | 7 ++++++- metagpt/provider/base_llm.py | 25 ++++++++++++++++++++++++- metagpt/utils/cost_manager.py | 4 +++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py index 4ed741af3..36f5d7ae7 100644 --- a/metagpt/configs/llm_config.py +++ b/metagpt/configs/llm_config.py @@ -38,13 +38,18 @@ class LLMConfig(YamlModel): Optional Fields in pydantic: https://docs.pydantic.dev/latest/migration/#required-optional-and-nullable-fields """ - api_key: str + api_key: str = "sk-" api_type: LLMType = LLMType.OPENAI base_url: str = "https://api.openai.com/v1" api_version: Optional[str] = None model: Optional[str] = None # also stands for DEPLOYMENT_NAME + # For Cloud Service Provider like Baidu/ Alibaba + access_key: Optional[str] = None + secret_key: Optional[str] = None + endpoint: Optional[str] = None # for self-deployed model on the cloud + # For Spark(Xunfei), maybe remove later app_id: Optional[str] = None api_secret: Optional[str] = None diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py index b144471b5..2f57b15aa 100644 --- a/metagpt/provider/base_llm.py +++ b/metagpt/provider/base_llm.py @@ -11,11 +11,12 @@ from abc import ABC, abstractmethod from typing import Optional, Union from openai import AsyncOpenAI +from pydantic import BaseModel from metagpt.configs.llm_config import LLMConfig from metagpt.logs import logger from metagpt.schema import Message -from metagpt.utils.cost_manager import CostManager +from metagpt.utils.cost_manager import CostManager, Costs class BaseLLM(ABC): @@ -67,6 +68,28 @@ class BaseLLM(ABC): def _default_system_msg(self): return self._system_msg(self.system_prompt) + def _update_costs(self, usage: Union[dict, BaseModel], model: str = None, local_calc_usage: bool = True): + """update each request's token cost + Args: + model (str): model name or in some scenarios called endpoint + local_calc_usage (bool): some models don't calculate usage, it will overwrite LLMConfig.calc_usage + """ + calc_usage = self.config.calc_usage and local_calc_usage + model = model if model else self.model + usage = usage.model_dump() if isinstance(usage, BaseModel) else usage + if calc_usage and self.cost_manager: + try: + prompt_tokens = int(usage.get("prompt_tokens", 0)) + completion_tokens = int(usage.get("completion_tokens", 0)) + self.cost_manager.update_cost(prompt_tokens, completion_tokens, model) + except Exception as e: + logger.error(f"{self.__class__.__name__} updats costs failed! exp: {e}") + + def get_costs(self) -> Costs: + if not self.cost_manager: + return Costs(0, 0, 0, 0) + return self.cost_manager.get_costs() + async def aask( self, msg: str, diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py index c3aa7323f..efff07ae1 100644 --- a/metagpt/utils/cost_manager.py +++ b/metagpt/utils/cost_manager.py @@ -29,6 +29,7 @@ class CostManager(BaseModel): total_budget: float = 0 max_budget: float = 10.0 total_cost: float = 0 + token_costs: dict[str, dict[str, float]] = TOKEN_COSTS # different model's token cost def update_cost(self, prompt_tokens, completion_tokens, model): """ @@ -46,7 +47,8 @@ class CostManager(BaseModel): return cost = ( - prompt_tokens * TOKEN_COSTS[model]["prompt"] + completion_tokens * TOKEN_COSTS[model]["completion"] + prompt_tokens * self.token_costs[model]["prompt"] + + completion_tokens * self.token_costs[model]["completion"] ) / 1000 self.total_cost += cost logger.info( From 21d588ae0e793d5154b9793bb9ba1194e0fb2d07 Mon Sep 17 00:00:00 2001 From: betterwang Date: Wed, 28 Feb 2024 09:38:12 +0800 Subject: [PATCH 11/12] update missing qifan ut --- tests/metagpt/provider/test_qianfan_api.py | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/metagpt/provider/test_qianfan_api.py diff --git a/tests/metagpt/provider/test_qianfan_api.py b/tests/metagpt/provider/test_qianfan_api.py new file mode 100644 index 000000000..28341425c --- /dev/null +++ b/tests/metagpt/provider/test_qianfan_api.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : the unittest of qianfan api + +from typing import AsyncIterator, Union + +import pytest +from qianfan.resources.typing import JsonBody, QfResponse + +from metagpt.provider.qianfan_api import QianFanLLM +from tests.metagpt.provider.mock_llm_config import mock_llm_config_qianfan +from tests.metagpt.provider.req_resp_const import ( + get_qianfan_response, + llm_general_chat_funcs_test, + messages, + prompt, + resp_cont_tmpl, +) + +name = "ERNIE-Bot-turbo" +resp_cont = resp_cont_tmpl.format(name=name) + + +def mock_qianfan_do(self, messages: list[dict], model: str, stream: bool = False, system: str = None) -> QfResponse: + return get_qianfan_response(name=name) + + +async def mock_qianfan_ado( + self, messages: list[dict], model: str, stream: bool = True, system: str = None +) -> Union[QfResponse, AsyncIterator[QfResponse]]: + resps = [get_qianfan_response(name=name)] + if stream: + + async def aresp_iterator(resps: list[JsonBody]): + for resp in resps: + yield resp + + return aresp_iterator(resps) + else: + return resps[0] + + +@pytest.mark.asyncio +async def test_qianfan_acompletion(mocker): + mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.do", mock_qianfan_do) + mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.ado", mock_qianfan_ado) + + qianfan_llm = QianFanLLM(mock_llm_config_qianfan) + + resp = qianfan_llm.completion(messages) + assert resp.get("result") == resp_cont + + resp = await qianfan_llm.acompletion(messages) + assert resp.get("result") == resp_cont + + await llm_general_chat_funcs_test(qianfan_llm, prompt, messages, resp_cont) From ad88c5000d135303a23a22da56ba240867ec44fc Mon Sep 17 00:00:00 2001 From: betterwang Date: Wed, 28 Feb 2024 12:09:33 +0800 Subject: [PATCH 12/12] fix typo --- examples/llm_hello_world.py | 6 ++-- metagpt/provider/base_llm.py | 4 +-- metagpt/provider/dashscope_api.py | 4 +-- metagpt/provider/qianfan_api.py | 12 ++++---- metagpt/utils/token_counter.py | 48 +++++++++++++++---------------- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py index 9340f7d5a..62fc2ed68 100644 --- a/examples/llm_hello_world.py +++ b/examples/llm_hello_world.py @@ -14,9 +14,9 @@ from metagpt.logs import logger async def main(): llm = LLM() # llm type check - id_ques = "what's your name" - logger.info(f"{id_ques}: ") - logger.info(await llm.aask(id_ques)) + question = "what's your name" + logger.info(f"{question}: ") + logger.info(await llm.aask(question)) logger.info("\n\n") logger.info( diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py index 2f57b15aa..7cf3faac0 100644 --- a/metagpt/provider/base_llm.py +++ b/metagpt/provider/base_llm.py @@ -75,7 +75,7 @@ class BaseLLM(ABC): local_calc_usage (bool): some models don't calculate usage, it will overwrite LLMConfig.calc_usage """ calc_usage = self.config.calc_usage and local_calc_usage - model = model if model else self.model + model = model or self.model usage = usage.model_dump() if isinstance(usage, BaseModel) else usage if calc_usage and self.cost_manager: try: @@ -83,7 +83,7 @@ class BaseLLM(ABC): completion_tokens = int(usage.get("completion_tokens", 0)) self.cost_manager.update_cost(prompt_tokens, completion_tokens, model) except Exception as e: - logger.error(f"{self.__class__.__name__} updats costs failed! exp: {e}") + logger.error(f"{self.__class__.__name__} updates costs failed! exp: {e}") def get_costs(self) -> Costs: if not self.cost_manager: diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py index 7d7dcb7b7..f2b3a19a1 100644 --- a/metagpt/provider/dashscope_api.py +++ b/metagpt/provider/dashscope_api.py @@ -37,7 +37,7 @@ from metagpt.provider.base_llm import BaseLLM, LLMConfig from metagpt.provider.llm_provider_registry import LLMType, register_provider from metagpt.provider.openai_api import log_and_reraise from metagpt.utils.cost_manager import CostManager -from metagpt.utils.token_counter import DashScope_TOKEN_COSTS +from metagpt.utils.token_counter import DASHSCOPE_TOKEN_COSTS def build_api_arequest( @@ -167,7 +167,7 @@ class DashScopeLLM(BaseLLM): def __init_dashscope(self): self.model = self.config.model self.api_key = self.config.api_key - self.token_costs = DashScope_TOKEN_COSTS + self.token_costs = DASHSCOPE_TOKEN_COSTS self.aclient: AGeneration = AGeneration # check support system_message models diff --git a/metagpt/provider/qianfan_api.py b/metagpt/provider/qianfan_api.py index 6f94b9cea..4cbb76566 100644 --- a/metagpt/provider/qianfan_api.py +++ b/metagpt/provider/qianfan_api.py @@ -22,8 +22,8 @@ from metagpt.provider.llm_provider_registry import register_provider from metagpt.provider.openai_api import log_and_reraise from metagpt.utils.cost_manager import CostManager from metagpt.utils.token_counter import ( - QianFan_EndPoint_TOKEN_COSTS, - QianFan_MODEL_TOKEN_COSTS, + QIANFAN_ENDPOINT_TOKEN_COSTS, + QIANFAN_MODEL_TOKEN_COSTS, ) @@ -74,8 +74,8 @@ class QianFanLLM(BaseLLM): assert not (self.config.model and self.config.endpoint), "Only set `model` or `endpoint` in the config" assert self.config.model or self.config.endpoint, "Should set one of `model` or `endpoint` in the config" - self.token_costs = copy.deepcopy(QianFan_MODEL_TOKEN_COSTS) - self.token_costs.update(QianFan_EndPoint_TOKEN_COSTS) + self.token_costs = copy.deepcopy(QIANFAN_MODEL_TOKEN_COSTS) + self.token_costs.update(QIANFAN_ENDPOINT_TOKEN_COSTS) # self deployed model on the cloud not to calculate usage, it charges resource pool rental fee self.calc_usage = self.config.calc_usage and self.config.endpoint is None @@ -103,8 +103,8 @@ class QianFanLLM(BaseLLM): def _update_costs(self, usage: dict): """update each request's token cost""" - model_or_endpoint = self.config.model if self.config.model else self.config.endpoint - local_calc_usage = True if model_or_endpoint in self.token_costs else False + model_or_endpoint = self.config.model or self.config.endpoint + local_calc_usage = model_or_endpoint in self.token_costs super()._update_costs(usage, model_or_endpoint, local_calc_usage) def get_choice_text(self, resp: JsonBody) -> str: diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index 220014d8f..167a1d755 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -42,7 +42,7 @@ TOKEN_COSTS = { QianFan Token Price https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9 Due to QianFan has multi price strategies, we unify `Tokens post-payment` as a statistical method. """ -QianFan_MODEL_TOKEN_COSTS = { +QIANFAN_MODEL_TOKEN_COSTS = { "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017}, "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067}, "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017}, @@ -66,35 +66,35 @@ QianFan_MODEL_TOKEN_COSTS = { "Yi-34B-Chat": {"prompt": 0.0, "completion": 0.0}, } -QianFan_EndPoint_TOKEN_COSTS = { - "completions_pro": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot-4"], - "ernie_bot_8k": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"], - "completions": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot"], - "eb-instant": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"], - "ai_apaas": QianFan_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"], - "ernie_speed": QianFan_MODEL_TOKEN_COSTS["ERNIE-Speed"], - "bloomz_7b1": QianFan_MODEL_TOKEN_COSTS["BLOOMZ-7B"], - "llama_2_7b": QianFan_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"], - "llama_2_13b": QianFan_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"], - "llama_2_70b": QianFan_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"], - "chatglm2_6b_32k": QianFan_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"], - "aquilachat_7b": QianFan_MODEL_TOKEN_COSTS["AquilaChat-7B"], - "mixtral_8x7b_instruct": QianFan_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"], - "sqlcoder_7b": QianFan_MODEL_TOKEN_COSTS["SQLCoder-7B"], - "codellama_7b_instruct": QianFan_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"], - "xuanyuan_70b_chat": QianFan_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"], - "qianfan_bloomz_7b_compressed": QianFan_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"], - "qianfan_chinese_llama_2_7b": QianFan_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"], - "qianfan_chinese_llama_2_13b": QianFan_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"], - "chatlaw": QianFan_MODEL_TOKEN_COSTS["ChatLaw"], - "yi_34b_chat": QianFan_MODEL_TOKEN_COSTS["Yi-34B-Chat"], +QIANFAN_ENDPOINT_TOKEN_COSTS = { + "completions_pro": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-4"], + "ernie_bot_8k": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"], + "completions": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot"], + "eb-instant": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"], + "ai_apaas": QIANFAN_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"], + "ernie_speed": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Speed"], + "bloomz_7b1": QIANFAN_MODEL_TOKEN_COSTS["BLOOMZ-7B"], + "llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"], + "llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"], + "llama_2_70b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"], + "chatglm2_6b_32k": QIANFAN_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"], + "aquilachat_7b": QIANFAN_MODEL_TOKEN_COSTS["AquilaChat-7B"], + "mixtral_8x7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"], + "sqlcoder_7b": QIANFAN_MODEL_TOKEN_COSTS["SQLCoder-7B"], + "codellama_7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"], + "xuanyuan_70b_chat": QIANFAN_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"], + "qianfan_bloomz_7b_compressed": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"], + "qianfan_chinese_llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"], + "qianfan_chinese_llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"], + "chatlaw": QIANFAN_MODEL_TOKEN_COSTS["ChatLaw"], + "yi_34b_chat": QIANFAN_MODEL_TOKEN_COSTS["Yi-34B-Chat"], } """ DashScope Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing Different model has different detail page. Attention, some model are free for a limited time. """ -DashScope_TOKEN_COSTS = { +DASHSCOPE_TOKEN_COSTS = { "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011}, "qwen-plus": {"prompt": 0.0028, "completion": 0.0028}, "qwen-max": {"prompt": 0.0, "completion": 0.0},