From c7ee54ace108f24571a5434ded2452a102be7c86 Mon Sep 17 00:00:00 2001
From: better629 <webdesign_xmu2012@163.com>
Date: Wed, 7 Feb 2024 22:50:30 +0800
Subject: [PATCH 01/12] add dashscope including QWEN and its ut code

---
 examples/llm_hello_world.py                  |   2 +-
 metagpt/configs/llm_config.py                |   1 +
 metagpt/provider/__init__.py                 |   2 +
 metagpt/provider/dashscope_api.py            | 246 +++++++++++++++++++
 metagpt/utils/token_counter.py               |  29 +++
 requirements.txt                             |   1 +
 tests/metagpt/provider/mock_llm_config.py    |   2 +
 tests/metagpt/provider/req_resp_const.py     |  28 +++
 tests/metagpt/provider/test_dashscope_api.py |  61 +++++
 9 files changed, 371 insertions(+), 1 deletion(-)
 create mode 100644 metagpt/provider/dashscope_api.py
 create mode 100644 tests/metagpt/provider/test_dashscope_api.py

diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py
index e22edbdf2..9340f7d5a 100644
--- a/examples/llm_hello_world.py
+++ b/examples/llm_hello_world.py
@@ -21,7 +21,7 @@ async def main():
 
     logger.info(
         await llm.aask(
-            "who are you", system_msgs=["act as a robot, answer 'I'am robot' if the question is 'who are you'"]
+            "who are you", system_msgs=["act as a robot, just answer 'I'am robot' if the question is 'who are you'"]
         )
     )
 
diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py
index 1b05b5270..36f5d7ae7 100644
--- a/metagpt/configs/llm_config.py
+++ b/metagpt/configs/llm_config.py
@@ -25,6 +25,7 @@ class LLMType(Enum):
     AZURE = "azure"
     OLLAMA = "ollama"
     QIANFAN = "qianfan"  # Baidu BCE
+    DASHSCOPE = "dashscope"  # Aliyun LingJi DashScope
 
     def __missing__(self, key):
         return self.OPENAI
diff --git a/metagpt/provider/__init__.py b/metagpt/provider/__init__.py
index 8c0aab836..44e6d3f3b 100644
--- a/metagpt/provider/__init__.py
+++ b/metagpt/provider/__init__.py
@@ -17,6 +17,7 @@ from metagpt.provider.metagpt_api import MetaGPTLLM
 from metagpt.provider.human_provider import HumanProvider
 from metagpt.provider.spark_api import SparkLLM
 from metagpt.provider.qianfan_api import QianFanLLM
+from metagpt.provider.dashscope_api import DashScopeLLM
 
 __all__ = [
     "FireworksLLM",
@@ -30,4 +31,5 @@ __all__ = [
     "HumanProvider",
     "SparkLLM",
     "QianFanLLM",
+    "DashScopeLLM",
 ]
diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py
new file mode 100644
index 000000000..58031e452
--- /dev/null
+++ b/metagpt/provider/dashscope_api.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   :
+
+import json
+from http import HTTPStatus
+from typing import Any, AsyncGenerator, Dict, List, Union
+
+import dashscope
+from dashscope.aigc.generation import Generation
+from dashscope.api_entities.aiohttp_request import AioHttpRequest
+from dashscope.api_entities.api_request_data import ApiRequestData
+from dashscope.api_entities.api_request_factory import _get_protocol_params
+from dashscope.api_entities.dashscope_response import (
+    GenerationOutput,
+    GenerationResponse,
+    Message,
+)
+from dashscope.client.base_api import BaseAioApi
+from dashscope.common.constants import SERVICE_API_PATH, ApiProtocol
+from dashscope.common.error import (
+    InputDataRequired,
+    InputRequired,
+    ModelRequired,
+    UnsupportedApiProtocol,
+)
+from tenacity import (
+    after_log,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+from metagpt.logs import log_llm_stream, logger
+from metagpt.provider.base_llm import BaseLLM, LLMConfig
+from metagpt.provider.llm_provider_registry import LLMType, register_provider
+from metagpt.provider.openai_api import log_and_reraise
+from metagpt.utils.cost_manager import CostManager
+from metagpt.utils.token_counter import DashScore_TOKEN_COSTS
+
+
+def build_api_arequest(
+    model: str, input: object, task_group: str, task: str, function: str, api_key: str, is_service=True, **kwargs
+):
+    (
+        api_protocol,
+        ws_stream_mode,
+        is_binary_input,
+        http_method,
+        stream,
+        async_request,
+        query,
+        headers,
+        request_timeout,
+        form,
+        resources,
+    ) = _get_protocol_params(kwargs)
+    task_id = kwargs.pop("task_id", None)
+    if api_protocol in [ApiProtocol.HTTP, ApiProtocol.HTTPS]:
+        if not dashscope.base_http_api_url.endswith("/"):
+            http_url = dashscope.base_http_api_url + "/"
+        else:
+            http_url = dashscope.base_http_api_url
+
+        if is_service:
+            http_url = http_url + SERVICE_API_PATH + "/"
+
+        if task_group:
+            http_url += "%s/" % task_group
+        if task:
+            http_url += "%s/" % task
+        if function:
+            http_url += function
+        request = AioHttpRequest(
+            url=http_url,
+            api_key=api_key,
+            http_method=http_method,
+            stream=stream,
+            async_request=async_request,
+            query=query,
+            timeout=request_timeout,
+            task_id=task_id,
+        )
+    else:
+        raise UnsupportedApiProtocol("Unsupported protocol: %s, support [http, https, websocket]" % api_protocol)
+
+    if headers is not None:
+        request.add_headers(headers=headers)
+
+    if input is None and form is None:
+        raise InputDataRequired("There is no input data and form data")
+
+    request_data = ApiRequestData(
+        model,
+        task_group=task_group,
+        task=task,
+        function=function,
+        input=input,
+        form=form,
+        is_binary_input=is_binary_input,
+        api_protocol=api_protocol,
+    )
+    request_data.add_resources(resources)
+    request_data.add_parameters(**kwargs)
+    request.data = request_data
+    return request
+
+
+class AGeneration(Generation, BaseAioApi):
+    @classmethod
+    async def acall(
+        cls,
+        model: str,
+        prompt: Any = None,
+        history: list = None,
+        api_key: str = None,
+        messages: List[Message] = None,
+        plugins: Union[str, Dict[str, Any]] = None,
+        **kwargs,
+    ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]:
+        if (prompt is None or not prompt) and (messages is None or not messages):
+            raise InputRequired("prompt or messages is required!")
+        if model is None or not model:
+            raise ModelRequired("Model is required!")
+        task_group, function = "aigc", "generation"  # fixed value
+        if plugins is not None:
+            headers = kwargs.pop("headers", {})
+            if isinstance(plugins, str):
+                headers["X-DashScope-Plugin"] = plugins
+            else:
+                headers["X-DashScope-Plugin"] = json.dumps(plugins)
+            kwargs["headers"] = headers
+        input, parameters = cls._build_input_parameters(model, prompt, history, messages, **kwargs)
+
+        api_key, model = BaseAioApi._validate_params(api_key, model)
+        request = build_api_arequest(
+            model=model,
+            input=input,
+            task_group=task_group,
+            task=Generation.task,
+            function=function,
+            api_key=api_key,
+            **kwargs,
+        )
+        response = await request.aio_call()
+        is_stream = kwargs.get("stream", False)
+        if is_stream:
+
+            async def aresp_iterator(response):
+                async for resp in response:
+                    yield GenerationResponse.from_api_response(resp)
+
+            return aresp_iterator(response)
+        else:
+            return GenerationResponse.from_api_response(response)
+
+
+@register_provider(LLMType.DASHSCOPE)
+class DashScopeLLM(BaseLLM):
+    def __init__(self, llm_config: LLMConfig):
+        self.config = llm_config
+        self.use_system_prompt = False  # only some models support system_prompt
+        self.__init_dashscope()
+        self.cost_manager = CostManager(token_costs=self.token_costs)
+
+    def __init_dashscope(self):
+        self.model = self.config.model
+        self.api_key = self.config.api_key
+        self.token_costs = DashScore_TOKEN_COSTS
+        self.aclient: AGeneration = AGeneration
+
+        # check support system_message models
+        support_system_models = [
+            "qwen-",  # all support
+            "llama2-",  # all support
+            "baichuan2-7b-chat-v1",
+            "chatglm3-6b",
+        ]
+        for support_model in support_system_models:
+            if support_model in self.model:
+                self.use_system_prompt = True
+
+    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
+        kwargs = {
+            "api_key": self.api_key,
+            "model": self.model,
+            "messages": messages,
+            "stream": stream,
+            "result_format": "message",
+        }
+        if self.config.temperature > 0:
+            # different model has default temperature. only set when it"s specified.
+            kwargs["temperature"] = self.config.temperature
+        return kwargs
+
+    def _check_response(self, resp: GenerationResponse):
+        if resp.status_code != HTTPStatus.OK:
+            raise RuntimeError(f"code: {resp.code}, request_id: {resp.request_id}, message: {resp.message}")
+
+    def get_choice_text(self, output: GenerationOutput) -> str:
+        return output.get("choices", [{}])[0].get("message", {}).get("content", "")
+
+    def completion(self, messages: list[dict]) -> GenerationOutput:
+        resp: GenerationResponse = self.aclient.call(**self._const_kwargs(messages, stream=False))
+        self._check_response(resp)
+
+        self._update_costs(dict(resp.usage))
+        return resp.output
+
+    async def _achat_completion(self, messages: list[dict]) -> GenerationOutput:
+        resp: GenerationResponse = await self.aclient.acall(**self._const_kwargs(messages, stream=False))
+        self._check_response(resp)
+        self._update_costs(dict(resp.usage))
+        return resp.output
+
+    async def acompletion(self, messages: list[dict], timeout=3) -> GenerationOutput:
+        return await self._achat_completion(messages)
+
+    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+        resp = await self.aclient.acall(**self._const_kwargs(messages, stream=True))
+        collected_content = []
+        usage = {}
+        async for chunk in resp:
+            self._check_response(chunk)
+            content = chunk.output.choices[0]["message"]["content"]
+            usage = dict(chunk.usage)  # each chunk has usage
+            log_llm_stream(content)
+            collected_content.append(content)
+        log_llm_stream("\n")
+        self._update_costs(usage)
+        full_content = "".join(collected_content)
+        return full_content
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_random_exponential(min=1, max=60),
+        after=after_log(logger, logger.level("WARNING").name),
+        retry=retry_if_exception_type(ConnectionError),
+        retry_error_callback=log_and_reraise,
+    )
+    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
+        if stream:
+            return await self._achat_completion_stream(messages)
+        resp = await self._achat_completion(messages)
+        return self.get_choice_text(resp)
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index b69ec73d3..cac706f6c 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -90,6 +90,35 @@ QianFan_EndPoint_TOKEN_COSTS = {
     "yi_34b_chat": QianFan_MODEL_TOKEN_COSTS["Yi-34B-Chat"],
 }
 
+"""
+DashScore Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
+Different model has different detail page. Attention, some model are free for a limited time.
+"""
+DashScore_TOKEN_COSTS = {
+    "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011},
+    "qwen-plus": {"prompt": 0.0028, "completion": 0.0028},
+    "qwen-max": {"prompt": 0.0, "completion": 0.0},
+    "qwen-max-1201": {"prompt": 0.0, "completion": 0.0},
+    "qwen-max-longcontext": {"prompt": 0.0, "completion": 0.0},
+    "llama2-7b-chat-v2": {"prompt": 0.0, "completion": 0.0},
+    "llama2-13b-chat-v2": {"prompt": 0.0, "completion": 0.0},
+    "qwen-72b-chat": {"prompt": 0.0, "completion": 0.0},
+    "qwen-14b-chat": {"prompt": 0.0011, "completion": 0.0011},
+    "qwen-7b-chat": {"prompt": 0.00084, "completion": 0.00084},
+    "qwen-1.8b-chat": {"prompt": 0.0, "completion": 0.0},
+    "baichuan2-13b-chat-v1": {"prompt": 0.0011, "completion": 0.0011},
+    "baichuan2-7b-chat-v1": {"prompt": 0.00084, "completion": 0.00084},
+    "baichuan-7b-v1": {"prompt": 0.0, "completion": 0.0},
+    "chatglm-6b-v2": {"prompt": 0.0011, "completion": 0.0011},
+    "chatglm3-6b": {"prompt": 0.0, "completion": 0.0},
+    "ziya-llama-13b-v1": {"prompt": 0.0, "completion": 0.0},  # no price page, judge it as free
+    "dolly-12b-v2": {"prompt": 0.0, "completion": 0.0},
+    "belle-llama-13b-2m-v1": {"prompt": 0.0, "completion": 0.0},
+    "moss-moon-003-sft-v1": {"prompt": 0.0, "completion": 0.0},
+    "chatyuan-large-v2": {"prompt": 0.0, "completion": 0.0},
+    "billa-7b-sft-v1": {"prompt": 0.0, "completion": 0.0},
+}
+
 
 TOKEN_MAX = {
     "gpt-3.5-turbo": 4096,
diff --git a/requirements.txt b/requirements.txt
index c893bd713..ff5f97252 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -68,3 +68,4 @@ anytree
 ipywidgets==8.1.1
 Pillow
 qianfan==0.3.1
+dashscope==1.14.1
diff --git a/tests/metagpt/provider/mock_llm_config.py b/tests/metagpt/provider/mock_llm_config.py
index e0afaa51e..bb5508d80 100644
--- a/tests/metagpt/provider/mock_llm_config.py
+++ b/tests/metagpt/provider/mock_llm_config.py
@@ -54,3 +54,5 @@ mock_llm_config_spark = LLMConfig(
 )
 
 mock_llm_config_qianfan = LLMConfig(api_type="qianfan", access_key="xxx", secret_key="xxx", model="ERNIE-Bot-turbo")
+
+mock_llm_config_dashscope = LLMConfig(api_type="dashscore", api_key="xxx", model="qwen-max")
diff --git a/tests/metagpt/provider/req_resp_const.py b/tests/metagpt/provider/req_resp_const.py
index 73939e1c6..802962013 100644
--- a/tests/metagpt/provider/req_resp_const.py
+++ b/tests/metagpt/provider/req_resp_const.py
@@ -3,6 +3,12 @@
 # @Desc   : default request & response data for provider unittest
 
 
+from dashscope.api_entities.dashscope_response import (
+    DashScopeAPIResponse,
+    GenerationOutput,
+    GenerationResponse,
+    GenerationUsage,
+)
 from openai.types.chat.chat_completion import (
     ChatCompletion,
     ChatCompletionMessage,
@@ -102,6 +108,28 @@ def get_qianfan_response(name: str) -> QfResponse:
     return QfResponse(code=200, body=qf_jsonbody_dict)
 
 
+# For DashScope
+def get_dashscope_response(name: str) -> GenerationResponse:
+    return GenerationResponse.from_api_response(
+        DashScopeAPIResponse(
+            status_code=200,
+            output=GenerationOutput(
+                **{
+                    "text": "",
+                    "finish_reason": "",
+                    "choices": [
+                        {
+                            "finish_reason": "stop",
+                            "message": {"role": "assistant", "content": resp_cont_tmpl.format(name=name)},
+                        }
+                    ],
+                }
+            ),
+            usage=GenerationUsage(**{"input_tokens": 12, "output_tokens": 98, "total_tokens": 110}),
+        )
+    )
+
+
 # For llm general chat functions call
 async def llm_general_chat_funcs_test(llm: BaseLLM, prompt: str, messages: list[dict], resp_cont: str):
     resp = await llm.aask(prompt, stream=False)
diff --git a/tests/metagpt/provider/test_dashscope_api.py b/tests/metagpt/provider/test_dashscope_api.py
new file mode 100644
index 000000000..48a676bc7
--- /dev/null
+++ b/tests/metagpt/provider/test_dashscope_api.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : the unittest of DashScopeLLM
+
+from typing import AsyncGenerator, Union
+
+import pytest
+from dashscope.api_entities.dashscope_response import GenerationResponse
+
+from metagpt.provider.dashscope_api import DashScopeLLM
+from tests.metagpt.provider.mock_llm_config import mock_llm_config_dashscope
+from tests.metagpt.provider.req_resp_const import (
+    get_dashscope_response,
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)
+
+name = "qwen-max"
+resp_cont = resp_cont_tmpl.format(name=name)
+
+
+@classmethod
+def mock_dashscope_call(
+    cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False
+) -> GenerationResponse:
+    return get_dashscope_response(name)
+
+
+@classmethod
+async def mock_dashscope_acall(
+    cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False
+) -> Union[AsyncGenerator[GenerationResponse, None], GenerationResponse]:
+    resps = [get_dashscope_response(name)]
+
+    if stream:
+
+        async def aresp_iterator(resps: list[GenerationResponse]):
+            for resp in resps:
+                yield resp
+
+        return aresp_iterator(resps)
+    else:
+        return resps[0]
+
+
+@pytest.mark.asyncio
+async def test_dashscope_acompletion(mocker):
+    mocker.patch("dashscope.aigc.generation.Generation.call", mock_dashscope_call)
+    mocker.patch("metagpt.provider.dashscope_api.AGeneration.acall", mock_dashscope_acall)
+
+    dashscore_llm = DashScopeLLM(mock_llm_config_dashscope)
+
+    resp = dashscore_llm.completion(messages)
+    assert resp.choices[0]["message"]["content"] == resp_cont
+
+    resp = await dashscore_llm.acompletion(messages)
+    assert resp.choices[0]["message"]["content"] == resp_cont
+
+    await llm_general_chat_funcs_test(dashscore_llm, prompt, messages, resp_cont)

From e22f5c72ec7936b7a8f156208dc5be628a21de13 Mon Sep 17 00:00:00 2001
From: better629 <webdesign_xmu2012@163.com>
Date: Wed, 7 Feb 2024 23:01:40 +0800
Subject: [PATCH 02/12] fix spelling mistake

---
 tests/metagpt/provider/mock_llm_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/metagpt/provider/mock_llm_config.py b/tests/metagpt/provider/mock_llm_config.py
index bb5508d80..e75acf68f 100644
--- a/tests/metagpt/provider/mock_llm_config.py
+++ b/tests/metagpt/provider/mock_llm_config.py
@@ -55,4 +55,4 @@ mock_llm_config_spark = LLMConfig(
 
 mock_llm_config_qianfan = LLMConfig(api_type="qianfan", access_key="xxx", secret_key="xxx", model="ERNIE-Bot-turbo")
 
-mock_llm_config_dashscope = LLMConfig(api_type="dashscore", api_key="xxx", model="qwen-max")
+mock_llm_config_dashscope = LLMConfig(api_type="dashscope", api_key="xxx", model="qwen-max")

From 4dde9b71304bd3260ae0f675567fd0a0bfc2b085 Mon Sep 17 00:00:00 2001
From: better629 <webdesign_xmu2012@163.com>
Date: Wed, 7 Feb 2024 23:15:56 +0800
Subject: [PATCH 03/12] stream using log_llm_stream

---
 metagpt/provider/fireworks_api.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/metagpt/provider/fireworks_api.py b/metagpt/provider/fireworks_api.py
index e62a7066e..d0394296d 100644
--- a/metagpt/provider/fireworks_api.py
+++ b/metagpt/provider/fireworks_api.py
@@ -16,7 +16,7 @@ from tenacity import (
 )
 
 from metagpt.configs.llm_config import LLMConfig, LLMType
-from metagpt.logs import logger
+from metagpt.logs import log_llm_stream, logger
 from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.provider.openai_api import OpenAILLM, log_and_reraise
 from metagpt.utils.cost_manager import CostManager
@@ -96,10 +96,11 @@ class FireworksLLM(OpenAILLM):
                 finish_reason = choice.finish_reason if hasattr(choice, "finish_reason") else None
                 if choice_delta.content:
                     collected_content.append(choice_delta.content)
-                    print(choice_delta.content, end="")
+                    log_llm_stream(choice_delta.content)
                 if finish_reason:
                     # fireworks api return usage when finish_reason is not None
                     usage = CompletionUsage(**chunk.usage)
+        log_llm_stream("\n")
 
         full_content = "".join(collected_content)
         self._update_costs(usage.model_dump())

From cec879f71b84667297356b99e49ad68cf1f503db Mon Sep 17 00:00:00 2001
From: better629 <webdesign_xmu2012@163.com>
Date: Sun, 18 Feb 2024 20:01:27 +0800
Subject: [PATCH 04/12] update repair llm with backslash problem

---
 metagpt/utils/repair_llm_raw_output.py            | 12 ++++++++++++
 tests/metagpt/utils/test_repair_llm_raw_output.py |  5 +++++
 2 files changed, 17 insertions(+)

diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py
index 06484f71d..b8756e8c6 100644
--- a/metagpt/utils/repair_llm_raw_output.py
+++ b/metagpt/utils/repair_llm_raw_output.py
@@ -119,6 +119,7 @@ def repair_json_format(output: str) -> str:
         logger.info(f"repair_json_format: {'}]'}")
     elif output.startswith("{") and output.endswith("]"):
         output = output[:-1] + "}"
+
     # remove comments in output json string, after json value content, maybe start with #, maybe start with //
     arr = output.split("\n")
     new_arr = []
@@ -208,6 +209,17 @@ def repair_invalid_json(output: str, error: str) -> str:
         elif (rline[col_no] in ["'", '"']) and (line.startswith('"') or line.startswith("'")) and "," not in line:
             # problem, `"""` or `'''` without `,`
             new_line = f",{line}"
+        elif col_no - 1 >= 0 and rline[col_no - 1] in ['"', "'"]:
+            # backslash problem like \" in the output
+            char = rline[col_no - 1]
+            nearest_char_idx = rline[col_no:].find(char)
+            new_line = (
+                rline[: col_no - 1]
+                + "\\"
+                + rline[col_no - 1 : col_no + nearest_char_idx]
+                + "\\"
+                + rline[col_no + nearest_char_idx :]
+            )
         elif '",' not in line and "," not in line and '"' not in line:
             new_line = f'{line}",'
         elif not line.endswith(","):
diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py
index e28423b91..7a29ea3ee 100644
--- a/tests/metagpt/utils/test_repair_llm_raw_output.py
+++ b/tests/metagpt/utils/test_repair_llm_raw_output.py
@@ -211,6 +211,11 @@ value
     output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1")
     assert output == target_output
 
+    raw_output = '{"key": "url "http" \\"https\\" "}'
+    target_output = '{"key": "url \\"http\\" \\"https\\" "}'
+    output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 1 column 15 (char 14)")
+    assert output == target_output
+
 
 def test_retry_parse_json_text():
     from metagpt.utils.repair_llm_raw_output import retry_parse_json_text

From 41d6d2be1d26518593c30ce8af9b9ddc865eeded Mon Sep 17 00:00:00 2001
From: better629 <webdesign_xmu2012@163.com>
Date: Sun, 18 Feb 2024 20:02:42 +0800
Subject: [PATCH 05/12] update dashscope stream output

---
 metagpt/provider/dashscope_api.py | 2 ++
 requirements.txt                  | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py
index 58031e452..c02a302e4 100644
--- a/metagpt/provider/dashscope_api.py
+++ b/metagpt/provider/dashscope_api.py
@@ -192,6 +192,8 @@ class DashScopeLLM(BaseLLM):
         if self.config.temperature > 0:
             # different model has default temperature. only set when it"s specified.
             kwargs["temperature"] = self.config.temperature
+        if stream:
+            kwargs["incremental_output"] = True
         return kwargs
 
     def _check_response(self, resp: GenerationResponse):
diff --git a/requirements.txt b/requirements.txt
index cfac35717..9759248bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,7 +27,7 @@ python_docx==0.8.11
 PyYAML==6.0.1
 # sentence_transformers==2.2.2
 setuptools==65.6.3
-tenacity==8.2.2
+tenacity==8.2.3
 tiktoken==0.5.2
 tqdm==4.65.0
 #unstructured[local-inference]
@@ -68,4 +68,4 @@ anytree
 ipywidgets==8.1.1
 Pillow
 qianfan==0.3.1
-dashscope==1.14.1
+dashscope==1.14.1
\ No newline at end of file

From 655ba5647b82106bd8fc63035bb3b8c5da4a479c Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Fri, 23 Feb 2024 12:45:06 +0800
Subject: [PATCH 06/12] update qianfan pypi version

---
 metagpt/utils/cost_manager.py | 2 +-
 requirements.txt              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py
index 4e6b65b2c..929d0b2e5 100644
--- a/metagpt/utils/cost_manager.py
+++ b/metagpt/utils/cost_manager.py
@@ -42,7 +42,7 @@ class CostManager(BaseModel):
         """
         self.total_prompt_tokens += prompt_tokens
         self.total_completion_tokens += completion_tokens
-        if model not in TOKEN_COSTS:
+        if model not in self.token_costs:
             logger.warning(f"Model {model} not found in TOKEN_COSTS.")
             return
 
diff --git a/requirements.txt b/requirements.txt
index 9759248bb..4651cb258 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -67,5 +67,5 @@ playwright>=1.26  # used at metagpt/tools/libs/web_scraping.py
 anytree
 ipywidgets==8.1.1
 Pillow
-qianfan==0.3.1
-dashscope==1.14.1
\ No newline at end of file
+qianfan==0.3.2
+dashscope==1.14.1

From 6893f78c212a65eeafb746e9fc55372a709aee61 Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Fri, 23 Feb 2024 12:54:46 +0800
Subject: [PATCH 07/12] update ut of dashscope

---
 tests/metagpt/provider/test_dashscope_api.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tests/metagpt/provider/test_dashscope_api.py b/tests/metagpt/provider/test_dashscope_api.py
index 48a676bc7..df6e92af0 100644
--- a/tests/metagpt/provider/test_dashscope_api.py
+++ b/tests/metagpt/provider/test_dashscope_api.py
@@ -23,14 +23,26 @@ resp_cont = resp_cont_tmpl.format(name=name)
 
 @classmethod
 def mock_dashscope_call(
-    cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False
+    cls,
+    messages: list[dict],
+    model: str,
+    api_key: str,
+    result_format: str,
+    incremental_output: bool = True,
+    stream: bool = False,
 ) -> GenerationResponse:
     return get_dashscope_response(name)
 
 
 @classmethod
 async def mock_dashscope_acall(
-    cls, messages: list[dict], model: str, api_key: str, result_format: str, stream: bool = False
+    cls,
+    messages: list[dict],
+    model: str,
+    api_key: str,
+    result_format: str,
+    incremental_output: bool = True,
+    stream: bool = False,
 ) -> Union[AsyncGenerator[GenerationResponse, None], GenerationResponse]:
     resps = [get_dashscope_response(name)]
 

From 79c71dc5ad980c32ee8103f8b8a08afb264ba217 Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Fri, 23 Feb 2024 20:28:35 +0800
Subject: [PATCH 08/12] update ERNIE-Bot token cost

---
 metagpt/utils/token_counter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index 5a1b2f2e7..53dc9e9c7 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -45,7 +45,7 @@ Due to QianFan has multi price strategies, we unify `Tokens post-payment` as a s
 QianFan_MODEL_TOKEN_COSTS = {
     "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017},
     "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067},
-    "ERNIE-Bot": {"prompt": 0.017, "completion": 0.017},
+    "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017},
     "ERNIE-Bot-turbo": {"prompt": 0.0011, "completion": 0.0011},
     "EB-turbo-AppBuilder": {"prompt": 0.0011, "completion": 0.0011},
     "ERNIE-Speed": {"prompt": 0.00056, "completion": 0.0011},

From 2d17da28262210a6f03977e7277811fdf3d0bb10 Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Sat, 24 Feb 2024 15:00:45 +0800
Subject: [PATCH 09/12] fix dashscope spelling

---
 metagpt/provider/dashscope_api.py            | 4 ++--
 metagpt/utils/token_counter.py               | 4 ++--
 tests/metagpt/provider/test_dashscope_api.py | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py
index c02a302e4..7d7dcb7b7 100644
--- a/metagpt/provider/dashscope_api.py
+++ b/metagpt/provider/dashscope_api.py
@@ -37,7 +37,7 @@ from metagpt.provider.base_llm import BaseLLM, LLMConfig
 from metagpt.provider.llm_provider_registry import LLMType, register_provider
 from metagpt.provider.openai_api import log_and_reraise
 from metagpt.utils.cost_manager import CostManager
-from metagpt.utils.token_counter import DashScore_TOKEN_COSTS
+from metagpt.utils.token_counter import DashScope_TOKEN_COSTS
 
 
 def build_api_arequest(
@@ -167,7 +167,7 @@ class DashScopeLLM(BaseLLM):
     def __init_dashscope(self):
         self.model = self.config.model
         self.api_key = self.config.api_key
-        self.token_costs = DashScore_TOKEN_COSTS
+        self.token_costs = DashScope_TOKEN_COSTS
         self.aclient: AGeneration = AGeneration
 
         # check support system_message models
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index 53dc9e9c7..220014d8f 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -91,10 +91,10 @@ QianFan_EndPoint_TOKEN_COSTS = {
 }
 
 """
-DashScore Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
+DashScope Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
 Different model has different detail page. Attention, some model are free for a limited time.
 """
-DashScore_TOKEN_COSTS = {
+DashScope_TOKEN_COSTS = {
     "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011},
     "qwen-plus": {"prompt": 0.0028, "completion": 0.0028},
     "qwen-max": {"prompt": 0.0, "completion": 0.0},
diff --git a/tests/metagpt/provider/test_dashscope_api.py b/tests/metagpt/provider/test_dashscope_api.py
index df6e92af0..a6dd8f247 100644
--- a/tests/metagpt/provider/test_dashscope_api.py
+++ b/tests/metagpt/provider/test_dashscope_api.py
@@ -62,12 +62,12 @@ async def test_dashscope_acompletion(mocker):
     mocker.patch("dashscope.aigc.generation.Generation.call", mock_dashscope_call)
     mocker.patch("metagpt.provider.dashscope_api.AGeneration.acall", mock_dashscope_acall)
 
-    dashscore_llm = DashScopeLLM(mock_llm_config_dashscope)
+    dashscope_llm = DashScopeLLM(mock_llm_config_dashscope)
 
-    resp = dashscore_llm.completion(messages)
+    resp = dashscope_llm.completion(messages)
     assert resp.choices[0]["message"]["content"] == resp_cont
 
-    resp = await dashscore_llm.acompletion(messages)
+    resp = await dashscope_llm.acompletion(messages)
     assert resp.choices[0]["message"]["content"] == resp_cont
 
-    await llm_general_chat_funcs_test(dashscore_llm, prompt, messages, resp_cont)
+    await llm_general_chat_funcs_test(dashscope_llm, prompt, messages, resp_cont)

From 51bd8a056e4a3f4ffc4bdda22018429841a793b4 Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Wed, 28 Feb 2024 09:36:44 +0800
Subject: [PATCH 10/12] update missing content

---
 metagpt/configs/llm_config.py |  7 ++++++-
 metagpt/provider/base_llm.py  | 25 ++++++++++++++++++++++++-
 metagpt/utils/cost_manager.py |  4 +++-
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py
index 4ed741af3..36f5d7ae7 100644
--- a/metagpt/configs/llm_config.py
+++ b/metagpt/configs/llm_config.py
@@ -38,13 +38,18 @@ class LLMConfig(YamlModel):
     Optional Fields in pydantic: https://docs.pydantic.dev/latest/migration/#required-optional-and-nullable-fields
     """
 
-    api_key: str
+    api_key: str = "sk-"
     api_type: LLMType = LLMType.OPENAI
     base_url: str = "https://api.openai.com/v1"
     api_version: Optional[str] = None
 
     model: Optional[str] = None  # also stands for DEPLOYMENT_NAME
 
+    # For Cloud Service Provider like Baidu/ Alibaba
+    access_key: Optional[str] = None
+    secret_key: Optional[str] = None
+    endpoint: Optional[str] = None  # for self-deployed model on the cloud
+
     # For Spark(Xunfei), maybe remove later
     app_id: Optional[str] = None
     api_secret: Optional[str] = None
diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index b144471b5..2f57b15aa 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -11,11 +11,12 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union
 
 from openai import AsyncOpenAI
+from pydantic import BaseModel
 
 from metagpt.configs.llm_config import LLMConfig
 from metagpt.logs import logger
 from metagpt.schema import Message
-from metagpt.utils.cost_manager import CostManager
+from metagpt.utils.cost_manager import CostManager, Costs
 
 
 class BaseLLM(ABC):
@@ -67,6 +68,28 @@ class BaseLLM(ABC):
     def _default_system_msg(self):
         return self._system_msg(self.system_prompt)
 
+    def _update_costs(self, usage: Union[dict, BaseModel], model: str = None, local_calc_usage: bool = True):
+        """update each request's token cost
+        Args:
+            model (str): model name or in some scenarios called endpoint
+            local_calc_usage (bool): some models don't calculate usage, it will overwrite LLMConfig.calc_usage
+        """
+        calc_usage = self.config.calc_usage and local_calc_usage
+        model = model if model else self.model
+        usage = usage.model_dump() if isinstance(usage, BaseModel) else usage
+        if calc_usage and self.cost_manager:
+            try:
+                prompt_tokens = int(usage.get("prompt_tokens", 0))
+                completion_tokens = int(usage.get("completion_tokens", 0))
+                self.cost_manager.update_cost(prompt_tokens, completion_tokens, model)
+            except Exception as e:
+                logger.error(f"{self.__class__.__name__} updats costs failed! exp: {e}")
+
+    def get_costs(self) -> Costs:
+        if not self.cost_manager:
+            return Costs(0, 0, 0, 0)
+        return self.cost_manager.get_costs()
+
     async def aask(
         self,
         msg: str,
diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py
index c3aa7323f..efff07ae1 100644
--- a/metagpt/utils/cost_manager.py
+++ b/metagpt/utils/cost_manager.py
@@ -29,6 +29,7 @@ class CostManager(BaseModel):
     total_budget: float = 0
     max_budget: float = 10.0
     total_cost: float = 0
+    token_costs: dict[str, dict[str, float]] = TOKEN_COSTS  # different model's token cost
 
     def update_cost(self, prompt_tokens, completion_tokens, model):
         """
@@ -46,7 +47,8 @@ class CostManager(BaseModel):
             return
 
         cost = (
-            prompt_tokens * TOKEN_COSTS[model]["prompt"] + completion_tokens * TOKEN_COSTS[model]["completion"]
+            prompt_tokens * self.token_costs[model]["prompt"]
+            + completion_tokens * self.token_costs[model]["completion"]
         ) / 1000
         self.total_cost += cost
         logger.info(

From 21d588ae0e793d5154b9793bb9ba1194e0fb2d07 Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Wed, 28 Feb 2024 09:38:12 +0800
Subject: [PATCH 11/12] update missing qianfan ut

---
 tests/metagpt/provider/test_qianfan_api.py | 56 ++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 tests/metagpt/provider/test_qianfan_api.py

diff --git a/tests/metagpt/provider/test_qianfan_api.py b/tests/metagpt/provider/test_qianfan_api.py
new file mode 100644
index 000000000..28341425c
--- /dev/null
+++ b/tests/metagpt/provider/test_qianfan_api.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : the unittest of qianfan api
+
+from typing import AsyncIterator, Union
+
+import pytest
+from qianfan.resources.typing import JsonBody, QfResponse
+
+from metagpt.provider.qianfan_api import QianFanLLM
+from tests.metagpt.provider.mock_llm_config import mock_llm_config_qianfan
+from tests.metagpt.provider.req_resp_const import (
+    get_qianfan_response,
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)
+
+name = "ERNIE-Bot-turbo"
+resp_cont = resp_cont_tmpl.format(name=name)
+
+
+def mock_qianfan_do(self, messages: list[dict], model: str, stream: bool = False, system: str = None) -> QfResponse:
+    return get_qianfan_response(name=name)
+
+
+async def mock_qianfan_ado(
+    self, messages: list[dict], model: str, stream: bool = True, system: str = None
+) -> Union[QfResponse, AsyncIterator[QfResponse]]:
+    resps = [get_qianfan_response(name=name)]
+    if stream:
+
+        async def aresp_iterator(resps: list[JsonBody]):
+            for resp in resps:
+                yield resp
+
+        return aresp_iterator(resps)
+    else:
+        return resps[0]
+
+
+@pytest.mark.asyncio
+async def test_qianfan_acompletion(mocker):
+    mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.do", mock_qianfan_do)
+    mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.ado", mock_qianfan_ado)
+
+    qianfan_llm = QianFanLLM(mock_llm_config_qianfan)
+
+    resp = qianfan_llm.completion(messages)
+    assert resp.get("result") == resp_cont
+
+    resp = await qianfan_llm.acompletion(messages)
+    assert resp.get("result") == resp_cont
+
+    await llm_general_chat_funcs_test(qianfan_llm, prompt, messages, resp_cont)

From ad88c5000d135303a23a22da56ba240867ec44fc Mon Sep 17 00:00:00 2001
From: betterwang <betterwang@fuzhi.ai>
Date: Wed, 28 Feb 2024 12:09:33 +0800
Subject: [PATCH 12/12] fix typos and rename token cost constants to upper case

---
 examples/llm_hello_world.py       |  6 ++--
 metagpt/provider/base_llm.py      |  4 +--
 metagpt/provider/dashscope_api.py |  4 +--
 metagpt/provider/qianfan_api.py   | 12 ++++----
 metagpt/utils/token_counter.py    | 48 +++++++++++++++----------------
 5 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py
index 9340f7d5a..62fc2ed68 100644
--- a/examples/llm_hello_world.py
+++ b/examples/llm_hello_world.py
@@ -14,9 +14,9 @@ from metagpt.logs import logger
 async def main():
     llm = LLM()
     # llm type check
-    id_ques = "what's your name"
-    logger.info(f"{id_ques}: ")
-    logger.info(await llm.aask(id_ques))
+    question = "what's your name"
+    logger.info(f"{question}: ")
+    logger.info(await llm.aask(question))
     logger.info("\n\n")
 
     logger.info(
diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index 2f57b15aa..7cf3faac0 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -75,7 +75,7 @@ class BaseLLM(ABC):
             local_calc_usage (bool): some models don't calculate usage, it will overwrite LLMConfig.calc_usage
         """
         calc_usage = self.config.calc_usage and local_calc_usage
-        model = model if model else self.model
+        model = model or self.model
         usage = usage.model_dump() if isinstance(usage, BaseModel) else usage
         if calc_usage and self.cost_manager:
             try:
@@ -83,7 +83,7 @@ class BaseLLM(ABC):
                 completion_tokens = int(usage.get("completion_tokens", 0))
                 self.cost_manager.update_cost(prompt_tokens, completion_tokens, model)
             except Exception as e:
-                logger.error(f"{self.__class__.__name__} updats costs failed! exp: {e}")
+                logger.error(f"{self.__class__.__name__} updates costs failed! exp: {e}")
 
     def get_costs(self) -> Costs:
         if not self.cost_manager:
diff --git a/metagpt/provider/dashscope_api.py b/metagpt/provider/dashscope_api.py
index 7d7dcb7b7..f2b3a19a1 100644
--- a/metagpt/provider/dashscope_api.py
+++ b/metagpt/provider/dashscope_api.py
@@ -37,7 +37,7 @@ from metagpt.provider.base_llm import BaseLLM, LLMConfig
 from metagpt.provider.llm_provider_registry import LLMType, register_provider
 from metagpt.provider.openai_api import log_and_reraise
 from metagpt.utils.cost_manager import CostManager
-from metagpt.utils.token_counter import DashScope_TOKEN_COSTS
+from metagpt.utils.token_counter import DASHSCOPE_TOKEN_COSTS
 
 
 def build_api_arequest(
@@ -167,7 +167,7 @@ class DashScopeLLM(BaseLLM):
     def __init_dashscope(self):
         self.model = self.config.model
         self.api_key = self.config.api_key
-        self.token_costs = DashScope_TOKEN_COSTS
+        self.token_costs = DASHSCOPE_TOKEN_COSTS
         self.aclient: AGeneration = AGeneration
 
         # check support system_message models
diff --git a/metagpt/provider/qianfan_api.py b/metagpt/provider/qianfan_api.py
index 6f94b9cea..4cbb76566 100644
--- a/metagpt/provider/qianfan_api.py
+++ b/metagpt/provider/qianfan_api.py
@@ -22,8 +22,8 @@ from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.provider.openai_api import log_and_reraise
 from metagpt.utils.cost_manager import CostManager
 from metagpt.utils.token_counter import (
-    QianFan_EndPoint_TOKEN_COSTS,
-    QianFan_MODEL_TOKEN_COSTS,
+    QIANFAN_ENDPOINT_TOKEN_COSTS,
+    QIANFAN_MODEL_TOKEN_COSTS,
 )
 
 
@@ -74,8 +74,8 @@ class QianFanLLM(BaseLLM):
         assert not (self.config.model and self.config.endpoint), "Only set `model` or `endpoint` in the config"
         assert self.config.model or self.config.endpoint, "Should set one of `model` or `endpoint` in the config"
 
-        self.token_costs = copy.deepcopy(QianFan_MODEL_TOKEN_COSTS)
-        self.token_costs.update(QianFan_EndPoint_TOKEN_COSTS)
+        self.token_costs = copy.deepcopy(QIANFAN_MODEL_TOKEN_COSTS)
+        self.token_costs.update(QIANFAN_ENDPOINT_TOKEN_COSTS)
 
         # self deployed model on the cloud not to calculate usage, it charges resource pool rental fee
         self.calc_usage = self.config.calc_usage and self.config.endpoint is None
@@ -103,8 +103,8 @@ class QianFanLLM(BaseLLM):
 
     def _update_costs(self, usage: dict):
         """update each request's token cost"""
-        model_or_endpoint = self.config.model if self.config.model else self.config.endpoint
-        local_calc_usage = True if model_or_endpoint in self.token_costs else False
+        model_or_endpoint = self.config.model or self.config.endpoint
+        local_calc_usage = model_or_endpoint in self.token_costs
         super()._update_costs(usage, model_or_endpoint, local_calc_usage)
 
     def get_choice_text(self, resp: JsonBody) -> str:
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index 220014d8f..167a1d755 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -42,7 +42,7 @@ TOKEN_COSTS = {
 QianFan Token Price https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
 Due to QianFan has multi price strategies, we unify `Tokens post-payment` as a statistical method.
 """
-QianFan_MODEL_TOKEN_COSTS = {
+QIANFAN_MODEL_TOKEN_COSTS = {
     "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017},
     "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067},
     "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017},
@@ -66,35 +66,35 @@ QianFan_MODEL_TOKEN_COSTS = {
     "Yi-34B-Chat": {"prompt": 0.0, "completion": 0.0},
 }
 
-QianFan_EndPoint_TOKEN_COSTS = {
-    "completions_pro": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot-4"],
-    "ernie_bot_8k": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"],
-    "completions": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot"],
-    "eb-instant": QianFan_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"],
-    "ai_apaas": QianFan_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"],
-    "ernie_speed": QianFan_MODEL_TOKEN_COSTS["ERNIE-Speed"],
-    "bloomz_7b1": QianFan_MODEL_TOKEN_COSTS["BLOOMZ-7B"],
-    "llama_2_7b": QianFan_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"],
-    "llama_2_13b": QianFan_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"],
-    "llama_2_70b": QianFan_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"],
-    "chatglm2_6b_32k": QianFan_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"],
-    "aquilachat_7b": QianFan_MODEL_TOKEN_COSTS["AquilaChat-7B"],
-    "mixtral_8x7b_instruct": QianFan_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"],
-    "sqlcoder_7b": QianFan_MODEL_TOKEN_COSTS["SQLCoder-7B"],
-    "codellama_7b_instruct": QianFan_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"],
-    "xuanyuan_70b_chat": QianFan_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"],
-    "qianfan_bloomz_7b_compressed": QianFan_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"],
-    "qianfan_chinese_llama_2_7b": QianFan_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"],
-    "qianfan_chinese_llama_2_13b": QianFan_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"],
-    "chatlaw": QianFan_MODEL_TOKEN_COSTS["ChatLaw"],
-    "yi_34b_chat": QianFan_MODEL_TOKEN_COSTS["Yi-34B-Chat"],
+QIANFAN_ENDPOINT_TOKEN_COSTS = {
+    "completions_pro": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-4"],
+    "ernie_bot_8k": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"],
+    "completions": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot"],
+    "eb-instant": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"],
+    "ai_apaas": QIANFAN_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"],
+    "ernie_speed": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Speed"],
+    "bloomz_7b1": QIANFAN_MODEL_TOKEN_COSTS["BLOOMZ-7B"],
+    "llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"],
+    "llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"],
+    "llama_2_70b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"],
+    "chatglm2_6b_32k": QIANFAN_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"],
+    "aquilachat_7b": QIANFAN_MODEL_TOKEN_COSTS["AquilaChat-7B"],
+    "mixtral_8x7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"],
+    "sqlcoder_7b": QIANFAN_MODEL_TOKEN_COSTS["SQLCoder-7B"],
+    "codellama_7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"],
+    "xuanyuan_70b_chat": QIANFAN_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"],
+    "qianfan_bloomz_7b_compressed": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"],
+    "qianfan_chinese_llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"],
+    "qianfan_chinese_llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"],
+    "chatlaw": QIANFAN_MODEL_TOKEN_COSTS["ChatLaw"],
+    "yi_34b_chat": QIANFAN_MODEL_TOKEN_COSTS["Yi-34B-Chat"],
 }
 
 """
 DashScope Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
 Different model has different detail page. Attention, some model are free for a limited time.
 """
-DashScope_TOKEN_COSTS = {
+DASHSCOPE_TOKEN_COSTS = {
     "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011},
     "qwen-plus": {"prompt": 0.0028, "completion": 0.0028},
     "qwen-max": {"prompt": 0.0, "completion": 0.0},