diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py index 1d132eb8a..e22edbdf2 100644 --- a/examples/llm_hello_world.py +++ b/examples/llm_hello_world.py @@ -6,16 +6,25 @@ @File : llm_hello_world.py """ import asyncio -from pathlib import Path from metagpt.llm import LLM from metagpt.logs import logger -from metagpt.utils.common import encode_image async def main(): llm = LLM() - logger.info(await llm.aask("hello world")) + # llm type check + id_ques = "what's your name" + logger.info(f"{id_ques}: ") + logger.info(await llm.aask(id_ques)) + logger.info("\n\n") + + logger.info( + await llm.aask( + "who are you", system_msgs=["act as a robot, answer 'I'am robot' if the question is 'who are you'"] + ) + ) + logger.info(await llm.aask_batch(["hi", "write python hello world."])) hello_msg = [{"role": "user", "content": "count from 1 to 10. split by newline."}] @@ -29,12 +38,6 @@ async def main(): if hasattr(llm, "completion"): logger.info(llm.completion(hello_msg)) - # check if the configured llm supports llm-vision capacity. 
If not, it will throw a error - invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png") - img_base64 = encode_image(invoice_path) - res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64]) - assert "true" in res.lower() - if __name__ == "__main__": asyncio.run(main()) diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py index fb923d3e4..1b05b5270 100644 --- a/metagpt/configs/llm_config.py +++ b/metagpt/configs/llm_config.py @@ -24,6 +24,7 @@ class LLMType(Enum): METAGPT = "metagpt" AZURE = "azure" OLLAMA = "ollama" + QIANFAN = "qianfan" # Baidu BCE def __missing__(self, key): return self.OPENAI @@ -36,13 +37,18 @@ class LLMConfig(YamlModel): Optional Fields in pydantic: https://docs.pydantic.dev/latest/migration/#required-optional-and-nullable-fields """ - api_key: str + api_key: str = "sk-" api_type: LLMType = LLMType.OPENAI base_url: str = "https://api.openai.com/v1" api_version: Optional[str] = None model: Optional[str] = None # also stands for DEPLOYMENT_NAME + # For Cloud Service Provider like Baidu/ Alibaba + access_key: Optional[str] = None + secret_key: Optional[str] = None + endpoint: Optional[str] = None # for self-deployed model on the cloud + # For Spark(Xunfei), maybe remove later app_id: Optional[str] = None api_secret: Optional[str] = None diff --git a/metagpt/provider/__init__.py b/metagpt/provider/__init__.py index 675734811..8c0aab836 100644 --- a/metagpt/provider/__init__.py +++ b/metagpt/provider/__init__.py @@ -16,6 +16,7 @@ from metagpt.provider.azure_openai_api import AzureOpenAILLM from metagpt.provider.metagpt_api import MetaGPTLLM from metagpt.provider.human_provider import HumanProvider from metagpt.provider.spark_api import SparkLLM +from metagpt.provider.qianfan_api import QianFanLLM __all__ = [ "FireworksLLM", @@ -28,4 +29,5 @@ __all__ = [ "OllamaLLM", "HumanProvider", "SparkLLM", + "QianFanLLM", ] diff --git 
def _update_costs(self, usage: dict, model: str = None, local_calc_usage: bool = True):
    """Record the token usage of one request in the cost manager.

    Args:
        usage (dict): usage payload of the response. The ``prompt_tokens`` and
            ``completion_tokens`` entries are read; a missing entry counts as 0
            and ``None`` is treated like an empty payload.
        model (str): model name (in some scenarios called endpoint) whose price
            is applied. Falls back to ``self.model`` when falsy.
        local_calc_usage (bool): per-call switch for models that don't calculate
            usage. Costs are only recorded when both this flag and
            ``self.config.calc_usage`` are truthy.
    """
    # Bail out before touching anything else: when accounting is switched off
    # (or no cost manager is attached) `self.model` must not even be looked up.
    if not (self.config.calc_usage and local_calc_usage and self.cost_manager):
        return

    usage = usage or {}  # tolerate a response that carries no usage payload
    model = model or self.model
    try:
        prompt_tokens = int(usage.get("prompt_tokens", 0))
        completion_tokens = int(usage.get("completion_tokens", 0))
        self.cost_manager.update_cost(prompt_tokens, completion_tokens, model)
    except Exception as e:
        # Failures here (e.g. a model without a price entry) are logged and
        # swallowed so that cost bookkeeping cannot fail the request itself.
        logger.error(f"{self.__class__.__name__} updats costs failed! exp: {e}")
@register_provider(LLMType.QIANFAN)
class QianFanLLM(BaseLLM):
    """LLM provider for Baidu QianFan, supporting ERNIE (wen xin yi yan) and open-source models.

    Refs
        Auth: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/3lmokh7n6#%E3%80%90%E6%8E%A8%E8%8D%90%E3%80%91%E4%BD%BF%E7%94%A8%E5%AE%89%E5%85%A8%E8%AE%A4%E8%AF%81aksk%E9%89%B4%E6%9D%83%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B
        Token Price: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
        Models: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/wlmhm7vuo#%E5%AF%B9%E8%AF%9Dchat
                https://cloud.baidu.com/doc/WENXINWORKSHOP/s/xlmokikxe#%E6%94%AF%E6%8C%81%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8
    """

    # (model, corresponding-endpoint) pairs that accept a dedicated system prompt;
    # only some ERNIE models support it.
    _SYSTEM_PROMPT_PAIRS = (
        ("ERNIE-Bot-4", "completions_pro"),
        ("ERNIE-Bot-8k", "ernie_bot_8k"),
        ("ERNIE-Bot", "completions"),
        ("ERNIE-Bot-turbo", "eb-instant"),
        ("ERNIE-Speed", "ernie_speed"),
        ("EB-turbo-AppBuilder", "ai_apaas"),
    )

    def __init__(self, config: LLMConfig):
        """Store the config, set up the QianFan client and attach a cost manager.

        Raises:
            ValueError: propagated from the QianFan setup when the credentials
                or the `model` / `endpoint` selection in `config` are invalid.
        """
        self.config = config
        self.use_system_prompt = False  # only some ERNIE-x related models support system_prompt
        self.__init_qianfan()
        self.cost_manager = CostManager(token_costs=self.token_costs)

    def __init_qianfan(self):
        """Export credentials, validate the model/endpoint choice and create the client.

        Sets `use_system_prompt`, `token_costs`, `calc_usage` and `client`.

        Raises:
            ValueError: if neither `access_key`&`secret_key` nor
                `api_key`&`secret_key` is configured, or if not exactly one of
                `model` / `endpoint` is set.
        """
        config = self.config

        # Credentials are handed over through environment variables (official recommendation).
        # `setdefault` keeps any value that is already present in the environment.
        if config.access_key and config.secret_key:
            # system level auth, recommended by official
            os.environ.setdefault("QIANFAN_ACCESS_KEY", config.access_key)
            os.environ.setdefault("QIANFAN_SECRET_KEY", config.secret_key)
        elif config.api_key and config.secret_key:
            # application level auth
            os.environ.setdefault("QIANFAN_AK", config.api_key)
            os.environ.setdefault("QIANFAN_SK", config.secret_key)
        else:
            raise ValueError("Set the `access_key`&`secret_key` or `api_key`&`secret_key` first")

        # Explicit raises rather than `assert`: assertions are removed under `python -O`,
        # which would let an invalid configuration through silently.
        if config.model and config.endpoint:
            raise ValueError("Only set `model` or `endpoint` in the config")
        if not (config.model or config.endpoint):
            raise ValueError("Should set one of `model` or `endpoint` in the config")

        supported_models = {model for model, _ in self._SYSTEM_PROMPT_PAIRS}
        supported_endpoints = {endpoint for _, endpoint in self._SYSTEM_PROMPT_PAIRS}
        if config.model in supported_models or config.endpoint in supported_endpoints:
            self.use_system_prompt = True

        # Private price table keyed by both model names and endpoint names. The merged
        # table is deep-copied as a whole so no entry aliases the module-level constants.
        self.token_costs = copy.deepcopy({**QianFan_MODEL_TOKEN_COSTS, **QianFan_EndPoint_TOKEN_COSTS})

        # self deployed model on the cloud not to calculate usage, it charges resource pool rental fee
        self.calc_usage = config.calc_usage and config.endpoint is None
        self.client = qianfan.ChatCompletion()

    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
        """Build the keyword arguments for a QianFan chat request.

        Args:
            messages: chat messages, each a dict with `role` and `content`.
            stream: whether a streaming response is requested.

        Returns:
            dict with `messages` and `stream`, plus `temperature`, `endpoint` or
            `model`, and `system` when applicable.
        """
        kwargs = {
            "messages": messages,
            "stream": stream,
        }
        if self.config.temperature > 0:
            # different model has default temperature. only set when it's specified.
            kwargs["temperature"] = self.config.temperature

        if self.config.endpoint:
            kwargs["endpoint"] = self.config.endpoint
        elif self.config.model:
            kwargs["model"] = self.config.model

        # If the model supports a system prompt, move a leading system message into the
        # dedicated `system` argument. An empty message list is passed through untouched.
        if self.use_system_prompt and messages and messages[0]["role"] == "system":
            kwargs["messages"] = messages[1:]
            kwargs["system"] = messages[0]["content"]
        return kwargs

    def _update_costs(self, usage: dict):
        """Update the token cost of one request.

        Usage is only recorded when the configured model (or endpoint) has an
        entry in `self.token_costs`.
        """
        model_or_endpoint = self.config.model or self.config.endpoint
        local_calc_usage = model_or_endpoint in self.token_costs
        super()._update_costs(usage, model_or_endpoint, local_calc_usage)

    def get_choice_text(self, resp: JsonBody) -> str:
        """Return the `result` text of a response body, or "" when absent."""
        return resp.get("result", "")

    def completion(self, messages: list[dict]) -> JsonBody:
        """Run a blocking, non-streaming chat request and return the response body."""
        resp = self.client.do(**self._const_kwargs(messages=messages, stream=False))
        self._update_costs(resp.body.get("usage", {}))
        return resp.body

    async def _achat_completion(self, messages: list[dict]) -> JsonBody:
        """Run an async, non-streaming chat request and return the response body."""
        resp = await self.client.ado(**self._const_kwargs(messages=messages, stream=False))
        self._update_costs(resp.body.get("usage", {}))
        return resp.body

    async def acompletion(self, messages: list[dict], timeout=3) -> JsonBody:
        """Async non-streaming completion; `timeout` is accepted but not used here."""
        return await self._achat_completion(messages)

    async def _achat_completion_stream(self, messages: list[dict]) -> str:
        """Run an async streaming chat request, log each chunk and return the joined text."""
        resp = await self.client.ado(**self._const_kwargs(messages=messages, stream=True))
        collected_content = []
        usage = {}
        async for chunk in resp:
            content = chunk.body.get("result", "")
            # Keep the most recent non-empty usage so a chunk without usage
            # cannot reset the figures already reported by an earlier chunk.
            usage = chunk.body.get("usage") or usage
            log_llm_stream(content)
            collected_content.append(content)
        log_llm_stream("\n")

        self._update_costs(usage)
        return "".join(collected_content)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(min=1, max=60),
        after=after_log(logger, logger.level("WARNING").name),
        retry=retry_if_exception_type(ConnectionError),
        retry_error_callback=log_and_reraise,
    )
    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
        """Return the reply text, streamed or not; retried up to 3 attempts on `ConnectionError`.

        `timeout` is accepted but not used here.
        """
        if stream:
            return await self._achat_completion_stream(messages)
        resp = await self._achat_completion(messages)
        return self.get_choice_text(resp)
# QianFan Token Price https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
# QianFan has several pricing strategies; the `Tokens post-payment` prices are used
# here as the single statistical method.

# One row per model: (model name, prompt price, completion price).
_QIANFAN_MODEL_PRICES = (
    ("ERNIE-Bot-4", 0.017, 0.017),
    ("ERNIE-Bot-8k", 0.0034, 0.0067),
    ("ERNIE-Bot", 0.017, 0.017),
    ("ERNIE-Bot-turbo", 0.0011, 0.0011),
    ("EB-turbo-AppBuilder", 0.0011, 0.0011),
    ("ERNIE-Speed", 0.00056, 0.0011),
    ("BLOOMZ-7B", 0.00056, 0.00056),
    ("Llama-2-7B-Chat", 0.00056, 0.00056),
    ("Llama-2-13B-Chat", 0.00084, 0.00084),
    ("Llama-2-70B-Chat", 0.0049, 0.0049),
    ("ChatGLM2-6B-32K", 0.00056, 0.00056),
    ("AquilaChat-7B", 0.00056, 0.00056),
    ("Mixtral-8x7B-Instruct", 0.0049, 0.0049),
    ("SQLCoder-7B", 0.00056, 0.00056),
    ("CodeLlama-7B-Instruct", 0.00056, 0.00056),
    ("XuanYuan-70B-Chat-4bit", 0.0049, 0.0049),
    ("Qianfan-BLOOMZ-7B-compressed", 0.00056, 0.00056),
    ("Qianfan-Chinese-Llama-2-7B", 0.00056, 0.00056),
    ("Qianfan-Chinese-Llama-2-13B", 0.00084, 0.00084),
    ("ChatLaw", 0.0011, 0.0011),
    ("Yi-34B-Chat", 0.0, 0.0),
)

# Price table keyed by model name.
QianFan_MODEL_TOKEN_COSTS = {
    model_name: {"prompt": prompt_price, "completion": completion_price}
    for model_name, prompt_price, completion_price in _QIANFAN_MODEL_PRICES
}

# One row per endpoint: (endpoint name, model whose price entry it reuses).
_QIANFAN_ENDPOINT_MODELS = (
    ("completions_pro", "ERNIE-Bot-4"),
    ("ernie_bot_8k", "ERNIE-Bot-8k"),
    ("completions", "ERNIE-Bot"),
    ("eb-instant", "ERNIE-Bot-turbo"),
    ("ai_apaas", "EB-turbo-AppBuilder"),
    ("ernie_speed", "ERNIE-Speed"),
    ("bloomz_7b1", "BLOOMZ-7B"),
    ("llama_2_7b", "Llama-2-7B-Chat"),
    ("llama_2_13b", "Llama-2-13B-Chat"),
    ("llama_2_70b", "Llama-2-70B-Chat"),
    ("chatglm2_6b_32k", "ChatGLM2-6B-32K"),
    ("aquilachat_7b", "AquilaChat-7B"),
    ("mixtral_8x7b_instruct", "Mixtral-8x7B-Instruct"),
    ("sqlcoder_7b", "SQLCoder-7B"),
    ("codellama_7b_instruct", "CodeLlama-7B-Instruct"),
    ("xuanyuan_70b_chat", "XuanYuan-70B-Chat-4bit"),
    ("qianfan_bloomz_7b_compressed", "Qianfan-BLOOMZ-7B-compressed"),
    ("qianfan_chinese_llama_2_7b", "Qianfan-Chinese-Llama-2-7B"),
    ("qianfan_chinese_llama_2_13b", "Qianfan-Chinese-Llama-2-13B"),
    ("chatlaw", "ChatLaw"),
    ("yi_34b_chat", "Yi-34B-Chat"),
)

# Price table keyed by endpoint name; every value is the very same dict object
# as the matching model entry above.
QianFan_EndPoint_TOKEN_COSTS = {
    endpoint_name: QianFan_MODEL_TOKEN_COSTS[model_name]
    for endpoint_name, model_name in _QIANFAN_ENDPOINT_MODELS
}