Merge branch 'feat_qianfan' of github.com:better629/MetaGPT into feat_qianfan

This commit is contained in:
better629 2024-02-07 22:50:52 +08:00
commit 9061a5714e
18 changed files with 69 additions and 34 deletions

View file

@ -133,7 +133,7 @@ class CollectLinks(Action):
if len(remove) == 0:
break
model_name = config.get_openai_llm().model
model_name = config.llm.model
prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096)
logger.debug(prompt)
queries = await self._aask(prompt, [system_text])

View file

@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor):
self, result: requests.Response, stream: bool
) -> Tuple[Union[bytes, Iterator[Generator]], bytes]:
"""Returns the response(s) and a bool indicating whether it is a stream."""
if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
content_type = result.headers.get("Content-Type", "")
if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type):
return (
self._interpret_response_line(line, result.status_code, result.headers, stream=True)
for line in parse_stream(result.iter_lines())

View file

@ -233,14 +233,16 @@ class OpenAILLM(BaseLLM):
usage.prompt_tokens = count_message_tokens(messages, self.model)
usage.completion_tokens = count_string_tokens(rsp, self.model)
except Exception as e:
logger.error(f"usage calculation failed: {e}")
logger.warning(f"usage calculation failed: {e}")
return usage
def _get_max_tokens(self, messages: list[dict]):
if not self.auto_max_tokens:
return self.config.max_token
return get_max_completion_tokens(messages, self.model, self.config.max_tokens)
# FIXME
# https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3
return min(get_max_completion_tokens(messages, self.model, self.config.max_token), 4096)
@handle_exception
async def amoderation(self, content: Union[str, list[str]]):

View file

@ -3,9 +3,8 @@
# @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk
from enum import Enum
from typing import Optional
import openai
import zhipuai
from requests import ConnectionError
from tenacity import (
after_log,
@ -14,6 +13,7 @@ from tenacity import (
stop_after_attempt,
wait_random_exponential,
)
from zhipuai.types.chat.chat_completion import Completion
from metagpt.configs.llm_config import LLMConfig, LLMType
from metagpt.logs import log_llm_stream, logger
@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM
from metagpt.provider.llm_provider_registry import register_provider
from metagpt.provider.openai_api import log_and_reraise
from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI
from metagpt.utils.cost_manager import CostManager
class ZhiPuEvent(Enum):
@ -38,27 +39,22 @@ class ZhiPuAILLM(BaseLLM):
"""
def __init__(self, config: LLMConfig):
self.__init_zhipuai(config)
self.llm = ZhiPuModelAPI
self.model = "chatglm_turbo" # so far only one model, just use it
self.use_system_prompt: bool = False # zhipuai has no system prompt when use api
self.config = config
self.__init_zhipuai()
self.cost_manager: Optional[CostManager] = None
def __init_zhipuai(self, config: LLMConfig):
assert config.api_key
zhipuai.api_key = config.api_key
# because the openai sdk is used, the api_key is set here but it won't be used.
# openai.api_key = zhipuai.api_key # due to use openai sdk, set the api_key but it will't be used.
if config.proxy:
# FIXME: openai v1.x sdk has no proxy support
openai.proxy = config.proxy
def __init_zhipuai(self):
assert self.config.api_key
self.api_key = self.config.api_key
self.model = self.config.model # so far, it supports glm-3-turbo and glm-4
self.llm = ZhiPuModelAPI(api_key=self.api_key)
def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3}
return kwargs
def completion(self, messages: list[dict], timeout=3) -> dict:
resp = self.llm.chat.completions.create(**self._const_kwargs(messages))
resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages))
usage = resp.usage.model_dump()
self._update_costs(usage)
return resp.model_dump()

View file

@ -72,11 +72,7 @@ class CodeInterpreter(Role):
if ReviewConst.CHANGE_WORDS[0] in review:
counter = 0 # redo the task again with help of human suggestions
py_code = (
code["code"] if code.get("language") == "python" else ""
) # use python code as final code; for markdown, return the rendered result instead of the code itself
return py_code, result, success
return code["code"], result, success
async def _write_code(self):
todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)

View file

@ -42,6 +42,10 @@ class CostManager(BaseModel):
"""
self.total_prompt_tokens += prompt_tokens
self.total_completion_tokens += completion_tokens
if model not in TOKEN_COSTS:
logger.warning(f"Model {model} not found in TOKEN_COSTS.")
return
cost = (
prompt_tokens * self.token_costs[model]["prompt"]
+ completion_tokens * self.token_costs[model]["completion"]

View file

@ -25,7 +25,7 @@ def reduce_message_length(
"""
max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
for msg in msgs:
if count_string_tokens(msg, model_name) < max_token:
if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX:
return msg
raise RuntimeError("fail to reduce message length")
@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str
continue
ret = ["".join(j) for j in _split_by_count(sentences, count)]
return ret
return _split_by_count(paragraph, count)
return list(_split_by_count(paragraph, count))
def decode_unicode_escape(text: str) -> str:

View file

@ -32,8 +32,8 @@ TOKEN_COSTS = {
"gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
"glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
"glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
"glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
"gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
}
@ -140,7 +140,8 @@ TOKEN_MAX = {
"gpt-4-vision-preview": 128000,
"gpt-4-1106-vision-preview": 128000,
"text-embedding-ada-002": 8192,
"chatglm_turbo": 32768,
"glm-3-turbo": 128000,
"glm-4": 128000,
"gemini-pro": 32768,
}