mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-29 15:59:42 +02:00
Merge branch 'feat_qianfan' of github.com:better629/MetaGPT into feat_qianfan
This commit is contained in:
commit
9061a5714e
18 changed files with 69 additions and 34 deletions
|
|
@ -133,7 +133,7 @@ class CollectLinks(Action):
|
|||
if len(remove) == 0:
|
||||
break
|
||||
|
||||
model_name = config.get_openai_llm().model
|
||||
model_name = config.llm.model
|
||||
prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096)
|
||||
logger.debug(prompt)
|
||||
queries = await self._aask(prompt, [system_text])
|
||||
|
|
|
|||
|
|
@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor):
|
|||
self, result: requests.Response, stream: bool
|
||||
) -> Tuple[Union[bytes, Iterator[Generator]], bytes]:
|
||||
"""Returns the response(s) and a bool indicating whether it is a stream."""
|
||||
if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
|
||||
content_type = result.headers.get("Content-Type", "")
|
||||
if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type):
|
||||
return (
|
||||
self._interpret_response_line(line, result.status_code, result.headers, stream=True)
|
||||
for line in parse_stream(result.iter_lines())
|
||||
|
|
|
|||
|
|
@ -233,14 +233,16 @@ class OpenAILLM(BaseLLM):
|
|||
usage.prompt_tokens = count_message_tokens(messages, self.model)
|
||||
usage.completion_tokens = count_string_tokens(rsp, self.model)
|
||||
except Exception as e:
|
||||
logger.error(f"usage calculation failed: {e}")
|
||||
logger.warning(f"usage calculation failed: {e}")
|
||||
|
||||
return usage
|
||||
|
||||
def _get_max_tokens(self, messages: list[dict]):
|
||||
if not self.auto_max_tokens:
|
||||
return self.config.max_token
|
||||
return get_max_completion_tokens(messages, self.model, self.config.max_tokens)
|
||||
# FIXME
|
||||
# https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3
|
||||
return min(get_max_completion_tokens(messages, self.model, self.config.max_token), 4096)
|
||||
|
||||
@handle_exception
|
||||
async def amoderation(self, content: Union[str, list[str]]):
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@
|
|||
# @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk
|
||||
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import openai
|
||||
import zhipuai
|
||||
from requests import ConnectionError
|
||||
from tenacity import (
|
||||
after_log,
|
||||
|
|
@ -14,6 +13,7 @@ from tenacity import (
|
|||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
)
|
||||
from zhipuai.types.chat.chat_completion import Completion
|
||||
|
||||
from metagpt.configs.llm_config import LLMConfig, LLMType
|
||||
from metagpt.logs import log_llm_stream, logger
|
||||
|
|
@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM
|
|||
from metagpt.provider.llm_provider_registry import register_provider
|
||||
from metagpt.provider.openai_api import log_and_reraise
|
||||
from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
|
||||
class ZhiPuEvent(Enum):
|
||||
|
|
@ -38,27 +39,22 @@ class ZhiPuAILLM(BaseLLM):
|
|||
"""
|
||||
|
||||
def __init__(self, config: LLMConfig):
|
||||
self.__init_zhipuai(config)
|
||||
self.llm = ZhiPuModelAPI
|
||||
self.model = "chatglm_turbo" # so far only one model, just use it
|
||||
self.use_system_prompt: bool = False # zhipuai has no system prompt when use api
|
||||
self.config = config
|
||||
self.__init_zhipuai()
|
||||
self.cost_manager: Optional[CostManager] = None
|
||||
|
||||
def __init_zhipuai(self, config: LLMConfig):
|
||||
assert config.api_key
|
||||
zhipuai.api_key = config.api_key
|
||||
# due to use openai sdk, set the api_key but it will't be used.
|
||||
# openai.api_key = zhipuai.api_key # due to use openai sdk, set the api_key but it will't be used.
|
||||
if config.proxy:
|
||||
# FIXME: openai v1.x sdk has no proxy support
|
||||
openai.proxy = config.proxy
|
||||
def __init_zhipuai(self):
|
||||
assert self.config.api_key
|
||||
self.api_key = self.config.api_key
|
||||
self.model = self.config.model # so far, it support glm-3-turbo、glm-4
|
||||
self.llm = ZhiPuModelAPI(api_key=self.api_key)
|
||||
|
||||
def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
|
||||
kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3}
|
||||
return kwargs
|
||||
|
||||
def completion(self, messages: list[dict], timeout=3) -> dict:
|
||||
resp = self.llm.chat.completions.create(**self._const_kwargs(messages))
|
||||
resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages))
|
||||
usage = resp.usage.model_dump()
|
||||
self._update_costs(usage)
|
||||
return resp.model_dump()
|
||||
|
|
|
|||
|
|
@ -72,11 +72,7 @@ class CodeInterpreter(Role):
|
|||
if ReviewConst.CHANGE_WORDS[0] in review:
|
||||
counter = 0 # redo the task again with help of human suggestions
|
||||
|
||||
py_code = (
|
||||
code["code"] if code.get("language") == "python" else ""
|
||||
) # use python code as final code; for markdown, return the rendered result instead of the code itself
|
||||
|
||||
return py_code, result, success
|
||||
return code["code"], result, success
|
||||
|
||||
async def _write_code(self):
|
||||
todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
|
||||
|
|
|
|||
|
|
@ -42,6 +42,10 @@ class CostManager(BaseModel):
|
|||
"""
|
||||
self.total_prompt_tokens += prompt_tokens
|
||||
self.total_completion_tokens += completion_tokens
|
||||
if model not in TOKEN_COSTS:
|
||||
logger.warning(f"Model {model} not found in TOKEN_COSTS.")
|
||||
return
|
||||
|
||||
cost = (
|
||||
prompt_tokens * self.token_costs[model]["prompt"]
|
||||
+ completion_tokens * self.token_costs[model]["completion"]
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ def reduce_message_length(
|
|||
"""
|
||||
max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
|
||||
for msg in msgs:
|
||||
if count_string_tokens(msg, model_name) < max_token:
|
||||
if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX:
|
||||
return msg
|
||||
|
||||
raise RuntimeError("fail to reduce message length")
|
||||
|
|
@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str
|
|||
continue
|
||||
ret = ["".join(j) for j in _split_by_count(sentences, count)]
|
||||
return ret
|
||||
return _split_by_count(paragraph, count)
|
||||
return list(_split_by_count(paragraph, count))
|
||||
|
||||
|
||||
def decode_unicode_escape(text: str) -> str:
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ TOKEN_COSTS = {
|
|||
"gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator
|
||||
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
|
||||
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
|
||||
"glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
|
||||
"glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
|
||||
"glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
|
||||
"glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
|
||||
"gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
|
||||
}
|
||||
|
||||
|
|
@ -140,7 +140,8 @@ TOKEN_MAX = {
|
|||
"gpt-4-vision-preview": 128000,
|
||||
"gpt-4-1106-vision-preview": 128000,
|
||||
"text-embedding-ada-002": 8192,
|
||||
"chatglm_turbo": 32768,
|
||||
"glm-3-turbo": 128000,
|
||||
"glm-4": 128000,
|
||||
"gemini-pro": 32768,
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue