feat: merge geekan:main

This commit is contained in:
莘权 马 2023-12-22 16:40:04 +08:00
commit 9a1909bb95
139 changed files with 4649 additions and 1504 deletions

View file

@ -13,15 +13,21 @@ from __future__ import annotations
import ast
import contextlib
import importlib
import inspect
import json
import os
import platform
import re
import sys
import traceback
import typing
from pathlib import Path
from typing import Callable, List, Tuple, Union
from typing import Any, Callable, List, Tuple, Union, get_args, get_origin
import aiofiles
import loguru
from pydantic.json import pydantic_encoder
from tenacity import RetryCallState, _utils
from metagpt.config import CONFIG
@ -43,6 +49,12 @@ def check_cmd_exists(command) -> int:
return result
def require_python_version(req_version: tuple[int]) -> bool:
if not (2 <= len(req_version) <= 3):
raise ValueError("req_version should be (3, 9) or (3, 10, 13)")
return True if sys.version_info > req_version else False
class OutputParser:
@classmethod
def parse_blocks(cls, text: str):
@ -130,8 +142,32 @@ class OutputParser:
parsed_data[block] = content
return parsed_data
@staticmethod
def extract_content(text, tag="CONTENT"):
# Use regular expression to extract content between [CONTENT] and [/CONTENT]
extracted_content = re.search(rf"\[{tag}\](.*?)\[/{tag}\]", text, re.DOTALL)
if extracted_content:
return extracted_content.group(1).strip()
else:
return "No content found between [CONTENT] and [/CONTENT] tags."
@staticmethod
def is_supported_list_type(i):
origin = get_origin(i)
if origin is not List:
return False
args = get_args(i)
if args == (str,) or args == (Tuple[str, str],) or args == (List[str],):
return True
return False
@classmethod
def parse_data_with_mapping(cls, data, mapping):
if "[CONTENT]" in data:
data = cls.extract_content(text=data)
block_dict = cls.parse_blocks(data)
parsed_data = {}
for block, content in block_dict.items():
@ -198,7 +234,7 @@ class OutputParser:
result = ast.literal_eval(structure_text)
# Ensure the result matches the specified data type
if isinstance(result, list) or isinstance(result, dict):
if isinstance(result, (list, dict)):
return result
raise ValueError(f"The extracted structure is not a {data_type}.")
@ -437,6 +473,81 @@ def general_after_log(i: "loguru.Logger", sec_format: str = "%0.3f") -> typing.C
return log_it
def read_json_file(json_file: str, encoding=None) -> list[Any]:
if not Path(json_file).exists():
raise FileNotFoundError(f"json_file: {json_file} not exist, return []")
with open(json_file, "r", encoding=encoding) as fin:
try:
data = json.load(fin)
except Exception:
raise ValueError(f"read json file: {json_file} failed")
return data
def write_json_file(json_file: str, data: list, encoding=None):
folder_path = Path(json_file).parent
if not folder_path.exists():
folder_path.mkdir(parents=True, exist_ok=True)
with open(json_file, "w", encoding=encoding) as fout:
json.dump(data, fout, ensure_ascii=False, indent=4, default=pydantic_encoder)
def import_class(class_name: str, module_name: str) -> type:
module = importlib.import_module(module_name)
a_class = getattr(module, class_name)
return a_class
def import_class_inst(class_name: str, module_name: str, *args, **kwargs) -> object:
a_class = import_class(class_name, module_name)
class_inst = a_class(*args, **kwargs)
return class_inst
def format_trackback_info(limit: int = 2):
return traceback.format_exc(limit=limit)
def serialize_decorator(func):
async def wrapper(self, *args, **kwargs):
try:
result = await func(self, *args, **kwargs)
return result
except KeyboardInterrupt:
logger.error(f"KeyboardInterrupt occurs, start to serialize the project, exp:\n{format_trackback_info()}")
except Exception:
logger.error(f"Exception occurs, start to serialize the project, exp:\n{format_trackback_info()}")
self.serialize() # Team.serialize
return wrapper
def role_raise_decorator(func):
async def wrapper(self, *args, **kwargs):
try:
return await func(self, *args, **kwargs)
except KeyboardInterrupt as kbi:
logger.error(f"KeyboardInterrupt: {kbi} occurs, start to serialize the project")
if self.latest_observed_msg:
self._rc.memory.delete(self.latest_observed_msg)
# raise again to make it captured outside
raise Exception(format_trackback_info(limit=None))
except Exception:
if self.latest_observed_msg:
logger.warning(
"There is a exception in role's execution, in order to resume, "
"we delete the newest role communication message in the role's memory."
)
# remove role newest observed msg to make it observed again
self._rc.memory.delete(self.latest_observed_msg)
# raise again to make it captured outside
raise Exception(format_trackback_info(limit=None))
return wrapper
@handle_exception
async def aread(file_path: str) -> str:
"""Read file asynchronously."""

View file

@ -21,6 +21,7 @@ def handle_exception(
_func: Callable[..., ReturnType] = None,
*,
exception_type: Union[Type[Exception], Tuple[Type[Exception], ...]] = Exception,
exception_msg: str = "",
default_return: Any = None,
) -> Callable[..., ReturnType]:
"""handle exception, return default value"""
@ -32,8 +33,9 @@ def handle_exception(
return await func(*args, **kwargs)
except exception_type as e:
logger.opt(depth=1).error(
f"Calling {func.__name__} with args: {args}, kwargs: {kwargs} failed: {e}, "
f"stack: {traceback.format_exc()}"
f"{e}: {exception_msg}, "
f"\nCalling {func.__name__} with args: {args}, kwargs: {kwargs} "
f"\nStack: {traceback.format_exc()}"
)
return default_return

View file

@ -8,10 +8,10 @@
from metagpt.config import CONFIG
def get_template(templates, format=CONFIG.prompt_format):
selected_templates = templates.get(format)
def get_template(templates, schema=CONFIG.prompt_schema):
selected_templates = templates.get(schema)
if selected_templates is None:
raise ValueError(f"Can't find {format} in passed in templates")
raise ValueError(f"Can't find {schema} in passed in templates")
# Extract the selected templates
prompt_template = selected_templates["PROMPT_TEMPLATE"]

View file

@ -21,14 +21,12 @@ def make_sk_kernel():
if CONFIG.openai_api_type == "azure":
kernel.add_chat_service(
"chat_completion",
AzureChatCompletion(CONFIG.deployment_name, CONFIG.openai_api_base, CONFIG.openai_api_key),
AzureChatCompletion(CONFIG.deployment_name, CONFIG.openai_base_url, CONFIG.openai_api_key),
)
else:
kernel.add_chat_service(
"chat_completion",
OpenAIChatCompletion(
CONFIG.openai_api_model, CONFIG.openai_api_key, org_id=None, endpoint=CONFIG.openai_api_base
),
OpenAIChatCompletion(CONFIG.openai_api_model, CONFIG.openai_api_key),
)
return kernel

View file

@ -253,7 +253,7 @@ def retry_parse_json_text(output: str) -> Union[list, dict]:
if CONFIG.repair_llm_output is True, the _aask_v1 and the retry_parse_json_text will loop for {x=3*3} times.
it's a two-layer retry cycle
"""
logger.debug(f"output to json decode:\n{output}")
# logger.debug(f"output to json decode:\n{output}")
# if CONFIG.repair_llm_output is True, it will try to fix output until the retry break
parsed_data = CustomDecoder(strict=False).decode(output)

View file

@ -4,13 +4,11 @@
import copy
import pickle
from typing import Dict, List
from metagpt.actions.action_output import ActionOutput
from metagpt.schema import Message
from metagpt.utils.common import import_class
def actionoutout_schema_to_mapping(schema: Dict) -> Dict:
def actionoutout_schema_to_mapping(schema: dict) -> dict:
"""
directly traverse the `properties` in the first level.
schema structure likes
@ -35,14 +33,31 @@ def actionoutout_schema_to_mapping(schema: Dict) -> Dict:
if property["type"] == "string":
mapping[field] = (str, ...)
elif property["type"] == "array" and property["items"]["type"] == "string":
mapping[field] = (List[str], ...)
mapping[field] = (list[str], ...)
elif property["type"] == "array" and property["items"]["type"] == "array":
# here only consider the `List[List[str]]` situation
mapping[field] = (List[List[str]], ...)
# here only consider the `list[list[str]]` situation
mapping[field] = (list[list[str]], ...)
return mapping
def serialize_message(message: Message):
def actionoutput_mapping_to_str(mapping: dict) -> dict:
new_mapping = {}
for key, value in mapping.items():
new_mapping[key] = str(value)
return new_mapping
def actionoutput_str_to_mapping(mapping: dict) -> dict:
new_mapping = {}
for key, value in mapping.items():
if value == "(<class 'str'>, Ellipsis)":
new_mapping[key] = (str, ...)
else:
new_mapping[key] = eval(value) # `"'(list[str], Ellipsis)"` to `(list[str], ...)`
return new_mapping
def serialize_message(message: "Message"):
message_cp = copy.deepcopy(message) # avoid `instruct_content` value update by reference
ic = message_cp.instruct_content
if ic:
@ -56,11 +71,12 @@ def serialize_message(message: Message):
return msg_ser
def deserialize_message(message_ser: str) -> Message:
def deserialize_message(message_ser: str) -> "Message":
message = pickle.loads(message_ser)
if message.instruct_content:
ic = message.instruct_content
ic_obj = ActionOutput.create_model_class(class_name=ic["class"], mapping=ic["mapping"])
actionnode_class = import_class("ActionNode", "metagpt.actions.action_node") # avoid circular import
ic_obj = actionnode_class.create_model_class(class_name=ic["class"], mapping=ic["mapping"])
ic_new = ic_obj(**ic["value"])
message.instruct_content = ic_new

View file

@ -7,6 +7,7 @@
ref1: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
ref2: https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/llm/token_counter.py
ref3: https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/openai.py
ref4: https://ai.google.dev/models/gemini
"""
import tiktoken
@ -16,6 +17,8 @@ TOKEN_COSTS = {
"gpt-3.5-turbo-0613": {"prompt": 0.0015, "completion": 0.002},
"gpt-3.5-turbo-16k": {"prompt": 0.003, "completion": 0.004},
"gpt-3.5-turbo-16k-0613": {"prompt": 0.003, "completion": 0.004},
"gpt-35-turbo": {"prompt": 0.0015, "completion": 0.002},
"gpt-35-turbo-16k": {"prompt": 0.003, "completion": 0.004},
"gpt-3.5-turbo-1106": {"prompt": 0.001, "completion": 0.002},
"gpt-4-0314": {"prompt": 0.03, "completion": 0.06},
"gpt-4": {"prompt": 0.03, "completion": 0.06},
@ -25,6 +28,7 @@ TOKEN_COSTS = {
"gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"chatglm_turbo": {"prompt": 0.0, "completion": 0.00069}, # 32k version, prompt + completion tokens=0.005¥/k-tokens
"gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
}
@ -34,6 +38,8 @@ TOKEN_MAX = {
"gpt-3.5-turbo-0613": 4096,
"gpt-3.5-turbo-16k": 16384,
"gpt-3.5-turbo-16k-0613": 16384,
"gpt-35-turbo": 4096,
"gpt-35-turbo-16k": 16384,
"gpt-3.5-turbo-1106": 16384,
"gpt-4-0314": 8192,
"gpt-4": 8192,
@ -43,6 +49,7 @@ TOKEN_MAX = {
"gpt-4-1106-preview": 128000,
"text-embedding-ada-002": 8192,
"chatglm_turbo": 32768,
"gemini-pro": 32768,
}
@ -56,6 +63,8 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
if model in {
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
"gpt-35-turbo",
"gpt-35-turbo-16k",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-1106",
"gpt-4-0314",