feat: merge geekan:main

2026-07-23 17:01:08 +02:00 · 2023-12-22 16:40:04 +08:00 · 2023-12-22 16:40:04 +08:00 · 9a1909bb95
commit 9a1909bb95
parent 33031648bd 139c7c363f
139 changed files with 4649 additions and 1504 deletions
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@ -13,15 +13,21 @@ from __future__ import annotations

 import ast
 import contextlib
+import importlib
 import inspect
+import json
 import os
 import platform
 import re
+import sys
+import traceback
+import typing
 from pathlib import Path
-from typing import Callable, List, Tuple, Union
+from typing import Any, Callable, List, Tuple, Union, get_args, get_origin

 import aiofiles
 import loguru
+from pydantic.json import pydantic_encoder
 from tenacity import RetryCallState, _utils

 from metagpt.config import CONFIG
@ -43,6 +49,12 @@ def check_cmd_exists(command) -> int:
    return result


+def require_python_version(req_version: tuple[int, ...]) -> bool:
+    """Report whether the running interpreter compares greater than ``req_version``.
+
+    Args:
+        req_version: Version to compare against, given as ``(major, minor)`` or
+            ``(major, minor, micro)``.
+
+    Returns:
+        The result of ``sys.version_info > req_version``.
+
+    Raises:
+        ValueError: If ``req_version`` does not have two or three elements.
+    """
+    if not (2 <= len(req_version) <= 3):
+        raise ValueError("req_version should be (3, 9) or (3, 10, 13)")
+    # The tuple comparison already yields a bool; no conditional expression is needed.
+    return sys.version_info > req_version
+
+
 class OutputParser:
    @classmethod
    def parse_blocks(cls, text: str):
@ -130,8 +142,32 @@ class OutputParser:
            parsed_data[block] = content
        return parsed_data

+    @staticmethod
+    def extract_content(text, tag="CONTENT"):
+        """Return the text enclosed by ``[tag]`` and ``[/tag]`` inside ``text``.
+
+        Args:
+            text: String to search; the enclosed text may span several lines.
+            tag: Tag name without brackets. It is matched literally.
+
+        Returns:
+            The first enclosed span with surrounding whitespace stripped, or a
+            "No content found ..." message string when the tag pair is absent.
+        """
+        # Escape the tag so regex metacharacters inside it are matched literally.
+        safe_tag = re.escape(tag)
+        extracted_content = re.search(rf"\[{safe_tag}\](.*?)\[/{safe_tag}\]", text, re.DOTALL)
+
+        if extracted_content:
+            return extracted_content.group(1).strip()
+        # Name the tag that was actually searched for instead of a hard-coded one.
+        return f"No content found between [{tag}] and [/{tag}] tags."
+
+    @staticmethod
+    def is_supported_list_type(i):
+        """Tell whether ``i`` is one of the list annotations this parser accepts.
+
+        Accepted annotations are lists whose single type argument is ``str``,
+        ``Tuple[str, str]`` or ``List[str]``.
+
+        Args:
+            i: A type annotation such as ``List[str]``.
+
+        Returns:
+            True for an accepted list annotation, False for anything else.
+        """
+        # get_origin() yields the runtime class `list` for `List[...]`, never the
+        # `typing.List` alias, so the comparison must be made against `list`.
+        if get_origin(i) is not list:
+            return False
+
+        return get_args(i) in ((str,), (Tuple[str, str],), (List[str],))
+
    @classmethod
    def parse_data_with_mapping(cls, data, mapping):
+        if "[CONTENT]" in data:
+            data = cls.extract_content(text=data)
        block_dict = cls.parse_blocks(data)
        parsed_data = {}
        for block, content in block_dict.items():
@ -198,7 +234,7 @@ class OutputParser:
                result = ast.literal_eval(structure_text)

                # Ensure the result matches the specified data type
-                if isinstance(result, list) or isinstance(result, dict):
+                if isinstance(result, (list, dict)):
                    return result

                raise ValueError(f"The extracted structure is not a {data_type}.")
@ -437,6 +473,81 @@ def general_after_log(i: "loguru.Logger", sec_format: str = "%0.3f") -> typing.C
    return log_it


+def read_json_file(json_file: str, encoding=None) -> list[Any]:
+    """Load and return the JSON document stored in ``json_file``.
+
+    Args:
+        json_file: Path of the file to read.
+        encoding: Text encoding passed to ``open``; ``None`` uses the platform default.
+
+    Returns:
+        The object produced by ``json.load``.
+
+    Raises:
+        FileNotFoundError: If ``json_file`` does not exist.
+        ValueError: If the file cannot be read or parsed as JSON; the original
+            error is attached as ``__cause__``.
+    """
+    if not Path(json_file).exists():
+        # Nothing is returned on this path, so the message no longer claims "return []".
+        raise FileNotFoundError(f"json_file: {json_file} not exist")
+
+    with open(json_file, "r", encoding=encoding) as fin:
+        try:
+            data = json.load(fin)
+        except Exception as exc:
+            # Chain the underlying error so the parse/decoding details are not lost.
+            raise ValueError(f"read json file: {json_file} failed") from exc
+    return data
+
+
+def write_json_file(json_file: str, data: list, encoding=None):
+    """Write ``data`` to ``json_file`` as indented, non-ASCII-preserving JSON.
+
+    The parent folder is created first when it does not exist yet. Objects the
+    standard encoder cannot handle are passed to ``pydantic_encoder``.
+
+    Args:
+        json_file: Destination path.
+        data: Object to serialize.
+        encoding: Text encoding passed to ``open``; ``None`` uses the platform default.
+    """
+    parent_dir = Path(json_file).parent
+    if not parent_dir.exists():
+        parent_dir.mkdir(parents=True, exist_ok=True)
+
+    dump_options = {"ensure_ascii": False, "indent": 4, "default": pydantic_encoder}
+    with open(json_file, "w", encoding=encoding) as fout:
+        json.dump(data, fout, **dump_options)
+
+
+def import_class(class_name: str, module_name: str) -> type:
+    """Import ``module_name`` and return its attribute named ``class_name``.
+
+    Resolving the class at call time lets callers avoid a top-level import.
+    """
+    return getattr(importlib.import_module(module_name), class_name)
+
+
+def import_class_inst(class_name: str, module_name: str, *args, **kwargs) -> object:
+    """Look up ``class_name`` in ``module_name`` and return a new instance of it.
+
+    Positional and keyword arguments are forwarded unchanged to the class.
+    """
+    return import_class(class_name, module_name)(*args, **kwargs)
+
+
+def format_trackback_info(limit: int = 2):
+    """Return ``traceback.format_exc`` output for the exception being handled.
+
+    Args:
+        limit: Forwarded as the ``limit`` argument of ``traceback.format_exc``.
+    """
+    formatted = traceback.format_exc(limit=limit)
+    return formatted
+
+
+def serialize_decorator(func):
+    """Wrap an async method so a failure is logged and followed by ``self.serialize()``.
+
+    On success the wrapped coroutine's result is returned unchanged. When it
+    raises ``KeyboardInterrupt`` or any ``Exception``, the traceback is logged,
+    the error is swallowed, ``self.serialize()`` is called and the wrapper
+    returns ``None``.
+
+    Args:
+        func: Async method whose first argument is ``self``; ``self`` must
+            provide ``serialize()``.
+
+    Returns:
+        The wrapping coroutine function.
+    """
+    import functools  # local import keeps this block self-contained
+
+    @functools.wraps(func)  # preserve the wrapped method's __name__/__doc__
+    async def wrapper(self, *args, **kwargs):
+        try:
+            return await func(self, *args, **kwargs)
+        except KeyboardInterrupt:
+            logger.error(f"KeyboardInterrupt occurs, start to serialize the project, exp:\n{format_trackback_info()}")
+        except Exception:
+            logger.error(f"Exception occurs, start to serialize the project, exp:\n{format_trackback_info()}")
+        # Only reached after one of the handlers above ran.
+        self.serialize()  # Team.serialize
+
+    return wrapper
+
+
+def role_raise_decorator(func):
+    """Wrap an async role method so a failure forgets the last observed message and re-raises.
+
+    On ``KeyboardInterrupt`` or any ``Exception`` the wrapper deletes
+    ``self.latest_observed_msg`` (when set) from ``self._rc.memory`` and then
+    raises a plain ``Exception`` whose message is the formatted traceback.
+    The original error is attached as ``__cause__``.
+
+    Args:
+        func: Async method whose first argument is ``self``.
+
+    Returns:
+        The wrapping coroutine function.
+
+    Raises:
+        Exception: Whenever the wrapped call raises, including ``KeyboardInterrupt``.
+    """
+    import functools  # local import keeps this block self-contained
+
+    @functools.wraps(func)  # preserve the wrapped method's __name__/__doc__
+    async def wrapper(self, *args, **kwargs):
+        try:
+            return await func(self, *args, **kwargs)
+        except KeyboardInterrupt as kbi:
+            logger.error(f"KeyboardInterrupt: {kbi} occurs, start to serialize the project")
+            if self.latest_observed_msg:
+                self._rc.memory.delete(self.latest_observed_msg)
+            # raise again to make it captured outside
+            raise Exception(format_trackback_info(limit=None)) from kbi
+        except Exception as exc:
+            if self.latest_observed_msg:
+                logger.warning(
+                    "There is a exception in role's execution, in order to resume, "
+                    "we delete the newest role communication message in the role's memory."
+                )
+                # remove role newest observed msg to make it observed again
+                self._rc.memory.delete(self.latest_observed_msg)
+            # raise again to make it captured outside
+            raise Exception(format_trackback_info(limit=None)) from exc
+
+    return wrapper
+
+
@handle_exception
 async def aread(file_path: str) -> str:
    """Read file asynchronously."""
--- a/metagpt/utils/exceptions.py
+++ b/metagpt/utils/exceptions.py
@ -21,6 +21,7 @@ def handle_exception(
    _func: Callable[..., ReturnType] = None,
    *,
    exception_type: Union[Type[Exception], Tuple[Type[Exception], ...]] = Exception,
+    exception_msg: str = "",
    default_return: Any = None,
 ) -> Callable[..., ReturnType]:
    """handle exception, return default value"""
@ -32,8 +33,9 @@ def handle_exception(
                return await func(*args, **kwargs)
            except exception_type as e:
                logger.opt(depth=1).error(
-                    f"Calling {func.__name__} with args: {args}, kwargs: {kwargs} failed: {e}, "
-                    f"stack: {traceback.format_exc()}"
+                    f"{e}: {exception_msg}, "
+                    f"\nCalling {func.__name__} with args: {args}, kwargs: {kwargs} "
+                    f"\nStack: {traceback.format_exc()}"
                )
                return default_return

--- a/metagpt/utils/get_template.py
+++ b/metagpt/utils/get_template.py
@ -8,10 +8,10 @@
 from metagpt.config import CONFIG


-def get_template(templates, format=CONFIG.prompt_format):
-    selected_templates = templates.get(format)
+def get_template(templates, schema=CONFIG.prompt_schema):
+    selected_templates = templates.get(schema)
    if selected_templates is None:
-        raise ValueError(f"Can't find {format} in passed in templates")
+        raise ValueError(f"Can't find {schema} in passed in templates")

    # Extract the selected templates
    prompt_template = selected_templates["PROMPT_TEMPLATE"]
--- a/metagpt/utils/make_sk_kernel.py
+++ b/metagpt/utils/make_sk_kernel.py
@ -21,14 +21,12 @@ def make_sk_kernel():
    if CONFIG.openai_api_type == "azure":
        kernel.add_chat_service(
            "chat_completion",
-            AzureChatCompletion(CONFIG.deployment_name, CONFIG.openai_api_base, CONFIG.openai_api_key),
+            AzureChatCompletion(CONFIG.deployment_name, CONFIG.openai_base_url, CONFIG.openai_api_key),
        )
    else:
        kernel.add_chat_service(
            "chat_completion",
-            OpenAIChatCompletion(
-                CONFIG.openai_api_model, CONFIG.openai_api_key, org_id=None, endpoint=CONFIG.openai_api_base
-            ),
+            OpenAIChatCompletion(CONFIG.openai_api_model, CONFIG.openai_api_key),
        )

    return kernel
--- a/metagpt/utils/repair_llm_raw_output.py
+++ b/metagpt/utils/repair_llm_raw_output.py
@ -253,7 +253,7 @@ def retry_parse_json_text(output: str) -> Union[list, dict]:
        if CONFIG.repair_llm_output is True, the _aask_v1 and the retry_parse_json_text will loop for {x=3*3} times.
            it's a two-layer retry cycle
    """
-    logger.debug(f"output to json decode:\n{output}")
+    # logger.debug(f"output to json decode:\n{output}")

    # if CONFIG.repair_llm_output is True, it will try to fix output until the retry break
    parsed_data = CustomDecoder(strict=False).decode(output)
--- a/metagpt/utils/serialize.py
+++ b/metagpt/utils/serialize.py
@ -4,13 +4,11 @@

 import copy
 import pickle
-from typing import Dict, List

-from metagpt.actions.action_output import ActionOutput
-from metagpt.schema import Message
+from metagpt.utils.common import import_class


-def actionoutout_schema_to_mapping(schema: Dict) -> Dict:
+def actionoutout_schema_to_mapping(schema: dict) -> dict:
    """
    directly traverse the `properties` in the first level.
    schema structure likes
@ -35,14 +33,31 @@ def actionoutout_schema_to_mapping(schema: Dict) -> Dict:
        if property["type"] == "string":
            mapping[field] = (str, ...)
        elif property["type"] == "array" and property["items"]["type"] == "string":
-            mapping[field] = (List[str], ...)
+            mapping[field] = (list[str], ...)
        elif property["type"] == "array" and property["items"]["type"] == "array":
-            # here only consider the `List[List[str]]` situation
-            mapping[field] = (List[List[str]], ...)
+            # here only consider the `list[list[str]]` situation
+            mapping[field] = (list[list[str]], ...)
    return mapping


-def serialize_message(message: Message):
+def actionoutput_mapping_to_str(mapping: dict) -> dict:
+    """Return a new dict with the keys of ``mapping`` and each value replaced by ``str(value)``."""
+    return {field: str(spec) for field, spec in mapping.items()}
+
+
+def actionoutput_str_to_mapping(mapping: dict) -> dict:
+    """Rebuild a mapping whose values were stored as strings.
+
+    The exact text ``"(<class 'str'>, Ellipsis)"`` becomes ``(str, ...)``;
+    every other value is evaluated as a Python expression with ``eval``.
+    """
+    str_field_repr = "(<class 'str'>, Ellipsis)"
+    restored = {}
+    for name, text in mapping.items():
+        # NOTE(review): eval() executes arbitrary expressions - only pass trusted data.
+        # e.g. "(list[str], Ellipsis)" evaluates to (list[str], ...)
+        restored[name] = (str, ...) if text == str_field_repr else eval(text)
+    return restored
+
+
+def serialize_message(message: "Message"):
    message_cp = copy.deepcopy(message)  # avoid `instruct_content` value update by reference
    ic = message_cp.instruct_content
    if ic:
@ -56,11 +71,12 @@ def serialize_message(message: Message):
    return msg_ser


-def deserialize_message(message_ser: str) -> Message:
+def deserialize_message(message_ser: str) -> "Message":
    message = pickle.loads(message_ser)
    if message.instruct_content:
        ic = message.instruct_content
-        ic_obj = ActionOutput.create_model_class(class_name=ic["class"], mapping=ic["mapping"])
+        actionnode_class = import_class("ActionNode", "metagpt.actions.action_node")  # avoid circular import
+        ic_obj = actionnode_class.create_model_class(class_name=ic["class"], mapping=ic["mapping"])
        ic_new = ic_obj(**ic["value"])
        message.instruct_content = ic_new

--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@ -7,6 +7,7 @@
 ref1: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 ref2: https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/llm/token_counter.py
 ref3: https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/openai.py
+ref4: https://ai.google.dev/models/gemini
 """
 import tiktoken

@ -16,6 +17,8 @@ TOKEN_COSTS = {
    "gpt-3.5-turbo-0613": {"prompt": 0.0015, "completion": 0.002},
    "gpt-3.5-turbo-16k": {"prompt": 0.003, "completion": 0.004},
    "gpt-3.5-turbo-16k-0613": {"prompt": 0.003, "completion": 0.004},
+    "gpt-35-turbo": {"prompt": 0.0015, "completion": 0.002},
+    "gpt-35-turbo-16k": {"prompt": 0.003, "completion": 0.004},
    "gpt-3.5-turbo-1106": {"prompt": 0.001, "completion": 0.002},
    "gpt-4-0314": {"prompt": 0.03, "completion": 0.06},
    "gpt-4": {"prompt": 0.03, "completion": 0.06},
@ -25,6 +28,7 @@ TOKEN_COSTS = {
    "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
    "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
    "chatglm_turbo": {"prompt": 0.0, "completion": 0.00069},  # 32k version, prompt + completion tokens=0.005￥/k-tokens
+    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
 }


@ -34,6 +38,8 @@ TOKEN_MAX = {
    "gpt-3.5-turbo-0613": 4096,
    "gpt-3.5-turbo-16k": 16384,
    "gpt-3.5-turbo-16k-0613": 16384,
+    "gpt-35-turbo": 4096,
+    "gpt-35-turbo-16k": 16384,
    "gpt-3.5-turbo-1106": 16384,
    "gpt-4-0314": 8192,
    "gpt-4": 8192,
@ -43,6 +49,7 @@ TOKEN_MAX = {
    "gpt-4-1106-preview": 128000,
    "text-embedding-ada-002": 8192,
    "chatglm_turbo": 32768,
+    "gemini-pro": 32768,
 }


@ -56,6 +63,8 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
+        "gpt-35-turbo",
+        "gpt-35-turbo-16k",
        "gpt-3.5-turbo-16k",
        "gpt-3.5-turbo-1106",
        "gpt-4-0314",