roadmap 2.2

2026-06-26 15:49:42 +02:00 · 2023-07-12 11:37:24 +08:00 · 2023-07-12 11:37:24 +08:00 · d3d249e844
commit d3d249e844
parent 10a12e0906
11 changed files with 361 additions and 31 deletions
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@ -10,7 +10,7 @@ import ast
 import inspect
 import re

-from typing import Union
+from typing import Union, List, Tuple
 from metagpt.logs import logger
 from langchain.schema import AgentAction, AgentFinish, OutputParserException

@ -27,6 +27,112 @@ def check_cmd_exists(command) -> int:
    return result


+class OutputParser:
+
+    @classmethod
+    def parse_blocks(cls, text: str):  # returns a dict mapping block title -> block content
+        # First split the text into separate blocks on "##"
+        blocks = text.split("##")
+
+        # Dictionary that stores the title and content of each block
+        block_dict = {}
+
+        # Iterate over all blocks
+        for block in blocks:
+            # Only process the block if it is not blank
+            if block.strip() != "":
+                # Split the block into its title (first line) and content; both are stripped when stored below
+                block_title, block_content = block.split("\n", 1)  # NOTE: ValueError if the block has no newline
+                # The LLM may get this wrong (trailing ":" on the title), so correct it here
+                if block_title[-1] == ":":  # NOTE: IndexError if the title line is empty
+                    block_title = block_title[:-1]
+                block_dict[block_title.strip()] = block_content.strip()
+
+        return block_dict
+
+    @classmethod
+    def parse_code(cls, text: str, lang: str = "") -> str:  # text captured inside the first ``` fence
+        pattern = rf'```{lang}.*?\s+(.*?)```'  # NOTE: lang is interpolated into the regex unescaped
+        match = re.search(pattern, text, re.DOTALL)
+        if match:
+            code = match.group(1)
+        else:
+            raise Exception  # no fenced block found; callers in this class catch Exception
+        return code
+
+    @classmethod
+    def parse_str(cls, text: str):  # value after the last "=", without surrounding quotes
+        text = text.split("=")[-1]
+        text = text.strip().strip("'").strip("\"")
+        return text
+
+    @classmethod
+    def parse_file_list(cls, text: str) -> list[str]:
+        # Regular expression pattern to find the tasks list (optionally preceded by "name =").
+        pattern = r'\s*(.*=.*)?(\[.*\])'  # greedy with DOTALL: spans from a "[" to the last "]" in text
+
+        # Extract tasks list string using regex.
+        match = re.search(pattern, text, re.DOTALL)
+        if match:
+            tasks_list_str = match.group(2)
+
+            # Convert string representation of list to a Python list using ast.literal_eval.
+            tasks = ast.literal_eval(tasks_list_str)  # raises if the text is not a valid Python literal
+        else:
+            raise Exception  # no bracketed list found
+        return tasks
+
+    @classmethod
+    def parse_data(cls, data):  # returns {block title: list if parseable, else text}
+        block_dict = cls.parse_blocks(data)
+        parsed_data = {}
+        for block, content in block_dict.items():
+            # Try to strip the code-fence markers; on failure the content is kept as-is
+            try:
+                content = cls.parse_code(text=content)
+            except Exception:
+                pass
+
+            # Try to parse the content as a list; on failure the content is kept as-is
+            try:
+                content = cls.parse_file_list(text=content)
+            except Exception:
+                pass
+            parsed_data[block] = content
+        return parsed_data
+
+    @classmethod
+    def parse_data_with_mapping(cls, data, mapping):  # mapping: block title -> type, or tuple whose first item is the type
+        block_dict = cls.parse_blocks(data)
+        parsed_data = {}
+        for block, content in block_dict.items():
+            # Try to strip the code-fence markers; on failure the content is kept as-is
+            try:
+                content = cls.parse_code(text=content)
+            except Exception:
+                pass
+            typing_define = mapping.get(block, None)  # None when the block title is not in the mapping
+            if isinstance(typing_define, tuple):
+                typing = typing_define[0]  # only the first tuple element is used as the type
+            else:
+                typing = typing_define
+            if typing == List[str] or typing == List[Tuple[str, str]]:
+                # Try to parse the content as a list; on failure the content is kept as-is
+                try:
+                    content = cls.parse_file_list(text=content)
+                except Exception:
+                    pass
+            # TODO: stripping redundant quotes is risky; to be resolved later
+            # elif typing == str:
+            #     # Try to strip redundant quotes
+            #     try:
+            #         content = cls.parse_str(text=content)
+            #     except Exception:
+            #         pass
+            parsed_data[block] = content
+        return parsed_data
+
+
 class CodeParser:

    @classmethod
@ -56,7 +162,7 @@ class CodeParser:
        return block_dict

    @classmethod
-    def parse_code(cls, block: str, text: str, lang: str="") -> str:
+    def parse_code(cls, block: str, text: str, lang: str = "") -> str:
        if block:
            text = cls.parse_block(block, text)
        pattern = rf'```{lang}.*?\s+(.*?)```'
@ -70,16 +176,17 @@ class CodeParser:
        return code

    @classmethod
-    def parse_str(cls, block: str, text: str, lang: str=""):
+    def parse_str(cls, block: str, text: str, lang: str = ""):
        code = cls.parse_code(block, text, lang)
        code = code.split("=")[-1]
        code = code.strip().strip("'").strip("\"")
        return code

    @classmethod
-    def parse_file_list(cls, block: str, text: str, lang: str="") -> list[str]:
+    def parse_file_list(cls, block: str, text: str, lang: str = "") -> list[str]:
        # Regular expression pattern to find the tasks list.
        code = cls.parse_code(block, text, lang)
+        print(code)
        pattern = r'\s*(.*=.*)?(\[.*\])'

        # Extract tasks list string using regex.
@ -96,6 +203,7 @@ class CodeParser:

 class NoMoneyException(Exception):
    """Raised when the operation cannot be completed due to insufficient funds"""
+
    def __init__(self, amount, message="Insufficient funds"):
        self.amount = amount
        self.message = message
@ -154,4 +262,4 @@ if __name__ == '__main__':
    logger.info(rsp)

    rsp = parser.parse(final_answer_sample)
-    logger.info(rsp)
+    logger.info(rsp)