Merge branch 'main' into incremental_development

# Conflicts: # metagpt/schema.py
2026-04-27 17:56:23 +02:00 · 2024-01-19 19:53:17 +08:00 · 2024-01-19 19:53:17 +08:00 · e1b783ca14
commit e1b783ca14
parent bed3040ff3 5389c52556
127 changed files with 2346 additions and 648 deletions
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@@ -407,6 +407,10 @@ def concat_namespace(*args) -> str:
    return ":".join(str(value) for value in args)


+def split_namespace(ns_class_name: str, maxsplit: int = -1) -> List[str]:
+    """Split a ":"-joined namespace string into its components.
+
+    Args:
+        ns_class_name: A string whose components are separated by ":".
+        maxsplit: Maximum number of splits, counted from the left. The default
+            of -1 splits on every ":". Pass a smaller value when a trailing
+            component may itself contain ":".
+
+    Returns:
+        The list of components, e.g. "a.py:A:run" -> ["a.py", "A", "run"].
+    """
+    return ns_class_name.split(":", maxsplit)
+
+
 def general_after_log(i: "loguru.Logger", sec_format: str = "%0.3f") -> typing.Callable[["RetryCallState"], None]:
    """
    Generates a logging function to be used after a call is retried.
@@ -546,3 +550,20 @@ async def read_file_block(filename: str | Path, lineno: int, end_lineno: int):
                break
            lines.append(line)
    return "".join(lines)
+
+
+def list_files(root: str | Path) -> List[Path]:
+    """Recursively collect every regular file under `root`.
+
+    Args:
+        root: Directory to walk.
+
+    Returns:
+        Paths of all files found under `root`, including files in nested
+        sub-directories. An empty list is returned when `root` does not exist
+        or is not a directory.
+    """
+    files = []
+    try:
+        directory_path = Path(root)
+        # Only a directory can be iterated; a missing path or a plain file
+        # contributes nothing instead of raising inside `iterdir()`.
+        if not directory_path.is_dir():
+            return []
+        for file_path in directory_path.iterdir():
+            if file_path.is_file():
+                files.append(file_path)
+            elif file_path.is_dir():
+                # Entries that are neither files nor directories (sockets,
+                # FIFOs, broken symlinks) are skipped.
+                files.extend(list_files(root=file_path))
+    except Exception as e:
+        # Best effort: report the failure and return whatever was collected so far.
+        logger.error(f"Failed to list files under {root}: {e}")
+    return files
--- a/metagpt/utils/di_graph_repository.py
+++ b/metagpt/utils/di_graph_repository.py
@@ -12,9 +12,9 @@ import json
 from pathlib import Path
 from typing import List

-import aiofiles
 import networkx

+from metagpt.utils.common import aread, awrite
 from metagpt.utils.graph_repository import SPO, GraphRepository


@@ -55,12 +55,10 @@ class DiGraphRepository(GraphRepository):
        if not path.exists():
            path.mkdir(parents=True, exist_ok=True)
        pathname = Path(path) / self.name
-        async with aiofiles.open(str(pathname.with_suffix(".json")), mode="w", encoding="utf-8") as writer:
-            await writer.write(data)
+        await awrite(filename=pathname.with_suffix(".json"), data=data, encoding="utf-8")

    async def load(self, pathname: str | Path):
-        async with aiofiles.open(str(pathname), mode="r", encoding="utf-8") as reader:
-            data = await reader.read(-1)
+        data = await aread(filename=pathname, encoding="utf-8")
        m = json.loads(data)
        self._repo = networkx.node_link_graph(m)

--- a/metagpt/utils/file_repository.py
+++ b/metagpt/utils/file_repository.py
@@ -55,6 +55,7 @@ class FileRepository:
        """
        pathname = self.workdir / filename
        pathname.parent.mkdir(parents=True, exist_ok=True)
+        content = content if content else ""  # avoid `argument must be str, not None` to make it continue
        async with aiofiles.open(str(pathname), mode="w") as writer:
            await writer.write(content)
        logger.info(f"save to: {str(pathname)}")
@@ -138,6 +139,8 @@ class FileRepository:
        files = self._git_repo.changed_files
        relative_files = {}
        for p, ct in files.items():
+            if ct.value == "D":  # deleted
+                continue
            try:
                rf = Path(p).relative_to(self._relative_path)
            except ValueError:
--- a/metagpt/utils/graph_repository.py
+++ b/metagpt/utils/graph_repository.py
@@ -13,19 +13,25 @@ from typing import List

 from pydantic import BaseModel

-from metagpt.repo_parser import ClassInfo, RepoFileInfo
+from metagpt.logs import logger
+from metagpt.repo_parser import ClassInfo, ClassRelationship, RepoFileInfo
 from metagpt.utils.common import concat_namespace


 class GraphKeyword:
    IS = "is"
+    OF = "Of"
+    ON = "On"
    CLASS = "class"
    FUNCTION = "function"
+    HAS_FUNCTION = "has_function"
    SOURCE_CODE = "source_code"
    NULL = "<null>"
    GLOBAL_VARIABLE = "global_variable"
    CLASS_FUNCTION = "class_function"
    CLASS_PROPERTY = "class_property"
+    HAS_CLASS_FUNCTION = "has_class_function"
+    HAS_CLASS_PROPERTY = "has_class_property"
    HAS_CLASS = "has_class"
    HAS_PAGE_INFO = "has_page_info"
    HAS_CLASS_VIEW = "has_class_view"
@@ -73,11 +79,13 @@ class GraphRepository(ABC):
        await graph_db.insert(subject=file_info.file, predicate=GraphKeyword.IS, object_=file_type)
        for c in file_info.classes:
            class_name = c.get("name", "")
+            # file -> class
            await graph_db.insert(
                subject=file_info.file,
                predicate=GraphKeyword.HAS_CLASS,
                object_=concat_namespace(file_info.file, class_name),
            )
+            # class detail
            await graph_db.insert(
                subject=concat_namespace(file_info.file, class_name),
                predicate=GraphKeyword.IS,
@@ -85,12 +93,22 @@ class GraphRepository(ABC):
            )
            methods = c.get("methods", [])
            for fn in methods:
+                await graph_db.insert(
+                    subject=concat_namespace(file_info.file, class_name),
+                    predicate=GraphKeyword.HAS_CLASS_FUNCTION,
+                    object_=concat_namespace(file_info.file, class_name, fn),
+                )
                await graph_db.insert(
                    subject=concat_namespace(file_info.file, class_name, fn),
                    predicate=GraphKeyword.IS,
                    object_=GraphKeyword.CLASS_FUNCTION,
                )
        for f in file_info.functions:
+            # file -> function
+            await graph_db.insert(
+                subject=file_info.file, predicate=GraphKeyword.HAS_FUNCTION, object_=concat_namespace(file_info.file, f)
+            )
+            # function detail
            await graph_db.insert(
                subject=concat_namespace(file_info.file, f), predicate=GraphKeyword.IS, object_=GraphKeyword.FUNCTION
            )
@@ -105,30 +123,37 @@ class GraphRepository(ABC):
                await graph_db.insert(
                    subject=concat_namespace(file_info.file, *code_block.tokens),
                    predicate=GraphKeyword.HAS_PAGE_INFO,
-                    object_=code_block.json(ensure_ascii=False),
+                    object_=code_block.model_dump_json(),
                )
            for k, v in code_block.properties.items():
                await graph_db.insert(
                    subject=concat_namespace(file_info.file, k, v),
                    predicate=GraphKeyword.HAS_PAGE_INFO,
-                    object_=code_block.json(ensure_ascii=False),
+                    object_=code_block.model_dump_json(),
                )

    @staticmethod
    async def update_graph_db_with_class_views(graph_db: "GraphRepository", class_views: List[ClassInfo]):
        for c in class_views:
-            filename, class_name = c.package.split(":", 1)
+            filename, _ = c.package.split(":", 1)
            await graph_db.insert(subject=filename, predicate=GraphKeyword.IS, object_=GraphKeyword.SOURCE_CODE)
            file_types = {".py": "python", ".js": "javascript"}
            file_type = file_types.get(Path(filename).suffix, GraphKeyword.NULL)
            await graph_db.insert(subject=filename, predicate=GraphKeyword.IS, object_=file_type)
-            await graph_db.insert(subject=filename, predicate=GraphKeyword.HAS_CLASS, object_=class_name)
+            await graph_db.insert(subject=filename, predicate=GraphKeyword.HAS_CLASS, object_=c.package)
            await graph_db.insert(
                subject=c.package,
                predicate=GraphKeyword.IS,
                object_=GraphKeyword.CLASS,
            )
            for vn, vt in c.attributes.items():
+                # class -> property
+                await graph_db.insert(
+                    subject=c.package,
+                    predicate=GraphKeyword.HAS_CLASS_PROPERTY,
+                    object_=concat_namespace(c.package, vn),
+                )
+                # property detail
                await graph_db.insert(
                    subject=concat_namespace(c.package, vn),
                    predicate=GraphKeyword.IS,
@@ -138,6 +163,15 @@ class GraphRepository(ABC):
                    subject=concat_namespace(c.package, vn), predicate=GraphKeyword.HAS_TYPE_DESC, object_=vt
                )
            for fn, desc in c.methods.items():
+                if "</I>" in desc and "<I>" not in desc:
+                    logger.error(desc)
+                # class -> function
+                await graph_db.insert(
+                    subject=c.package,
+                    predicate=GraphKeyword.HAS_CLASS_FUNCTION,
+                    object_=concat_namespace(c.package, fn),
+                )
+                # function detail
                await graph_db.insert(
                    subject=concat_namespace(c.package, fn),
                    predicate=GraphKeyword.IS,
@@ -148,3 +182,19 @@ class GraphRepository(ABC):
                    predicate=GraphKeyword.HAS_ARGS_DESC,
                    object_=desc,
                )
+
+    @staticmethod
+    async def update_graph_db_with_class_relationship_views(
+        graph_db: "GraphRepository", relationship_views: List[ClassRelationship]
+    ):
+        """Insert class-relationship triples into `graph_db`.
+
+        For every relationship, one triple links `src` to `dest` using the
+        predicate "is" + relationship + "Of". When the relationship carries a
+        label, a second triple links `src` to the namespaced "dest:label" using
+        the predicate "is" + relationship + "On".
+        """
+        for view in relationship_views:
+            # Both predicates share the same "is<relationship>" stem.
+            predicate_stem = GraphKeyword.IS + view.relationship
+            await graph_db.insert(
+                subject=view.src,
+                predicate=predicate_stem + GraphKeyword.OF,
+                object_=view.dest,
+            )
+            if view.label:
+                labelled_dest = concat_namespace(view.dest, view.label)
+                await graph_db.insert(
+                    subject=view.src,
+                    predicate=predicate_stem + GraphKeyword.ON,
+                    object_=labelled_dest,
+                )
--- a/metagpt/utils/repair_llm_raw_output.py
+++ b/metagpt/utils/repair_llm_raw_output.py
@@ -120,6 +120,15 @@ def repair_json_format(output: str) -> str:
    elif output.startswith("{") and output.endswith("]"):
        output = output[:-1] + "}"

+    # remove `#` in output json str, usually appeared in `glm-4`
+    arr = output.split("\n")
+    new_arr = []
+    for line in arr:
+        idx = line.find("#")
+        if idx >= 0:
+            line = line[:idx]
+        new_arr.append(line)
+    output = "\n".join(new_arr)
    return output


@@ -168,15 +177,17 @@ def repair_invalid_json(output: str, error: str) -> str:
        example 1. json.decoder.JSONDecodeError: Expecting ',' delimiter: line 154 column 1 (char 2765)
        example 2. xxx.JSONDecodeError: Expecting property name enclosed in double quotes: line 14 column 1 (char 266)
    """
-    pattern = r"line ([0-9]+)"
+    pattern = r"line ([0-9]+) column ([0-9]+)"

    matches = re.findall(pattern, error, re.DOTALL)
    if len(matches) > 0:
-        line_no = int(matches[0]) - 1
+        line_no = int(matches[0][0]) - 1
+        col_no = int(matches[0][1]) - 1

        # due to CustomDecoder can handle `"": ''` or `'': ""`, so convert `"""` -> `"`, `'''` -> `'`
        output = output.replace('"""', '"').replace("'''", '"')
        arr = output.split("\n")
+        rline = arr[line_no]  # raw line
        line = arr[line_no].strip()
        # different general problems
        if line.endswith("],"):
@@ -187,9 +198,12 @@ def repair_invalid_json(output: str, error: str) -> str:
            new_line = line.replace("}", "")
        elif line.endswith("},") and output.endswith("},"):
            new_line = line[:-1]
-        elif '",' not in line and "," not in line:
+        elif (rline[col_no] in ["'", '"']) and (line.startswith('"') or line.startswith("'")) and "," not in line:
+            # problem, `"""` or `'''` without `,`
+            new_line = f",{line}"
+        elif '",' not in line and "," not in line and '"' not in line:
            new_line = f'{line}",'
-        elif "," not in line:
+        elif not line.endswith(","):
            # problem, miss char `,` at the end.
            new_line = f"{line},"
        elif "," in line and len(line) == 1:
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -27,7 +27,8 @@ TOKEN_COSTS = {
    "gpt-4-0613": {"prompt": 0.06, "completion": 0.12},
    "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
    "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
-    "chatglm_turbo": {"prompt": 0.0, "completion": 0.00069},  # 32k version, prompt + completion tokens=0.005￥/k-tokens
+    "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
+    "glm-4": {"prompt": 0.0, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
 }