1. 动作优化

1. SummarizeCode动作:用于基于代码进行总结,思考bug、逻辑、todo
  2. CodeReview动作优化:目前强制要求回答问题,有更高的成功率了
    1. 增加了LGTM/LBTM的回答,在LGTM时会及时停止,不重写代码
    2. 目前增加了设置中的参数code_review_k_times,与reflexion类似,设置为2
    3. 仍然有概率发生指令不遵循,尤其是会有比较高的概率发生同时review多个代码文件,还没想好怎么解决 #FIXME
  3. 增加了env到Action结构中,现在可以直接调用环境接口了
  4. WriteDesign:去除了对project_name的纠正代码,现在在提示词引导下可以一次性生成正确的project_name
    1. 修改了提示词中的##格式,改为了JSON格式
2. 数据结构
  1. Document的标准化:Env->Repo->Document,其中Document/Asset/Code都是Document
    1. 原用于检索的Document改为IndexableDocument
  2. Repo结构引入:用于Document装载与元数据装载
  3. RepoParser引入:写了一个简单的AST parser(后续可能要换tree-sitter),给出了整库symbol
  4. Env中增加了set/get/set_doc/get_doc接口,用于set/get单个变量或者一个Document。这个逻辑后续或许会进一步简化
3. 配置优化
  1. 默认更换为gpt-4-1106-preview,以获得最好的效果与成本
  2. 提供~/.metagpt作为配置最高优先级目录,从中读取config.yaml
  3. workspace可以灵活指定了,在config中配置
  4. project_name可以由命令行指定,并且改为由ProductManager生成
4. metagpt作为默认命令行,而非python startup.py
metagpt --help

metagpt --project-name game_2048 "make a 2048 game"
metagpt "make a 2048 game"

metagpt --project-name game_2048 --inc "将2048改为4096"

metagpt --project-name game_2048 --auto-inc "make a 2048 game"
  1. 使用新的METAGPT_ROOT生成方式,而非寻找git,以便cli安装
  2. 命令行由fire换为了typer,它会带来相对更好的体验
  3. project_name可以灵活指定了,在metagpt命令行输入中配置
5. 其他
  1. 现在支持多国语言了,中文已测试
  2. BossRequirement -> UserRequirement
  3. 大量错误文本的修正,增加了可读性
  4. 中量提示词优化,稍微提升了一些准确率
  5. 暂时屏蔽了LongtermMemory相关逻辑,这个逻辑底层调用了langchain的FAISS,会带来~5秒加载耗时
  6. 修复了安装包中的部分描述错误
  7. 去除了config中在openai_proxy设定时对base的重复修改,这个修改应该在openai初始化时发生
  8. 修复了JSON在中文存储时的特定问题,ensure_ascii=False
This commit is contained in:
geekan 2023-11-27 15:36:50 +08:00
parent 715a1d874a
commit 22288a342d
24 changed files with 359 additions and 203 deletions

View file

@@ -5,7 +5,7 @@
@Author : alexanderwu
@File : document.py
"""
from enum import Enum
from typing import Union, Optional
from pathlib import Path
from pydantic import BaseModel, Field
@@ -18,7 +18,9 @@ from langchain.document_loaders import (
from langchain.text_splitter import CharacterTextSplitter
from tqdm import tqdm
from metagpt.config import CONFIG
from metagpt.logs import logger
from metagpt.repo_parser import RepoParser
def validate_cols(content_col: str, df: pd.DataFrame):
@@ -48,42 +50,56 @@ def read_data(data_path: Path):
return data
class DocumentStatus(Enum):
"""Indicates document status, a mechanism similar to RFC/PEP"""
DRAFT = "draft"
UNDERREVIEW = "underreview"
APPROVED = "approved"
DONE = "done"
class Document(BaseModel):
"""
Document: Handles operations related to document files.
"""
content: str = Field(default='')
file_path: Path = Field(default=None)
path: Path = Field(default=None)
name: str = Field(default="")
content: str = Field(default="")
# metadata? in content perhaps.
author: str = Field(default="")
status: DocumentStatus = Field(default=DocumentStatus.DRAFT)
reviews: list = Field(default_factory=list)
@classmethod
def from_path(cls, file_path: Path):
def from_path(cls, path: Path):
"""
Create a Document instance from a file path.
"""
if not file_path.exists():
raise FileNotFoundError(f"File {file_path} not found.")
content = file_path.read_text()
return cls(content=content, file_path=file_path)
if not path.exists():
raise FileNotFoundError(f"File {path} not found.")
content = path.read_text()
return cls(content=content, path=path)
@classmethod
def from_text(cls, text: str, file_path: Optional[Path] = None):
def from_text(cls, text: str, path: Optional[Path] = None):
"""
Create a Document from a text string.
"""
return cls(content=text, file_path=file_path)
return cls(content=text, path=path)
def to_path(self, file_path: Optional[Path] = None):
def to_path(self, path: Optional[Path] = None):
"""
Save content to the specified file path.
"""
if file_path is not None:
self.file_path = file_path
if path is not None:
self.path = path
if self.file_path is None:
if self.path is None:
raise ValueError("File path is not set.")
self.file_path.parent.mkdir(parents=True, exist_ok=True)
self.file_path.write_text(self.content)
self.path.parent.mkdir(parents=True, exist_ok=True)
self.path.write_text(self.content, encoding="utf-8")
def persist(self):
"""
@@ -140,25 +156,35 @@ class IndexableDocument(Document):
raise NotImplementedError("Data type not supported for metadata extraction.")
class RepoMetadata(BaseModel):
name: str = Field(default="")
n_docs: int = Field(default=0)
n_chars: int = Field(default=0)
symbols: list = Field(default_factory=list)
class Repo(BaseModel):
# Name of this repo.
name: str = Field(default="")
# metadata: RepoMetadata = Field(default=RepoMetadata)
docs: dict[Path, Document] = Field(default_factory=dict)
codes: dict[Path, Document] = Field(default_factory=dict)
assets: dict[Path, Document] = Field(default_factory=dict)
repo_path: Path = Field(default_factory=Path)
path: Path = Field(default=None)
def _path(self, filename):
return self.repo_path / filename
return self.path / filename
@classmethod
def from_path(cls, repo_path: Path):
def from_path(cls, path: Path):
"""Load documents, code, and assets from a repository path."""
repo_path.mkdir(parents=True, exist_ok=True)
repo = Repo(repo_path = repo_path)
for file_path in repo_path.rglob('*'):
if file_path.is_file():
path.mkdir(parents=True, exist_ok=True)
repo = Repo(path=path, name=path.name)
for file_path in path.rglob('*'):
# FIXME: These judgments are difficult to support multiple programming languages and need to be more general
if file_path.is_file() and file_path.suffix in [".json", ".txt", ".md", ".py", ".js", ".css", ".html"]:
repo._set(file_path.read_text(), file_path)
return repo
@@ -171,23 +197,24 @@ class Repo(BaseModel):
for asset in self.assets.values():
asset.to_path()
def _set(self, content: str, file_path: Path):
def _set(self, content: str, path: Path):
"""Add a document to the appropriate category based on its file extension."""
file_ext = file_path.suffix
suffix = path.suffix
doc = Document(content=content, path=path, name=str(path.relative_to(self.path)))
doc = Document(content=content, file_path=file_path)
if file_ext.lower() == '.md':
self.docs[file_path] = doc
elif file_ext.lower() in ['.py', '.js', '.css', '.html']:
self.codes[file_path] = doc
# FIXME: These judgments are difficult to support multiple programming languages and need to be more general
if suffix.lower() == '.md':
self.docs[path] = doc
elif suffix.lower() in ['.py', '.js', '.css', '.html']:
self.codes[path] = doc
else:
self.assets[file_path] = doc
self.assets[path] = doc
return doc
def set(self, content: str, filename: str):
"""Set a document and persist it to disk."""
file_path = self._path(filename)
doc = self._set(content, file_path)
path = self._path(filename)
doc = self._set(content, path)
doc.to_path()
def get(self, filename: str) -> Optional[Document]:
@@ -195,13 +222,32 @@ class Repo(BaseModel):
path = self._path(filename)
return self.docs.get(path) or self.codes.get(path) or self.assets.get(path)
def get_text_documents(self) -> list[Document]:
return list(self.docs.values()) + list(self.codes.values())
def main():
repo1 = Repo.from_path(Path("/Users/alexanderwu/workspace/t1"))
def eda(self) -> RepoMetadata:
n_docs = sum(len(i) for i in [self.docs, self.codes, self.assets])
n_chars = sum(sum(len(j.content) for j in i.values()) for i in [self.docs, self.codes, self.assets])
symbols = RepoParser(base_directory=self.path).generate_symbols()
return RepoMetadata(name=self.name, n_docs=n_docs, n_chars=n_chars, symbols=symbols)
def set_existing_repo(path=CONFIG.workspace_path / "t1"):
repo1 = Repo.from_path(path)
repo1.set("wtf content", "doc/wtf_file.md")
repo1.set("wtf code", "code/wtf_file.py")
logger.info(repo1) # check doc
def load_existing_repo(path=CONFIG.workspace_path / "web_tetris"):
repo = Repo.from_path(path)
logger.info(repo)
logger.info(repo.eda())
def main():
load_existing_repo()
if __name__ == '__main__':
main()