1. 动作优化

1. SummarizeCode动作:用于基于代码进行总结,思考bug、逻辑、todo
  2. CodeReview动作优化:目前强制要求回答问题,有更高的成功率了
    1. 增加了LGTM/LBTM的回答,在LGTM时会及时停止,不重写代码
    2. 目前增加了设置中的参数code_review_k_times,与reflexion类似,设置为2
    3. 仍然有概率发生指令不遵循,尤其是会有比较高的概率发生同时review多个代码文件,还没想好怎么解决 #FIXME
  3. 增加了env到Action结构中,现在可以直接调用环境接口了
  4. WriteDesign:去除了对project_name的纠正代码,现在在提示词引导下可以一次性生成正确的project_name
    1. 修改了提示词中的##格式,改为了JSON格式
2. 数据结构
  1. Document的标准化:Env->Repo->Document,其中Document/Asset/Code都是Document
    1. 原用于检索的Document改为IndexableDocument
  2. Repo结构引入:用于Document装载与元数据装载
  3. RepoParser引入:写了一个简单的AST parser(后续可能要换tree-sitter),给出了整库symbol
  4. Env中增加了set/get/set_doc/get_doc接口,用于set/get单个变量或者一个Document。这个逻辑后续或许会进一步简化
3. 配置优化
  1. 默认更换为gpt-4-1106-preview,以获得最好的效果与成本
  2. 提供~/.metagpt作为配置最高优先级目录,从中读取config.yaml
  3. workspace可以灵活指定了,在config中配置
  4. project_name可以由命令行指定,并且改为由ProductManager生成
4. metagpt作为默认命令行,而非python startup.py
metagpt --help

metagpt --project-name game_2048 "make a 2048 game"
metagpt "make a 2048 game"

metagpt --project-name game_2048 --inc "将2048改为4096"

metagpt --project-name game_2048 --auto-inc "make a 2048 game"
  1. 使用新的METAGPT_ROOT生成方式,而非寻找git,以便cli安装
  2. 命令行由fire换为了typer,它会带来相对更好的体验
  3. project_name可以灵活指定了,在metagpt命令行输入中配置
5. 其他
  1. 现在支持多国语言了,中文已测试
  2. BossRequirement -> UserRequirement
  3. 大量错误文本的修正,增加了可读性
  4. 中量提示词优化,稍微提升了一些准确率
  5. 暂时屏蔽了LongtermMemory相关逻辑,这个逻辑底层调用了langchain的FAISS,会带来~5秒加载耗时
  6. 修复了安装包中的部分描述错误
  7. 去除了config中在openai_proxy设定时对base的重复修改,这个修改应该在openai初始化时发生
  8. 修复了JSON在中文存储时的特定问题,ensure_ascii=False
This commit is contained in:
geekan 2023-11-27 15:36:50 +08:00
parent 715a1d874a
commit 22288a342d
24 changed files with 359 additions and 203 deletions

View file

@@ -5,7 +5,7 @@
@Author : alexanderwu
@File : document.py
"""
from enum import Enum
from typing import Union, Optional
from pathlib import Path
from pydantic import BaseModel, Field
@@ -18,7 +18,9 @@ from langchain.document_loaders import (
from langchain.text_splitter import CharacterTextSplitter
from tqdm import tqdm
from metagpt.config import CONFIG
from metagpt.logs import logger
from metagpt.repo_parser import RepoParser
def validate_cols(content_col: str, df: pd.DataFrame):
@@ -48,42 +50,56 @@ def read_data(data_path: Path):
return data
class DocumentStatus(Enum):
"""Indicates document status, a mechanism similar to RFC/PEP"""
DRAFT = "draft"
UNDERREVIEW = "underreview"
APPROVED = "approved"
DONE = "done"
class Document(BaseModel):
"""
Document: Handles operations related to document files.
"""
content: str = Field(default='')
file_path: Path = Field(default=None)
path: Path = Field(default=None)
name: str = Field(default="")
content: str = Field(default="")
# metadata? in content perhaps.
author: str = Field(default="")
status: DocumentStatus = Field(default=DocumentStatus.DRAFT)
reviews: list = Field(default_factory=list)
@classmethod
def from_path(cls, file_path: Path):
def from_path(cls, path: Path):
"""
Create a Document instance from a file path.
"""
if not file_path.exists():
raise FileNotFoundError(f"File {file_path} not found.")
content = file_path.read_text()
return cls(content=content, file_path=file_path)
if not path.exists():
raise FileNotFoundError(f"File {path} not found.")
content = path.read_text()
return cls(content=content, path=path)
@classmethod
def from_text(cls, text: str, file_path: Optional[Path] = None):
def from_text(cls, text: str, path: Optional[Path] = None):
"""
Create a Document from a text string.
"""
return cls(content=text, file_path=file_path)
return cls(content=text, path=path)
def to_path(self, file_path: Optional[Path] = None):
def to_path(self, path: Optional[Path] = None):
"""
Save content to the specified file path.
"""
if file_path is not None:
self.file_path = file_path
if path is not None:
self.path = path
if self.file_path is None:
if self.path is None:
raise ValueError("File path is not set.")
self.file_path.parent.mkdir(parents=True, exist_ok=True)
self.file_path.write_text(self.content)
self.path.parent.mkdir(parents=True, exist_ok=True)
self.path.write_text(self.content, encoding="utf-8")
def persist(self):
"""
@@ -140,25 +156,35 @@ class IndexableDocument(Document):
raise NotImplementedError("Data type not supported for metadata extraction.")
class RepoMetadata(BaseModel):
name: str = Field(default="")
n_docs: int = Field(default=0)
n_chars: int = Field(default=0)
symbols: list = Field(default_factory=list)
class Repo(BaseModel):
# Name of this repo.
name: str = Field(default="")
# metadata: RepoMetadata = Field(default=RepoMetadata)
docs: dict[Path, Document] = Field(default_factory=dict)
codes: dict[Path, Document] = Field(default_factory=dict)
assets: dict[Path, Document] = Field(default_factory=dict)
repo_path: Path = Field(default_factory=Path)
path: Path = Field(default=None)
def _path(self, filename):
return self.repo_path / filename
return self.path / filename
@classmethod
def from_path(cls, repo_path: Path):
def from_path(cls, path: Path):
"""Load documents, code, and assets from a repository path."""
repo_path.mkdir(parents=True, exist_ok=True)
repo = Repo(repo_path = repo_path)
for file_path in repo_path.rglob('*'):
if file_path.is_file():
path.mkdir(parents=True, exist_ok=True)
repo = Repo(path=path, name=path.name)
for file_path in path.rglob('*'):
# FIXME: These judgments are difficult to support multiple programming languages and need to be more general
if file_path.is_file() and file_path.suffix in [".json", ".txt", ".md", ".py", ".js", ".css", ".html"]:
repo._set(file_path.read_text(), file_path)
return repo
@@ -171,23 +197,24 @@ class Repo(BaseModel):
for asset in self.assets.values():
asset.to_path()
def _set(self, content: str, file_path: Path):
def _set(self, content: str, path: Path):
"""Add a document to the appropriate category based on its file extension."""
file_ext = file_path.suffix
suffix = path.suffix
doc = Document(content=content, path=path, name=str(path.relative_to(self.path)))
doc = Document(content=content, file_path=file_path)
if file_ext.lower() == '.md':
self.docs[file_path] = doc
elif file_ext.lower() in ['.py', '.js', '.css', '.html']:
self.codes[file_path] = doc
# FIXME: These judgments are difficult to support multiple programming languages and need to be more general
if suffix.lower() == '.md':
self.docs[path] = doc
elif suffix.lower() in ['.py', '.js', '.css', '.html']:
self.codes[path] = doc
else:
self.assets[file_path] = doc
self.assets[path] = doc
return doc
def set(self, content: str, filename: str):
"""Set a document and persist it to disk."""
file_path = self._path(filename)
doc = self._set(content, file_path)
path = self._path(filename)
doc = self._set(content, path)
doc.to_path()
def get(self, filename: str) -> Optional[Document]:
@@ -195,13 +222,32 @@ class Repo(BaseModel):
path = self._path(filename)
return self.docs.get(path) or self.codes.get(path) or self.assets.get(path)
def get_text_documents(self) -> list[Document]:
return list(self.docs.values()) + list(self.codes.values())
def main():
repo1 = Repo.from_path(Path("/Users/alexanderwu/workspace/t1"))
def eda(self) -> RepoMetadata:
n_docs = sum(len(i) for i in [self.docs, self.codes, self.assets])
n_chars = sum(sum(len(j.content) for j in i.values()) for i in [self.docs, self.codes, self.assets])
symbols = RepoParser(base_directory=self.path).generate_symbols()
return RepoMetadata(name=self.name, n_docs=n_docs, n_chars=n_chars, symbols=symbols)
def set_existing_repo(path=CONFIG.workspace_path / "t1"):
repo1 = Repo.from_path(path)
repo1.set("wtf content", "doc/wtf_file.md")
repo1.set("wtf code", "code/wtf_file.py")
logger.info(repo1) # check doc
def load_existing_repo(path=CONFIG.workspace_path / "web_tetris"):
repo = Repo.from_path(path)
logger.info(repo)
logger.info(repo.eda())
def main():
load_existing_repo()
if __name__ == '__main__':
main()