feat: Transmit file names instead of file content during the QA process.

This commit is contained in:
莘权 马 2023-11-23 22:41:44 +08:00
parent 13b37306cd
commit ec3dd004af
8 changed files with 159 additions and 151 deletions

View file

@ -5,7 +5,6 @@
@Author : alexanderwu
@File : debug_error.py
"""
import re
from metagpt.actions.action import Action
from metagpt.logs import logger
@ -36,18 +35,17 @@ class DebugError(Action):
# fixed_code = await self._aask(prompt)
# return fixed_code
async def run(self, *args, **kwargs) -> str:
    """Ask the LLM to rewrite code based on the run output stored in the context.

    Reads ``self.context.output`` (the captured run output) and
    ``self.context.code_filename``; positional and keyword arguments are ignored.

    Returns:
        The code parsed from the LLM response, or an empty string when the
        output contains "PASS" and no rewrite is needed.
    """
    # ``output`` is optional on the context; treat a missing value as empty text.
    output = self.context.output or ""
    if "PASS" in output:
        # Always return a str, as annotated, instead of a (name, reason) tuple.
        logger.info("the original code works fine, no need to debug")
        return ""

    file_name = self.context.code_filename
    logger.info(f"Debug and rewrite {file_name}")

    prompt = PROMPT_TEMPLATE.format(context=output)
    rsp = await self._aask(prompt)
    return CodeParser.parse_code(block="", text=rsp)

View file

@ -98,24 +98,22 @@ class RunCode(Action):
stdout, stderr = process.communicate()
return stdout.decode("utf-8"), stderr.decode("utf-8")
async def run(
self, code, mode="script", code_file_name="", test_code="", test_file_name="", command=[], **kwargs
) -> str:
logger.info(f"Running {' '.join(command)}")
if mode == "script":
outs, errs = await self.run_script(command=command, **kwargs)
elif mode == "text":
outs, errs = await self.run_text(code=code)
async def run(self, *args, **kwargs) -> str:
logger.info(f"Running {' '.join(self.context.command)}")
if self.context.mode == "script":
outs, errs = await self.run_script(command=self.context.command, **kwargs)
elif self.context.mode == "text":
outs, errs = await self.run_text(code=self.context.code)
logger.info(f"{outs=}")
logger.info(f"{errs=}")
context = CONTEXT.format(
code=code,
code_file_name=code_file_name,
test_code=test_code,
test_file_name=test_file_name,
command=" ".join(command),
code=self.context.code,
code_file_name=self.context.code_filename,
test_code=self.context.test_code,
test_file_name=self.context.test_filename,
command=" ".join(self.context.command),
outs=outs[:500], # outs might be long but they are not important, truncate them to avoid token overflow
errs=errs[:10000], # truncate errors to avoid token overflow
)

View file

@ -6,7 +6,9 @@
@File : environment.py
"""
from metagpt.actions.action import Action
from metagpt.config import CONFIG
from metagpt.logs import logger
from metagpt.schema import TestingContext
from metagpt.utils.common import CodeParser
PROMPT_TEMPLATE = """
@ -47,12 +49,12 @@ class WriteTest(Action):
code = code_rsp
return code
async def run(self, *args, **kwargs) -> TestingContext:
    """Generate test code for the code document held in ``self.context``.

    The prompt is built from the code document's content and repository-relative
    path, the test document's file name, and the git repository working
    directory. Positional and keyword arguments are ignored.

    Returns:
        The same context object, with ``test_doc.content`` replaced by the
        generated test code.
    """
    ctx = self.context
    prompt = PROMPT_TEMPLATE.format(
        code_to_test=ctx.code_doc.content,
        test_file_name=ctx.test_doc.filename,
        source_file_path=ctx.code_doc.root_relative_path,
        workspace=CONFIG.git_repo.workdir,
    )
    ctx.test_doc.content = await self.write_code(prompt)
    return ctx

View file

@ -61,3 +61,5 @@ SEQ_FLOW_FILE_REPO = "resources/seq_flow"
SYSTEM_DESIGN_PDF_FILE_REPO = "resources/system_design"
PRD_PDF_FILE_REPO = "resources/prd"
TASK_PDF_FILE_REPO = "resources/api_spec_and_tasks"
TEST_CODES_FILE_REPO = "tests"
OUTPUTS_FILE_REPO = "outputs"

View file

@ -15,6 +15,7 @@ from __future__ import annotations
import json
from pathlib import Path
from typing import Set
from metagpt.actions import Action, WriteCode, WriteCodeReview, WriteTasks
from metagpt.config import CONFIG
@ -22,7 +23,6 @@ from metagpt.const import SYSTEM_DESIGN_FILE_REPO, TASK_FILE_REPO
from metagpt.logs import logger
from metagpt.roles import Role
from metagpt.schema import CodingContext, Document, Documents, Message
from metagpt.utils.special_tokens import FILENAME_CODE_SEP, MSG_SEP
class Engineer(Role):
@ -60,8 +60,8 @@ class Engineer(Role):
m = json.loads(task_msg.content)
return m.get("Task list")
async def _act_sp_precision(self, review=False) -> Message:
code_msg_all = [] # gather all code info, will pass to qa_engineer for tests later
async def _act_sp_precision(self, review=False) -> Set[str]:
changed_files = set()
src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
for todo in self.todos:
"""
@ -88,23 +88,26 @@ class Engineer(Role):
content=coding_context.json(), instruct_content=coding_context, role=self.profile, cause_by=WriteCode
)
self._rc.memory.add(msg)
self.publish_message(msg)
code_msg = coding_context.filename + FILENAME_CODE_SEP + str(coding_context.code_doc.root_relative_path)
code_msg_all.append(code_msg)
logger.info(f"Done {CONFIG.src_workspace} generating.")
msg = Message(
content=MSG_SEP.join(code_msg_all),
role=self.profile,
cause_by=self._rc.todo,
send_to="Edward",
)
return msg
changed_files.add(coding_context.code_doc.filename)
return changed_files
async def _act(self) -> Message:
    """Write the pending code files and notify the QA engineer which files changed.

    Returns:
        A message addressed to ``QaEngineer`` whose content is the changed
        file names, one per line.
    """
    changed_files = await self._act_sp_precision(review=self.use_code_review)
    # Unit-test-only mode: make sure the file configured for re-testing is included.
    if CONFIG.REQA_FILENAME and CONFIG.REQA_FILENAME not in changed_files:
        changed_files.add(CONFIG.REQA_FILENAME)

    from metagpt.roles import QaEngineer  # local import to avoid a circular import

    # Sort the set so the message content is deterministic across runs.
    msg = Message(
        content="\n".join(sorted(changed_files)),
        role=self.profile,
        cause_by=WriteCodeReview if self.use_code_review else WriteCode,
        send_to=QaEngineer,
    )
    return msg
async def _think(self) -> Action | None:
if not CONFIG.src_workspace:
@ -153,16 +156,6 @@ class Engineer(Role):
)
changed_files.docs[filename] = coding_doc
self.todos.append(WriteCode(context=coding_doc, llm=self._llm))
# 仅单测
if CONFIG.REQA_FILENAME and CONFIG.REQA_FILENAME not in changed_files.docs:
context = await self._new_coding_context(
filename=CONFIG.REQA_FILENAME,
src_file_repo=src_file_repo,
task_file_repo=task_file_repo,
design_file_repo=design_file_repo,
dependency=dependency,
)
self.publish_message(Message(content=context.json(), instruct_content=context, cause_by=WriteCode))
if self.todos:
self._rc.todo = self.todos[0]

View file

@ -7,23 +7,15 @@
@Modified By: mashenquan, 2023-11-1. In accordance with Chapter 2.2.1 and 2.2.2 of RFC 116, modify the data
type of the `cause_by` value in the `Message` to a string, and utilize the new message filtering feature.
"""
import os
from pathlib import Path
import json
from metagpt.actions import (
DebugError,
RunCode,
WriteCode,
WriteCodeReview,
WriteDesign,
WriteTest,
)
from metagpt.const import WORKSPACE_ROOT
from metagpt.actions import DebugError, RunCode, WriteCode, WriteCodeReview, WriteTest
from metagpt.config import CONFIG
from metagpt.const import OUTPUTS_FILE_REPO, TEST_CODES_FILE_REPO
from metagpt.logs import logger
from metagpt.roles import Role
from metagpt.schema import Message
from metagpt.utils.common import CodeParser, any_to_str_set, parse_recipient
from metagpt.utils.special_tokens import FILENAME_CODE_SEP, MSG_SEP
from metagpt.schema import Document, Message, RunCodeContext, TestingContext
from metagpt.utils.common import CodeParser, any_to_str_set
class QaEngineer(Role):
@ -49,107 +41,98 @@ class QaEngineer(Role):
return system_design_msg.instruct_content.dict().get("Python package name")
return CodeParser.parse_str(block="Python package name", text=system_design_msg.content)
def get_workspace(self, return_proj_dir=True) -> Path:
    """Resolve the workspace directory from the latest ``WriteDesign`` message.

    Args:
        return_proj_dir: When true, return the project directory
            ``WORKSPACE_ROOT / workspace``; otherwise return the nested
            source directory ``WORKSPACE_ROOT / workspace / workspace``.

    Returns:
        The resolved path, or ``WORKSPACE_ROOT / "src"`` when no design
        message is available.
    """
    design_msgs = self._rc.memory.get_by_action(WriteDesign)
    # Indexing an empty result with [-1] would raise before the fallback runs.
    msg = design_msgs[-1] if design_msgs else None
    if not msg:
        return WORKSPACE_ROOT / "src"
    workspace = self.parse_workspace(msg)
    # project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
    if return_proj_dir:
        return WORKSPACE_ROOT / workspace
    # development codes directory: workspace/{package_name}/{package_name}
    return WORKSPACE_ROOT / workspace / workspace
def write_file(self, filename: str, code: str):
    """Write ``code`` to ``filename`` inside the workspace's ``tests`` directory.

    Missing parent directories are created first.

    Args:
        filename: File name (may include sub-directories) relative to ``tests``.
        code: Text to write.
    """
    target = self.get_workspace() / "tests" / filename
    target.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so non-ASCII code does not depend on the platform default encoding.
    target.write_text(code, encoding="utf-8")
async def _write_test(self, message: Message) -> None:
    """Write a test file for every changed source file named in ``message``.

    ``message.content`` is read as one file name per line. For each source file
    a ``test_<filename>`` document is generated with ``WriteTest``, saved to the
    tests file repository, and a ``RunCodeContext`` message is published to
    this role so the test can be run in a later round.
    """
    changed_files = message.content.splitlines()
    src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
    tests_file_repo = CONFIG.git_repo.new_file_repository(TEST_CODES_FILE_REPO)
    for filename in changed_files:
        # write tests
        if not filename or "test" in filename:
            continue  # Engineer might write some test files, skip testing a test file
        code_doc = await src_file_repo.get(filename)
        if not code_doc:
            # Same guard as _run_code: nothing to test if the source document is missing.
            continue
        test_filename = "test_" + code_doc.filename
        test_doc = await tests_file_repo.get(test_filename)
        if not test_doc:
            test_doc = Document(root_path=str(tests_file_repo.root_path), filename=test_filename, content="")
        logger.info(f"Writing {test_doc.filename}..")
        context = TestingContext(filename=test_doc.filename, test_doc=test_doc, code_doc=code_doc)
        context = await WriteTest(context=context, llm=self._llm).run()
        await tests_file_repo.save(
            filename=context.test_doc.filename,
            content=context.test_doc.content,
            dependencies={context.code_doc.root_relative_path},
        )

        # prepare context for run tests in next round
        run_code_context = RunCodeContext(
            command=["python", context.test_doc.root_relative_path],
            code_filename=context.code_doc.filename,
            test_filename=context.test_doc.filename,
            working_directory=str(CONFIG.git_repo.workdir),
            # The field is a list of str; stringify in case src_workspace is a path object.
            additional_python_paths=[str(CONFIG.src_workspace)],
        )

        msg = Message(
            content=run_code_context.json(),
            role=self.profile,
            cause_by=WriteTest,
            sent_from=self,
            send_to=self,
        )
        self.publish_message(msg)

    logger.info(f"Done {str(tests_file_repo.workdir)} generating.")
async def _run_code(self, msg):
    """Run the test described by the ``RunCodeContext`` serialized in ``msg.content``.

    The source and test documents are loaded by file name, executed through
    ``RunCode``, and the result is saved as ``<test_filename>.log`` in the
    outputs file repository. A follow-up message carrying the context is then
    published to this role. Returns early, publishing nothing, when either
    document is missing.
    """
    # The payload is JSON written by RunCodeContext.json(); parse it as data, never eval it.
    run_code_context = RunCodeContext(**json.loads(msg.content))

    src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
    src_doc = await src_file_repo.get(run_code_context.code_filename)
    if not src_doc:
        return
    test_file_repo = CONFIG.git_repo.new_file_repository(TEST_CODES_FILE_REPO)
    test_doc = await test_file_repo.get(run_code_context.test_filename)
    if not test_doc:
        return

    run_code_context.code = src_doc.content
    run_code_context.test_code = test_doc.content
    result_msg = await RunCode(context=run_code_context, llm=self._llm).run()

    outputs_file_repo = CONFIG.git_repo.new_file_repository(OUTPUTS_FILE_REPO)
    run_code_context.output_filename = run_code_context.test_filename + ".log"
    await outputs_file_repo.save(
        filename=run_code_context.output_filename,
        content=result_msg,
        dependencies={src_doc.root_relative_path, test_doc.root_relative_path},
    )

    # Drop the file contents so the published message carries file names only.
    run_code_context.code = None
    run_code_context.test_code = None
    msg = Message(
        content=run_code_context.json(), role=self.profile, cause_by=RunCode, sent_from=self, send_to=self
    )
    self.publish_message(msg)
async def _debug_error(self, msg):
    """Rewrite the failing source file using the saved run output.

    ``msg.content`` is parsed as a serialized ``RunCodeContext``. The run log
    named by ``output_filename`` is loaded from the outputs file repository and
    handed to ``DebugError``; non-empty rewritten code is saved over
    ``code_filename`` in the source repository. A follow-up message carrying
    the context is then published to this role. Returns early, publishing
    nothing, when the run log is missing.
    """
    # The serialized context lives in the message's ``content`` field.
    run_code_context = RunCodeContext(**json.loads(msg.content))

    output_file_repo = CONFIG.git_repo.new_file_repository(OUTPUTS_FILE_REPO)
    output_doc = await output_file_repo.get(run_code_context.output_filename)
    if not output_doc:
        return

    run_code_context.output = output_doc.content
    code = await DebugError(context=run_code_context, llm=self._llm).run()
    if code:
        # Only overwrite the source when there is rewritten code to store.
        src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
        await src_file_repo.save(filename=run_code_context.code_filename, content=code)

    # Clear the output fields before serializing the context into the next message.
    run_code_context.output = None
    run_code_context.output_filename = None
    msg = Message(
        content=run_code_context.json(),
        role=self.profile,
        cause_by=DebugError,
        sent_from=self,
        send_to=self,
    )
    self.publish_message(msg)
async def _act(self) -> Message:
if self.test_round > self.test_round_allowed:
@ -182,5 +165,6 @@ class QaEngineer(Role):
role=self.profile,
cause_by=WriteTest,
sent_from=self.profile,
send_to="",
)
return result_msg

View file

@ -19,6 +19,7 @@ from typing import Dict, List, Optional, Set, TypedDict
from pydantic import BaseModel, Field
from metagpt.config import CONFIG
from metagpt.const import (
MESSAGE_ROUTE_CAUSE_BY,
MESSAGE_ROUTE_FROM,
@ -59,6 +60,12 @@ class Document(BaseModel):
"""
return os.path.join(self.root_path, self.filename)
@property
def full_path(self):
    """Path built from the git repository workdir, ``root_path`` and ``filename``.

    Returns:
        The joined path as a string, or ``None`` when ``CONFIG.git_repo`` is not set.
    """
    repo = CONFIG.git_repo
    if repo:
        return str(repo.workdir / self.root_path / self.filename)
    return None
class Documents(BaseModel):
"""A class representing a collection of documents.
@ -245,3 +252,22 @@ class CodingContext(BaseModel):
design_doc: Document
task_doc: Document
code_doc: Document
class TestingContext(BaseModel):
    """Context handed to the test-writing action: a code document and its test document."""

    # Name under which the context is created; callers in this change pass the test file name.
    filename: str
    # Source document whose content is to be tested.
    code_doc: Document
    # Test document that receives the generated test code.
    test_doc: Document
class RunCodeContext(BaseModel):
    """Serializable description of one code/test run, passed between QA steps.

    The optional text fields default to ``None`` explicitly, so a context can be
    built and serialized with file names only.
    """

    # Execution mode; the run action in this change handles "script" and "text".
    mode: str = "script"
    # Content of the code under test; filled in just before a run.
    code: Optional[str] = None
    code_filename: str = ""
    # Content of the test file; filled in just before a run.
    test_code: Optional[str] = None
    test_filename: str = ""
    # Command to execute, as an argument list.
    command: List[str] = Field(default_factory=list)
    working_directory: str = ""
    additional_python_paths: List[str] = Field(default_factory=list)
    # File name of the saved run log.
    output_filename: Optional[str] = None
    # Content of the run log; filled in just before debugging.
    output: Optional[str] = None

View file

@ -160,7 +160,12 @@ class GitRepository:
:param relative_path: The relative path to the file repository within the Git repository.
:return: A new instance of FileRepository.
"""
return FileRepository(git_repo=self, relative_path=Path(relative_path))
path = Path(relative_path)
try:
path = path.relative_to(self.workdir)
except ValueError:
path = relative_path
return FileRepository(git_repo=self, relative_path=Path(path))
async def get_dependency(self) -> DependencyFile:
"""Get the dependency file associated with the Git repository.