feat: During the QA process, transmit file names instead of file contents.

2026-07-20 16:51:07 +02:00 · 2023-11-23 22:41:44 +08:00 · 2023-11-23 22:41:44 +08:00 · ec3dd004af
commit ec3dd004af
parent 13b37306cd
8 changed files with 159 additions and 151 deletions
--- a/metagpt/actions/debug_error.py
+++ b/metagpt/actions/debug_error.py
@ -5,7 +5,6 @@
@Author  : alexanderwu
@File    : debug_error.py
 """
-import re

 from metagpt.actions.action import Action
 from metagpt.logs import logger
@ -36,18 +35,17 @@ class DebugError(Action):
    #     fixed_code = await self._aask(prompt)
    #     return fixed_code

-    async def run(self, context):
-        if "PASS" in context:
+    async def run(self, *args, **kwargs) -> str:
+        if "PASS" in self.context.output:
            return "", "the original code works fine, no need to debug"

-        file_name = re.search("## File To Rewrite:\s*(.+\\.py)", context).group(1)
-
+        file_name = self.context.code_filename
        logger.info(f"Debug and rewrite {file_name}")

-        prompt = PROMPT_TEMPLATE.format(context=context)
+        prompt = PROMPT_TEMPLATE.format(context=self.context.output)

        rsp = await self._aask(prompt)

        code = CodeParser.parse_code(block="", text=rsp)

-        return file_name, code
+        return code
--- a/metagpt/actions/run_code.py
+++ b/metagpt/actions/run_code.py
@ -98,24 +98,22 @@ class RunCode(Action):
            stdout, stderr = process.communicate()
        return stdout.decode("utf-8"), stderr.decode("utf-8")

-    async def run(
-        self, code, mode="script", code_file_name="", test_code="", test_file_name="", command=[], **kwargs
-    ) -> str:
-        logger.info(f"Running {' '.join(command)}")
-        if mode == "script":
-            outs, errs = await self.run_script(command=command, **kwargs)
-        elif mode == "text":
-            outs, errs = await self.run_text(code=code)
+    async def run(self, *args, **kwargs) -> str:
+        logger.info(f"Running {' '.join(self.context.command)}")
+        if self.context.mode == "script":
+            outs, errs = await self.run_script(command=self.context.command, **kwargs)
+        elif self.context.mode == "text":
+            outs, errs = await self.run_text(code=self.context.code)

        logger.info(f"{outs=}")
        logger.info(f"{errs=}")

        context = CONTEXT.format(
-            code=code,
-            code_file_name=code_file_name,
-            test_code=test_code,
-            test_file_name=test_file_name,
-            command=" ".join(command),
+            code=self.context.code,
+            code_file_name=self.context.code_filename,
+            test_code=self.context.test_code,
+            test_file_name=self.context.test_filename,
+            command=" ".join(self.context.command),
            outs=outs[:500],  # outs might be long but they are not important, truncate them to avoid token overflow
            errs=errs[:10000],  # truncate errors to avoid token overflow
        )
--- a/metagpt/actions/write_test.py
+++ b/metagpt/actions/write_test.py
@ -6,7 +6,9 @@
@File    : environment.py
 """
 from metagpt.actions.action import Action
+from metagpt.config import CONFIG
 from metagpt.logs import logger
+from metagpt.schema import TestingContext
 from metagpt.utils.common import CodeParser

 PROMPT_TEMPLATE = """
@ -47,12 +49,12 @@ class WriteTest(Action):
            code = code_rsp
        return code

-    async def run(self, code_to_test, test_file_name, source_file_path, workspace):
+    async def run(self, *args, **kwargs) -> TestingContext:
        prompt = PROMPT_TEMPLATE.format(
-            code_to_test=code_to_test,
-            test_file_name=test_file_name,
-            source_file_path=source_file_path,
-            workspace=workspace,
+            code_to_test=self.context.code_doc.content,
+            test_file_name=self.context.test_doc.filename,
+            source_file_path=self.context.code_doc.root_relative_path,
+            workspace=CONFIG.git_repo.workdir,
        )
-        code = await self.write_code(prompt)
-        return code
+        self.context.test_doc.content = await self.write_code(prompt)
+        return self.context
--- a/metagpt/const.py
+++ b/metagpt/const.py
@ -61,3 +61,5 @@ SEQ_FLOW_FILE_REPO = "resources/seq_flow"
 SYSTEM_DESIGN_PDF_FILE_REPO = "resources/system_design"
 PRD_PDF_FILE_REPO = "resources/prd"
 TASK_PDF_FILE_REPO = "resources/api_spec_and_tasks"
+TEST_CODES_FILE_REPO = "tests"
+OUTPUTS_FILE_REPO = "outputs"
--- a/metagpt/roles/engineer.py
+++ b/metagpt/roles/engineer.py
@ -15,6 +15,7 @@ from __future__ import annotations

 import json
 from pathlib import Path
+from typing import Set

 from metagpt.actions import Action, WriteCode, WriteCodeReview, WriteTasks
 from metagpt.config import CONFIG
@ -22,7 +23,6 @@ from metagpt.const import SYSTEM_DESIGN_FILE_REPO, TASK_FILE_REPO
 from metagpt.logs import logger
 from metagpt.roles import Role
 from metagpt.schema import CodingContext, Document, Documents, Message
-from metagpt.utils.special_tokens import FILENAME_CODE_SEP, MSG_SEP


 class Engineer(Role):
@ -60,8 +60,8 @@ class Engineer(Role):
        m = json.loads(task_msg.content)
        return m.get("Task list")

-    async def _act_sp_precision(self, review=False) -> Message:
-        code_msg_all = []  # gather all code info, will pass to qa_engineer for tests later
+    async def _act_sp_precision(self, review=False) -> Set[str]:
+        changed_files = set()
        src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
        for todo in self.todos:
            """
@ -88,23 +88,26 @@ class Engineer(Role):
                content=coding_context.json(), instruct_content=coding_context, role=self.profile, cause_by=WriteCode
            )
            self._rc.memory.add(msg)
-            self.publish_message(msg)

-            code_msg = coding_context.filename + FILENAME_CODE_SEP + str(coding_context.code_doc.root_relative_path)
-            code_msg_all.append(code_msg)
-
-        logger.info(f"Done {CONFIG.src_workspace} generating.")
-        msg = Message(
-            content=MSG_SEP.join(code_msg_all),
-            role=self.profile,
-            cause_by=self._rc.todo,
-            send_to="Edward",
-        )
-        return msg
+            changed_files.add(coding_context.code_doc.filename)
+        return changed_files

    async def _act(self) -> Message:
        """Determines the mode of action based on whether code review is used."""
-        return await self._act_sp_precision(review=self.use_code_review)
+        changed_files = await self._act_sp_precision(review=self.use_code_review)
+        # Unit tests only: make sure the file named by REQA_FILENAME is also sent to QA
+        if CONFIG.REQA_FILENAME and CONFIG.REQA_FILENAME not in changed_files:
+            changed_files.add(CONFIG.REQA_FILENAME)
+
+        from metagpt.roles import QaEngineer  # 避免循环引用
+
+        msg = Message(
+            content="\n".join(changed_files),
+            role=self.profile,
+            cause_by=WriteCodeReview if self.use_code_review else WriteCode,
+            send_to=QaEngineer,
+        )
+        return msg

    async def _think(self) -> Action | None:
        if not CONFIG.src_workspace:
@ -153,16 +156,6 @@ class Engineer(Role):
            )
            changed_files.docs[filename] = coding_doc
            self.todos.append(WriteCode(context=coding_doc, llm=self._llm))
-        # 仅单测
-        if CONFIG.REQA_FILENAME and CONFIG.REQA_FILENAME not in changed_files.docs:
-            context = await self._new_coding_context(
-                filename=CONFIG.REQA_FILENAME,
-                src_file_repo=src_file_repo,
-                task_file_repo=task_file_repo,
-                design_file_repo=design_file_repo,
-                dependency=dependency,
-            )
-            self.publish_message(Message(content=context.json(), instruct_content=context, cause_by=WriteCode))

        if self.todos:
            self._rc.todo = self.todos[0]
--- a/metagpt/roles/qa_engineer.py
+++ b/metagpt/roles/qa_engineer.py
@ -7,23 +7,15 @@
@Modified By: mashenquan, 2023-11-1. In accordance with Chapter 2.2.1 and 2.2.2 of RFC 116, modify the data
        type of the `cause_by` value in the `Message` to a string, and utilize the new message filtering feature.
 """
-import os
-from pathlib import Path
+import json

-from metagpt.actions import (
-    DebugError,
-    RunCode,
-    WriteCode,
-    WriteCodeReview,
-    WriteDesign,
-    WriteTest,
-)
-from metagpt.const import WORKSPACE_ROOT
+from metagpt.actions import DebugError, RunCode, WriteCode, WriteCodeReview, WriteTest
+from metagpt.config import CONFIG
+from metagpt.const import OUTPUTS_FILE_REPO, TEST_CODES_FILE_REPO
 from metagpt.logs import logger
 from metagpt.roles import Role
-from metagpt.schema import Message
-from metagpt.utils.common import CodeParser, any_to_str_set, parse_recipient
-from metagpt.utils.special_tokens import FILENAME_CODE_SEP, MSG_SEP
+from metagpt.schema import Document, Message, RunCodeContext, TestingContext
+from metagpt.utils.common import CodeParser, any_to_str_set


 class QaEngineer(Role):
@ -49,107 +41,98 @@ class QaEngineer(Role):
            return system_design_msg.instruct_content.dict().get("Python package name")
        return CodeParser.parse_str(block="Python package name", text=system_design_msg.content)

-    def get_workspace(self, return_proj_dir=True) -> Path:
-        msg = self._rc.memory.get_by_action(WriteDesign)[-1]
-        if not msg:
-            return WORKSPACE_ROOT / "src"
-        workspace = self.parse_workspace(msg)
-        # project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
-        if return_proj_dir:
-            return WORKSPACE_ROOT / workspace
-        # development codes directory: workspace/{package_name}/{package_name}
-        return WORKSPACE_ROOT / workspace / workspace
-
-    def write_file(self, filename: str, code: str):
-        workspace = self.get_workspace() / "tests"
-        file = workspace / filename
-        file.parent.mkdir(parents=True, exist_ok=True)
-        file.write_text(code)
-
    async def _write_test(self, message: Message) -> None:
-        code_msgs = message.content.split(MSG_SEP)
-        # result_msg_all = []
-        for code_msg in code_msgs:
+        changed_files = message.content.splitlines()
+        src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
+        tests_file_repo = CONFIG.git_repo.new_file_repository(TEST_CODES_FILE_REPO)
+        for filename in changed_files:
            # write tests
-            file_name, file_path = code_msg.split(FILENAME_CODE_SEP)
-            code_to_test = open(file_path, "r").read()
-            if "test" in file_name:
-                continue  # Engineer might write some test files, skip testing a test file
-            test_file_name = "test_" + file_name
-            test_file_path = self.get_workspace() / "tests" / test_file_name
-            logger.info(f"Writing {test_file_name}..")
-            test_code = await WriteTest().run(
-                code_to_test=code_to_test,
-                test_file_name=test_file_name,
-                # source_file_name=file_name,
-                source_file_path=file_path,
-                workspace=self.get_workspace(),
+            if not filename or "test" in filename:
+                continue
+            code_doc = await src_file_repo.get(filename)
+            test_doc = await tests_file_repo.get("test_" + code_doc.filename)
+            if not test_doc:
+                test_doc = Document(
+                    root_path=str(tests_file_repo.root_path), filename="test_" + code_doc.filename, content=""
+                )
+            logger.info(f"Writing {test_doc.filename}..")
+            context = TestingContext(filename=test_doc.filename, test_doc=test_doc, code_doc=code_doc)
+            context = await WriteTest(context=context, llm=self._llm).run()
+            await tests_file_repo.save(
+                filename=context.test_doc.filename,
+                content=context.test_doc.content,
+                dependencies={context.code_doc.root_relative_path},
            )
-            self.write_file(test_file_name, test_code)

            # prepare context for run tests in next round
-            command = ["python", f"tests/{test_file_name}"]
-            file_info = {
-                "file_name": file_name,
-                "file_path": str(file_path),
-                "test_file_name": test_file_name,
-                "test_file_path": str(test_file_path),
-                "command": command,
-            }
+            run_code_context = RunCodeContext(
+                command=["python", context.test_doc.root_relative_path],
+                code_filename=context.code_doc.filename,
+                test_filename=context.test_doc.filename,
+                working_directory=str(CONFIG.git_repo.workdir),
+                additional_python_paths=[CONFIG.src_workspace],
+            )
+
            msg = Message(
-                content=str(file_info),
+                content=run_code_context.json(),
                role=self.profile,
                cause_by=WriteTest,
-                sent_from=self.profile,
-                send_to=self.profile,
+                sent_from=self,
+                send_to=self,
            )
            self.publish_message(msg)

-        logger.info(f"Done {self.get_workspace()}/tests generating.")
+        logger.info(f"Done {str(tests_file_repo.workdir)} generating.")

    async def _run_code(self, msg):
-        file_info = eval(msg.content)
-        development_file_path = file_info["file_path"]
-        test_file_path = file_info["test_file_path"]
-        if not os.path.exists(development_file_path) or not os.path.exists(test_file_path):
+        m = json.loads(msg.content)
+        run_code_context = RunCodeContext(**m)
+        src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
+        src_doc = await src_file_repo.get(run_code_context.code_filename)
+        if not src_doc:
            return
-
-        development_code = open(development_file_path, "r").read()
-        test_code = open(test_file_path, "r").read()
-        proj_dir = self.get_workspace()
-        development_code_dir = self.get_workspace(return_proj_dir=False)
-
-        result_msg = await RunCode().run(
-            mode="script",
-            code=development_code,
-            code_file_name=file_info["file_name"],
-            test_code=test_code,
-            test_file_name=file_info["test_file_name"],
-            command=file_info["command"],
-            working_directory=proj_dir,  # workspace/package_name, will run tests/test_xxx.py here
-            additional_python_paths=[development_code_dir],  # workspace/package_name/package_name,
-            # import statement inside package code needs this
+        test_file_repo = CONFIG.git_repo.new_file_repository(TEST_CODES_FILE_REPO)
+        test_doc = await test_file_repo.get(run_code_context.test_filename)
+        if not test_doc:
+            return
+        run_code_context.code = src_doc.content
+        run_code_context.test_code = test_doc.content
+        result_msg = await RunCode(context=run_code_context, llm=self._llm).run()
+        outputs_file_repo = CONFIG.git_repo.new_file_repository(OUTPUTS_FILE_REPO)
+        run_code_context.output_filename = run_code_context.test_filename + ".log"
+        await outputs_file_repo.save(
+            filename=run_code_context.output_filename,
+            content=result_msg,
+            dependencies={src_doc.root_relative_path, test_doc.root_relative_path},
+        )
+        run_code_context.code = None
+        run_code_context.test_code = None
+        msg = Message(
+            content=run_code_context.json(), role=self.profile, cause_by=RunCode, sent_from=self, send_to=self
        )
-
-        recipient = parse_recipient(result_msg)  # the recipient might be Engineer or myself
-        content = str(file_info) + FILENAME_CODE_SEP + result_msg
-        msg = Message(content=content, role=self.profile, cause_by=RunCode, sent_from=self.profile, send_to=recipient)
        self.publish_message(msg)

    async def _debug_error(self, msg):
-        file_info, context = msg.content.split(FILENAME_CODE_SEP)
-        file_name, code = await DebugError().run(context)
-        if file_name:
-            self.write_file(file_name, code)
-            recipient = msg.sent_from  # send back to the one who ran the code for another run, might be one's self
-            msg = Message(
-                content=file_info,
-                role=self.profile,
-                cause_by=DebugError,
-                sent_from=self.profile,
-                send_to=recipient,
-            )
-            self.publish_message(msg)
+        m = json.loads(msg.context)
+        run_code_context = RunCodeContext(**m)
+        output_file_repo = CONFIG.git_repo.new_file_repository(OUTPUTS_FILE_REPO)
+        output_doc = await output_file_repo.get(run_code_context.output_filename)
+        if not output_doc:
+            return
+        run_code_context.output = output_doc.content
+        code = await DebugError(context=run_code_context, llm=self._llm).run()
+        src_file_repo = CONFIG.git_repo.new_file_repository(CONFIG.src_workspace)
+        await src_file_repo.save(filename=run_code_context.code_filename, content=code)
+        run_code_context.output = None
+        run_code_context.output_filename = None
+        msg = Message(
+            content=run_code_context.json(),
+            role=self.profile,
+            cause_by=DebugError,
+            sent_from=self,
+            send_to=self,
+        )
+        self.publish_message(msg)

    async def _act(self) -> Message:
        if self.test_round > self.test_round_allowed:
@ -182,5 +165,6 @@ class QaEngineer(Role):
            role=self.profile,
            cause_by=WriteTest,
            sent_from=self.profile,
+            send_to="",
        )
        return result_msg
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -19,6 +19,7 @@ from typing import Dict, List, Optional, Set, TypedDict

 from pydantic import BaseModel, Field

+from metagpt.config import CONFIG
 from metagpt.const import (
    MESSAGE_ROUTE_CAUSE_BY,
    MESSAGE_ROUTE_FROM,
@ -59,6 +60,12 @@ class Document(BaseModel):
        """
        return os.path.join(self.root_path, self.filename)

+    @property
+    def full_path(self):
+        if not CONFIG.git_repo:
+            return None
+        return str(CONFIG.git_repo.workdir / self.root_path / self.filename)
+

 class Documents(BaseModel):
    """A class representing a collection of documents.
@ -245,3 +252,22 @@ class CodingContext(BaseModel):
    design_doc: Document
    task_doc: Document
    code_doc: Document
+
+
+class TestingContext(BaseModel):
+    filename: str
+    code_doc: Document
+    test_doc: Document
+
+
+class RunCodeContext(BaseModel):
+    mode: str = "script"
+    code: Optional[str]
+    code_filename: str = ""
+    test_code: Optional[str]
+    test_filename: str = ""
+    command: List[str] = Field(default_factory=list)
+    working_directory: str = ""
+    additional_python_paths: List[str] = Field(default_factory=list)
+    output_filename: Optional[str]
+    output: Optional[str]
--- a/metagpt/utils/git_repository.py
+++ b/metagpt/utils/git_repository.py
@ -160,7 +160,12 @@ class GitRepository:
        :param relative_path: The relative path to the file repository within the Git repository.
        :return: A new instance of FileRepository.
        """
-        return FileRepository(git_repo=self, relative_path=Path(relative_path))
+        path = Path(relative_path)
+        try:
+            path = path.relative_to(self.workdir)
+        except ValueError:
+            path = relative_path
+        return FileRepository(git_repo=self, relative_path=Path(path))

    async def get_dependency(self) -> DependencyFile:
        """Get the dependency file associated with the Git repository.