make qa_engineer, enable writetest and runcode

2026-05-15 11:02:36 +02:00 · 2023-07-25 22:57:24 +08:00 · 2023-07-25 22:57:24 +08:00 · 84b6c2bf8a
commit 84b6c2bf8a
parent 007c8c0457
7 changed files with 202 additions and 35 deletions
--- a/metagpt/actions/run_code.py
+++ b/metagpt/actions/run_code.py
@ -6,20 +6,57 @@
@File    : run_code.py
 """
 import traceback
+import os
+import subprocess

+from metagpt.logs import logger
 from metagpt.actions.action import Action


 class RunCode(Action):
-    def __init__(self, name, context=None, llm=None):
+    def __init__(self, name="RunCode", context=None, llm=None):
        super().__init__(name, context, llm)

-    async def run(self, code):
+    @classmethod
+    async def run_text(cls, code):
        try:
            # We will document_store the result in this dictionary
            namespace = {}
            exec(code, namespace)
-            return namespace.get('result', None)
+            return namespace.get('result', None), ""
        except Exception:
            # If there is an error in the code, return the error message
-            return traceback.format_exc()
+            return "", traceback.format_exc()
+
+    @classmethod
+    async def run_script(cls, working_directory, additional_python_paths=None, command=None, timeout=10):
+        """Run `command` in a subprocess and capture its output.
+
+        Args:
+            working_directory: Directory the command is executed from; it is also
+                placed first on the child's PYTHONPATH.
+            additional_python_paths: Extra entries added to PYTHONPATH after
+                `working_directory`. Defaults to no extra entries.
+            command: Argument list handed to `subprocess.Popen`,
+                e.g. `['python', 'tests/test_x.py']`.
+            timeout: Seconds to wait before the process is killed.
+
+        Returns:
+            A `(stdout, stderr)` tuple of decoded text.
+
+        Raises:
+            ValueError: If `command` is empty.
+        """
+        if not command:
+            raise ValueError("run_script requires a non-empty command")
+
+        working_directory = str(working_directory)
+        python_paths = [working_directory]
+        python_paths.extend(str(path) for path in (additional_python_paths or []))
+
+        # Copy the current environment so the parent process is left untouched
+        env = os.environ.copy()
+
+        # Prepend our paths to PYTHONPATH; os.pathsep keeps this portable
+        # (':' on POSIX, ';' on Windows) and we avoid a dangling separator
+        # when PYTHONPATH was not set before.
+        existing_python_path = env.get('PYTHONPATH')
+        if existing_python_path:
+            python_paths.append(existing_python_path)
+        env['PYTHONPATH'] = os.pathsep.join(python_paths)
+
+        # Start the subprocess
+        # NOTE(review): Popen/communicate block the event loop while waiting - confirm this is acceptable here
+        process = subprocess.Popen(command, cwd=working_directory, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
+
+        try:
+            # Wait for the process to complete, with a timeout
+            stdout, stderr = process.communicate(timeout=timeout)
+        except subprocess.TimeoutExpired:
+            logger.info("The command did not complete within the given timeout.")
+            process.kill()  # Kill the process if it times out
+            # Collect whatever was written before the kill and reap the process
+            stdout, stderr = process.communicate()
+        # errors='replace' so that non-UTF-8 output from the child cannot raise here
+        return stdout.decode('utf-8', errors='replace'), stderr.decode('utf-8', errors='replace')
+
+    async def run(self, context="", mode="script", **kwargs):
+        """Run code either as a subprocess script or as in-process text.
+
+        Args:
+            context: Not used by this method.
+            mode: "script" forwards `kwargs` to `run_script`; "text" forwards
+                them to `run_text`.
+            **kwargs: Arguments for the selected runner.
+
+        Returns:
+            The `(outs, errs)` tuple produced by the selected runner.
+
+        Raises:
+            ValueError: If `mode` is neither "script" nor "text".
+        """
+        if mode == "script":
+            return await self.run_script(**kwargs)
+        if mode == "text":
+            return await self.run_text(**kwargs)
+        # Fail with a clear message instead of an UnboundLocalError on the return
+        raise ValueError(f"Unsupported mode {mode!r}; expected 'script' or 'text'")
--- a/metagpt/actions/write_test.py
+++ b/metagpt/actions/write_test.py
@ -5,22 +5,45 @@
@Author  : alexanderwu
@File    : write_test.py
 """
+from metagpt.logs import logger
 from metagpt.actions.action import Action
+from metagpt.utils.common import CodeParser

+PROMPT_TEMPLATE = """
+NOTICE
+1. Role: You are a QA engineer; the main goal is to design, develop, and execute PEP8 compliant, well-structured, maintainable test cases and scripts for Python 3.9. Your focus should be on ensuring the product quality of the entire project through systematic testing.
+2. Requirement: Based on the context, develop a comprehensive test suite that adequately covers all relevant aspects of the code file under review. Your test suite will be part of the overall project QA, so please develop complete, robust, and reusable test cases.
+3. Attention1: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the test case or script.
+4. Attention2: If there are any settings in your tests, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE.
+5. Attention3: YOU MUST FOLLOW "Data structures and interface definitions". DO NOT CHANGE ANY DESIGN. Make sure your tests respect the existing design and ensure their validity.
+6. Think before writing: What should be tested and validated in this document? What edge cases could exist? What might fail?
+7. CAREFULLY CHECK THAT YOU DON'T MISS ANY NECESSARY TEST CASES/SCRIPTS IN THIS FILE.
+Attention: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the test case or script and triple quote.
+-----
+## Given the following code, please write appropriate test cases using Python's unittest framework to verify the correctness and robustness of this code:
+```python
+{code_to_test}
+```
+Note that the code to test is at {source_file_path}, we will put your test code at {workspace}/tests/{test_file_name}, and run your test code from {workspace},
+you should correctly import the necessary classes based on these file locations!
+## {test_file_name}: Write test code with triple quoto. Do your best to implement THIS ONLY ONE FILE.
+"""

 class WriteTest(Action):
    def __init__(self, name="", context=None, llm=None):
        super().__init__(name, context, llm)
-        self.code = None
-        self.test_prompt_template = """
-        Given the following code or function:
-        {code}

-        As a test engineer, please write appropriate test cases using Python's unittest framework to verify the correctness and robustness of this code.
-        """
+    async def write_code(self, prompt):
+        """Send `prompt` to the LLM and return the code parsed out of its reply."""
+        llm_reply = await self._aask(prompt)
+        return CodeParser.parse_code(block="", text=llm_reply)

-    async def run(self, code):
-        self.code = code
-        prompt = self.test_prompt_template.format(code=self.code)
-        test_cases = await self._aask(prompt)
-        return test_cases
+    async def run(self, code_to_test, test_file_name, source_file_path, workspace):
+        """Fill PROMPT_TEMPLATE with the given values and return the generated test code.
+
+        Args:
+            code_to_test: Source code the tests should cover.
+            test_file_name: Name of the test file to be written.
+            source_file_path: Location of the code under test.
+            workspace: Directory the tests will be run from.
+        """
+        template_fields = {
+            "code_to_test": code_to_test,
+            "test_file_name": test_file_name,
+            "source_file_path": source_file_path,
+            "workspace": workspace,
+        }
+        prompt = PROMPT_TEMPLATE.format(**template_fields)
+        return await self.write_code(prompt)
--- a/metagpt/roles/engineer.py
+++ b/metagpt/roles/engineer.py
@ -16,6 +16,7 @@ from metagpt.roles import Role
 from metagpt.actions import WriteCode, WriteCodeReview, WriteTasks, WriteDesign
 from metagpt.schema import Message
 from metagpt.utils.common import CodeParser
+from metagpt.utils.special_tokens import WRITECODE_MSG_SEP, FILENAME_CODE_SEP


 async def gather_ordered_k(coros, k) -> list:
@ -95,6 +96,7 @@ class Engineer(Role):
        file = workspace / filename
        file.parent.mkdir(parents=True, exist_ok=True)
        file.write_text(code)
+        return file

    def recv(self, message: Message) -> None:
        self._rc.memory.add(message)
@ -126,23 +128,27 @@ class Engineer(Role):
        return msg

    async def _act_sp(self) -> Message:
+        code_msg_all = [] # gather all code info, will pass to qa_engineer for tests later
        for todo in self.todos:
-            code_rsp = await WriteCode().run(
+            code = await WriteCode().run(
                context=self._rc.history,
                filename=todo
            )
            # logger.info(todo)
            # logger.info(code_rsp)
            # code = self.parse_code(code_rsp)
-            self.write_file(todo, code_rsp)
-            msg = Message(content=code_rsp, role=self.profile, cause_by=type(self._rc.todo))
+            file_path = self.write_file(todo, code)
+            msg = Message(content=code, role=self.profile, cause_by=type(self._rc.todo))
            self._rc.memory.add(msg)

+            code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
+
        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content="all done.", role=self.profile, cause_by=type(self._rc.todo))
+        msg = Message(content=WRITECODE_MSG_SEP.join(code_msg_all), role=self.profile, cause_by=type(self._rc.todo))
        return msg

    async def _act_sp_precision(self) -> Message:
+        code_msg_all = [] # gather all code info, will pass to qa_engineer for tests later
        for todo in self.todos:
            """
            # 从历史信息中挑选必须的信息，以减少prompt长度（人工经验总结）
@ -173,12 +179,14 @@ class Engineer(Role):
                except Exception as e:
                    logger.error("code review failed!", e)
                    pass
-            self.write_file(todo, code)
+            file_path = self.write_file(todo, code)
            msg = Message(content=code, role=self.profile, cause_by=WriteCode)
            self._rc.memory.add(msg)

+            code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
+
        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content="all done.", role=self.profile, cause_by=WriteCode)
+        msg = Message(content=WRITECODE_MSG_SEP.join(code_msg_all), role=self.profile, cause_by=type(self._rc.todo))
        return msg

    async def _act(self) -> Message:
--- a/metagpt/roles/qa_engineer.py
+++ b/metagpt/roles/qa_engineer.py
@ -5,11 +5,93 @@
@Author  : alexanderwu
@File    : qa_engineer.py
 """
-from metagpt.actions import WriteTest
-from metagpt.roles import Role
+import re
+from pathlib import Path

+from metagpt.actions import WriteTest, WriteCode, WriteDesign, RunCode
+from metagpt.const import WORKSPACE_ROOT
+from metagpt.logs import logger
+from metagpt.roles import Role
+from metagpt.schema import Message
+from metagpt.roles.engineer import Engineer
+from metagpt.utils.common import CodeParser
+from metagpt.utils.special_tokens import WRITECODE_MSG_SEP, FILENAME_CODE_SEP

 class QaEngineer(Role):
-    def __init__(self, name, profile, goal, constraints):
+    def __init__(self, name="Edward", profile="QA Engineer",
+                 goal="Write comprehensive and robust tests to ensure codes will work as expected without bugs",
+                 constraints="The test code you write should conform to code standard like PEP8, be modular, easy to read and maintain"):
        super().__init__(name, profile, goal, constraints)
        self._init_actions([WriteTest])
+        self._watch([WriteCode])
+    
+    @classmethod
+    def parse_workspace(cls, system_design_msg: Message) -> str:
+        """Return the "Python package name" carried by a system design message.
+
+        The structured `instruct_content` is used when it is present; otherwise
+        the value is parsed out of the raw message text.
+        """
+        # Only touch instruct_content when it is set: the previous check was
+        # inverted and called .dict() on it exactly when it was falsy.
+        if system_design_msg.instruct_content:
+            return system_design_msg.instruct_content.dict().get("Python package name")
+        return CodeParser.parse_str(block="Python package name", text=system_design_msg.content)
+    
+    def get_workspace(self, return_proj_dir=True) -> Path:
+        """Return the directory of the project described by the latest WriteDesign message.
+
+        Args:
+            return_proj_dir: When True return the project directory
+                `WORKSPACE_ROOT/{package_name}`; when False return the source
+                directory `WORKSPACE_ROOT/{package_name}/{package_name}`.
+
+        Returns:
+            The selected directory, or `WORKSPACE_ROOT / 'src'` when no
+            WriteDesign message is available.
+        """
+        # Check for emptiness before indexing: [-1] on an empty result would
+        # raise IndexError and the fallback below could never be reached.
+        # presumably get_by_action returns a list of messages - verify against Memory
+        design_msgs = self._rc.memory.get_by_action(WriteDesign)
+        if not design_msgs or not design_msgs[-1]:
+            return WORKSPACE_ROOT / 'src'
+        workspace = self.parse_workspace(design_msgs[-1])
+        # project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
+        # source codes directory: workspace/{package_name}/{package_name}
+        if return_proj_dir:
+            return WORKSPACE_ROOT / workspace
+        return WORKSPACE_ROOT / workspace / workspace
+        
+    
+    def write_file(self, filename: str, code: str) -> Path:
+        """Write `code` to `tests/<filename>` inside the project directory.
+
+        Missing parent directories are created. The written path is returned,
+        matching what Engineer.write_file returns.
+        """
+        file = self.get_workspace() / 'tests' / filename
+        file.parent.mkdir(parents=True, exist_ok=True)
+        file.write_text(code)
+        return file
+
+    def recv(self, message: Message) -> None:
+        """Add `message` to this role's memory."""
+        self._rc.memory.add(message)
+    
+    async def _act(self) -> Message:
+        """Write and run a test file for each source file in the latest watched message.
+
+        The message content is split on WRITECODE_MSG_SEP into per-file entries,
+        each of the form file name, file path and code joined by
+        FILENAME_CODE_SEP. For every entry a test file is generated, written
+        under `tests/`, added to memory and executed; stdout and stderr of the
+        run are logged.
+
+        Returns:
+            A Message with content "all done." caused by WriteTest.
+        """
+        code_action_watched = self._rc.important_memory[-1]
+
+        # Resolve both directories once; every get_workspace() call re-reads memory.
+        project_dir = self.get_workspace()  # workspace/package_name, tests/test_xxx.py run from here
+        package_dir = self.get_workspace(return_proj_dir=False)  # workspace/package_name/package_name
+
+        for code_msg in code_action_watched.content.split(WRITECODE_MSG_SEP):
+            # maxsplit=2 keeps the code intact even if it contains the separator;
+            # entries without all three fields are skipped instead of crashing the unpack.
+            parts = code_msg.split(FILENAME_CODE_SEP, 2)
+            if len(parts) != 3:
+                logger.warning(f'Skipping malformed code message: {code_msg[:50]!r}')
+                continue
+            file_name, file_path, code_to_test = parts
+
+            # write tests
+            test_file_name = "test_" + file_name
+            logger.info(f'Writing {test_file_name}..')
+            code = await WriteTest().run(
+                code_to_test=code_to_test,
+                test_file_name=test_file_name,
+                source_file_path=file_path,
+                workspace=project_dir
+            )
+            self.write_file(test_file_name, code)
+
+            # add to memory
+            msg = Message(content=code, role=self.profile, cause_by=WriteTest)
+            self._rc.memory.add(msg)
+
+            # run tests
+            stdout, stderr = await RunCode().run(
+                mode="script",
+                working_directory=project_dir,
+                # import statements inside the package code need the source directory on the path
+                additional_python_paths=[package_dir],
+                command=['python', f'tests/{test_file_name}']
+            )
+            logger.info(stdout)
+            logger.info(stderr)
+
+        logger.info(f'Done {project_dir} generating.')
+        msg = Message(content="all done.", role=self.profile, cause_by=WriteTest)
+        return msg
--- a/metagpt/utils/special_tokens.py
+++ b/metagpt/utils/special_tokens.py
@ -0,0 +1,4 @@
+# token to separate different code messages in a WriteCode Message content
+WRITECODE_MSG_SEP = "#*000*#" 
+# token to separate file name and the actual code text in a code message
+FILENAME_CODE_SEP = "#*001*#"
--- a/startup.py
+++ b/startup.py
@ -4,17 +4,14 @@ import asyncio

 import fire

-from metagpt.roles import Architect, Engineer, ProductManager, ProjectManager
+from metagpt.roles import Architect, Engineer, ProductManager, ProjectManager, QaEngineer
 from metagpt.software_company import SoftwareCompany


 async def startup(idea: str, investment: float = 3.0, n_round: int = 5, code_review: bool = False):
    """Run a startup. Be a boss."""
    company = SoftwareCompany()
-    company.hire([ProductManager(),
-                  Architect(),
-                  ProjectManager(),
-                  Engineer(n_borg=5, use_code_review=code_review)])
+    # Keep forwarding code_review to the Engineer; otherwise the flag is silently ignored.
+    company.hire([
+        ProductManager(),
+        Architect(),
+        ProjectManager(),
+        Engineer(n_borg=5, use_code_review=code_review),
+        QaEngineer(),
+    ])
    company.invest(investment)
    company.start_project(idea)
    await company.run(n_round=n_round)
--- a/tests/metagpt/actions/test_write_test.py
+++ b/tests/metagpt/actions/test_write_test.py
@ -8,19 +8,35 @@
 import pytest

 from metagpt.actions.write_test import WriteTest
+from metagpt.logs import logger


@pytest.mark.asyncio
 async def test_write_test():
    code = """
-    def add(a, b):
-        return a + b
+    import random
+    from typing import Tuple
+
+    class Food:
+        def __init__(self, position: Tuple[int, int]):
+            self.position = position
+
+        def generate(self, max_y: int, max_x: int):
+            self.position = (random.randint(1, max_y - 1), random.randint(1, max_x - 1))
    """

-    write_test = WriteTest("write_test")
+    write_test = WriteTest()

-    test_cases = await write_test.run(code)
+    test_code = await write_test.run(
+        code_to_test=code,
+        test_file_name="test_food.py",
+        source_file_path="/some/dummy/path/cli_snake_game/cli_snake_game/food.py",
+        workspace="/some/dummy/path/cli_snake_game"
+    )
+    logger.info(test_code)

    # We cannot exactly predict the generated test cases, but we can check if it is a string and if it is not empty
-    assert isinstance(test_cases, str)
-    assert len(test_cases) > 0
+    assert isinstance(test_code, str)
+    assert "from cli_snake_game.food import Food" in test_code
+    assert "class TestFood(unittest.TestCase)" in test_code
+    assert "def test_generate" in test_code