1. record news in rc; 2. add msg sent to; 3. qa_engineer restructure

2026-06-11 15:15:18 +02:00 · 2023-07-31 02:12:34 +08:00 · 2023-07-31 02:12:34 +08:00 · 6bf527d31e
commit 6bf527d31e
parent 9ba18672c6
9 changed files with 330 additions and 63 deletions
--- a/metagpt/actions/debug_error.py
+++ b/metagpt/actions/debug_error.py
@ -7,13 +7,32 @@
 """
 from metagpt.actions.action import Action

-
+PROMPT_TEMPLATE = """
+NOTICE
+1. Role: You are a Development Engineer or QA engineer;
+2. Task: You received this message from another Development Engineer or QA engineer who run or test your code. 
+Based on the message, first, figure out your own role, i.e. Engineer or QaEngineer,
+then rewrite the development code or the test code based on your role, the error, and the summary, such that all bugs are fixed and the code performs well.
+Attention: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the test case or script and triple quote.
+The message is as follows:
+{context}
+---
+Now you should start rewriting the code:
+## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS ONLY ONE FILE.
+"""
 class DebugError(Action):
    def __init__(self, name, context=None, llm=None):
        super().__init__(name, context, llm)

-    async def run(self, code, error):
-        prompt = f"Here is a piece of Python code:\n\n{code}\n\nThe following error occurred during execution:" \
-                 f"\n\n{error}\n\nPlease try to fix the error in this code."
-        fixed_code = await self._aask(prompt)
-        return fixed_code
+    # async def run(self, code, error):
+    #     prompt = f"Here is a piece of Python code:\n\n{code}\n\nThe following error occurred during execution:" \
+    #              f"\n\n{error}\n\nPlease try to fix the error in this code."
+    #     fixed_code = await self._aask(prompt)
+    #     return fixed_code
+    
+    async def run(self, context):
+        prompt = PROMPT_TEMPLATE.format(context=context)
+        
+        rsp = await self._aask(prompt)
+
+        return rsp
--- a/metagpt/actions/run_code.py
+++ b/metagpt/actions/run_code.py
@ -12,6 +12,42 @@ import subprocess
 from metagpt.logs import logger
 from metagpt.actions.action import Action

+PROMPT_TEMPLATE = """
+Role: You are a senior development and qa engineer, your role is summarize the code running result.
+If the running result does not include an error, you should explicitly approve the result.
+On the other hand, if the running result indicates some error, you should point out which part, the development code or the test code, produces the error,
+and give specific instructions on fixing the errors.
+---
+## Development Code File Name
+{code_file_name}
+## Development Code
+```python
+{code}
+```
+## Test File Name
+{test_file_name}
+## Test Code
+```python
+{test_code}
+```
+## Running Command
+{command}
+## Running Output
+standard output: {outs};
+standard errors: {errs};
+## instruction:
+Please summarize the cause of the errors and give correction instruction
+## File To Rewrite
+Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
+## Status:
+Determine if all of the code works fine, if so write PASS, else FAIL,
+WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
+## Send To:
+Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
+WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
+---
+You should fill in necessary summary, status, send to, and finally return all content between the --- segment line.
+"""

 class RunCode(Action):
    def __init__(self, name="RunCode", context=None, llm=None):
@ -52,11 +88,24 @@ class RunCode(Action):
            process.kill()  # Kill the process if it times out
            stdout, stderr = process.communicate()
        return stdout.decode('utf-8'), stderr.decode('utf-8')
-
-    async def run(self, context="", mode="script", **kwargs):
+    
+    async def run(
+        self, code, mode="script", code_file_name="", test_code="", test_file_name="", command=[], **kwargs
+    ):
        if mode == "script":
-            outs, errs = await self.run_script(**kwargs)
+            outs, errs = await self.run_script(command=command, **kwargs)
        elif mode == "text":
-            outs, errs = await self.run_text(**kwargs)
+            outs, errs = await self.run_text(code=code)
        
-        return outs, errs
+        logger.info(outs)
+        logger.info(errs)
+        
+        prompt = PROMPT_TEMPLATE.format(
+            code=code, code_file_name=code_file_name,
+            test_code=test_code, test_file_name=test_file_name,
+            command=" ".join(command),
+            outs=outs, errs=errs
+        )
+        rsp = await self._aask(prompt)
+
+        return rsp
--- a/metagpt/roles/engineer.py
+++ b/metagpt/roles/engineer.py
@ -16,7 +16,7 @@ from metagpt.roles import Role
 from metagpt.actions import WriteCode, WriteCodeReview, WriteTasks, WriteDesign
 from metagpt.schema import Message
 from metagpt.utils.common import CodeParser
-from metagpt.utils.special_tokens import WRITECODE_MSG_SEP, FILENAME_CODE_SEP
+from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP


 async def gather_ordered_k(coros, k) -> list:
@ -144,7 +144,12 @@ class Engineer(Role):
            code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))

        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content=WRITECODE_MSG_SEP.join(code_msg_all), role=self.profile, cause_by=type(self._rc.todo))
+        msg = Message(
+            content=MSG_SEP.join(code_msg_all),
+            role=self.profile,
+            cause_by=type(self._rc.todo),
+            send_to="QaEngineer"
+        )
        return msg

    async def _act_sp_precision(self) -> Message:
@ -186,7 +191,12 @@ class Engineer(Role):
            code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))

        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content=WRITECODE_MSG_SEP.join(code_msg_all), role=self.profile, cause_by=type(self._rc.todo))
+        msg = Message(
+            content=MSG_SEP.join(code_msg_all),
+            role=self.profile,
+            cause_by=type(self._rc.todo),
+            send_to="QaEngineer"
+        )
        return msg

    async def _act(self) -> Message:
--- a/metagpt/roles/qa_engineer.py
+++ b/metagpt/roles/qa_engineer.py
@ -7,15 +7,16 @@
 """
 import re
 from pathlib import Path
+from typing import Type

-from metagpt.actions import WriteTest, WriteCode, WriteDesign, RunCode
+from metagpt.actions import WriteTest, WriteCode, WriteDesign, RunCode, DebugError
 from metagpt.const import WORKSPACE_ROOT
 from metagpt.logs import logger
 from metagpt.roles import Role
 from metagpt.schema import Message
 from metagpt.roles.engineer import Engineer
 from metagpt.utils.common import CodeParser
-from metagpt.utils.special_tokens import WRITECODE_MSG_SEP, FILENAME_CODE_SEP
+from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP

 class QaEngineer(Role):
    def __init__(self, name="Edward", profile="QA Engineer",
@ -51,39 +52,40 @@ class QaEngineer(Role):

    def recv(self, message: Message) -> None:
        self._rc.memory.add(message)
-    
-    async def _act(self) -> Message:
-        code_action_watched = self._rc.important_memory[-1]
-        code_msgs = code_action_watched.content.split(WRITECODE_MSG_SEP)
+
+    async def _write_test(self, message: Message) -> None:
+
+        code_msgs = message.content.split(MSG_SEP)
+        result_msg_all = []
        for code_msg in code_msgs:
-            
+
            # write tests
            file_name, file_path, code_to_test = code_msg.split(FILENAME_CODE_SEP)
            test_file_name = "test_" + file_name
            logger.info(f'Writing {test_file_name}..')
-            code = await WriteTest().run(
+            test_code = await WriteTest().run(
                code_to_test=code_to_test,
                test_file_name=test_file_name,
                # source_file_name=file_name,
                source_file_path=file_path,
                workspace=self.get_workspace()
            )
-            self.write_file(test_file_name, code)
-
-            # add to memory
-            msg = Message(content=code, role=self.profile, cause_by=WriteTest)
-            self._rc.memory.add(msg)
+            self.write_file(test_file_name, test_code)

            # run tests
-            stdout, stderr = await RunCode().run(
+            result_msg = await RunCode().run(
                mode="script",
+                code=code_to_test,
+                code_file_name=file_name,
+                test_code=test_code,
+                test_file_name=test_file_name,
+                command=['python', f'tests/{test_file_name}'],
                working_directory=self.get_workspace(), # workspace/package_name, will run tests/test_xxx.py here
                additional_python_paths=[self.get_workspace(return_proj_dir=False)], # workspace/package_name/package_name,
                                                                                     # import statement inside package code needs this
-                command=['python', f'tests/{test_file_name}']
            )
-            logger.info(stdout)
-            logger.info(stderr)
+
+            result_msg_all.append(result_msg)

        # RunCode().run(
        #     mode="script",
@ -91,7 +93,27 @@ class QaEngineer(Role):
        #     additional_python_paths=[self.get_workspace(return_proj_dir=False)],
        #     command=['python', '-m', 'unittest', 'discover', '-s', 'tests']
        # )
-
-        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content="all done.", role=self.profile, cause_by=WriteTest)
+        logger.info(f'Done {self.get_workspace()}/tests generating.')
+        msg_content = MSG_SEP.join(result_msg_all)
+        msg = Message(content=msg_content, role=self.profile, cause_by=RunCode, send_to=QaEngineer)
        return msg
+    
+    async def _debug_error(self, msg):
+        # process the msg, if the code works fine, no need to debug
+
+        # else: debug and rewrite the code
+
+        pass
+
+    async def _act(self) -> Message:
+        for msg in self._rc.news:
+            if msg.send_to != "QaEngineer":
+                continue
+            if msg.cause_by == WriteCode:
+                # engineer wrote a code, write a test for it
+                result_msg = await self._write_test(msg)
+            elif msg.cause_by == RunCode:
+                # I wrote and ran my test code, fix bugs, if any
+                result_msg = await self._debug_error(msg)
+            
+        return result_msg
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@ -70,6 +70,7 @@ class RoleContext(BaseModel):
    state: int = Field(default=0)
    todo: Action = Field(default=None)
    watch: set[Type[Action]] = Field(default_factory=set)
+    news: list[Type[Message]] = Field(default=[])

    class Config:
        arbitrary_types_allowed = True
@ -184,15 +185,15 @@ class Role:
        
        observed = self._rc.env.memory.get_by_actions(self._rc.watch)
        
-        news = self._rc.memory.remember(observed)  # remember recent exact or similar memories
+        self._rc.news = self._rc.memory.remember(observed)  # remember recent exact or similar memories

        for i in env_msgs:
            self.recv(i)

-        news_text = [f"{i.role}: {i.content[:20]}..." for i in news]
+        news_text = [f"{i.role}: {i.content[:20]}..." for i in self._rc.news]
        if news_text:
            logger.debug(f'{self._setting} observed: {news_text}')
-        return len(news)
+        return len(self._rc.news)

    def _publish_message(self, msg):
        """如果role归属于env，那么role的消息会向env广播"""
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -27,6 +27,7 @@ class Message:
    instruct_content: BaseModel = field(default=None)
    role: str = field(default='user')  # system / user / assistant
    cause_by: Type["Action"] = field(default="")
+    send_to: str = field(default="")

    def __str__(self):
        # prefix = '-'.join([self.role, str(self.cause_by)])
--- a/metagpt/utils/special_tokens.py
+++ b/metagpt/utils/special_tokens.py
@ -1,4 +1,4 @@
 # token to separate different code messages in a WriteCode Message content
-WRITECODE_MSG_SEP = "#*000*#" 
+MSG_SEP = "#*000*#" 
 # token to seperate file name and the actual code text in a code message
-FILENAME_CODE_SEP = "#*001*#"
+FILENAME_CODE_SEP = "#*001*#"
--- a/tests/metagpt/actions/test_debug_error.py
+++ b/tests/metagpt/actions/test_debug_error.py
@ -9,6 +9,140 @@ import pytest

 from metagpt.actions.debug_error import DebugError

+EXAMPLE_MSG_CONTENT = '''
+---
+## Development Code File Name
+player.py
+## Development Code
+```python
+from typing import List
+from deck import Deck
+from card import Card
+
+class Player:
+    """
+    A class representing a player in the Black Jack game.
+    """
+
+    def __init__(self, name: str):
+        """
+        Initialize a Player object.
+        
+        Args:
+            name (str): The name of the player.
+        """
+        self.name = name
+        self.hand: List[Card] = []
+        self.score = 0
+
+    def draw(self, deck: Deck):
+        """
+        Draw a card from the deck and add it to the player's hand.
+        
+        Args:
+            deck (Deck): The deck of cards.
+        """
+        card = deck.draw_card()
+        self.hand.append(card)
+        self.calculate_score()
+
+    def calculate_score(self) -> int:
+        """
+        Calculate the score of the player's hand.
+        
+        Returns:
+            int: The score of the player's hand.
+        """
+        self.score = sum(card.value for card in self.hand)
+        # Handle the case where Ace is counted as 11 and causes the score to exceed 21
+        if self.score > 21 and any(card.rank == 'A' for card in self.hand):
+            self.score -= 10
+        return self.score
+
+```
+## Test File Name
+test_player.py
+## Test Code
+```python
+import unittest
+from blackjack_game.player import Player
+from blackjack_game.deck import Deck
+from blackjack_game.card import Card
+
+class TestPlayer(unittest.TestCase):
+    ## Test the Player's initialization
+    def test_player_initialization(self):
+        player = Player("Test Player")
+        self.assertEqual(player.name, "Test Player")
+        self.assertEqual(player.hand, [])
+        self.assertEqual(player.score, 0)
+
+    ## Test the Player's draw method
+    def test_player_draw(self):
+        deck = Deck()
+        player = Player("Test Player")
+        player.draw(deck)
+        self.assertEqual(len(player.hand), 1)
+        self.assertEqual(player.score, player.hand[0].value)
+
+    ## Test the Player's calculate_score method
+    def test_player_calculate_score(self):
+        deck = Deck()
+        player = Player("Test Player")
+        player.draw(deck)
+        player.draw(deck)
+        self.assertEqual(player.score, sum(card.value for card in player.hand))
+
+    ## Test the Player's calculate_score method with Ace card
+    def test_player_calculate_score_with_ace(self):
+        deck = Deck()
+        player = Player("Test Player")
+        player.hand.append(Card('A', 'Hearts', 11))
+        player.hand.append(Card('K', 'Hearts', 10))
+        player.calculate_score()
+        self.assertEqual(player.score, 21)
+
+    ## Test the Player's calculate_score method with multiple Aces
+    def test_player_calculate_score_with_multiple_aces(self):
+        deck = Deck()
+        player = Player("Test Player")
+        player.hand.append(Card('A', 'Hearts', 11))
+        player.hand.append(Card('A', 'Diamonds', 11))
+        player.calculate_score()
+        self.assertEqual(player.score, 12)
+
+if __name__ == '__main__':
+    unittest.main()
+
+```
+## Running Command
+python tests/test_player.py
+## Running Output
+standard output: ;
+standard errors: ..F..
+======================================================================
+FAIL: test_player_calculate_score_with_multiple_aces (__main__.TestPlayer)
+----------------------------------------------------------------------
+Traceback (most recent call last):
+  File "tests/test_player.py", line 46, in test_player_calculate_score_with_multiple_aces
+    self.assertEqual(player.score, 12)
+AssertionError: 22 != 12
+
+----------------------------------------------------------------------
+Ran 5 tests in 0.007s
+
+FAILED (failures=1)
+;
+## instruction:
+The error is in the development code, specifically in the calculate_score method of the Player class. The method is not correctly handling the case where there are multiple Aces in the player's hand. The current implementation only subtracts 10 from the score once if the score is over 21 and there's an Ace in the hand. However, in the case of multiple Aces, it should subtract 10 for each Ace until the score is 21 or less.
+## File To Rewrite
+player.py
+## Status:
+FAIL
+## Send To:
+Engineer
+---
+'''

@pytest.mark.asyncio
 async def test_debug_error():
@ -17,7 +151,6 @@ async def test_debug_error():

    debug_error = DebugError("debug_error")

-    result = await debug_error.run(code, error)
+    result = await debug_error.run(context=EXAMPLE_MSG)

-    # mock_llm.ask.assert_called_once_with(prompt)
-    assert len(result) > 0
+    assert "```python" in result
--- a/tests/metagpt/actions/test_run_code.py
+++ b/tests/metagpt/actions/test_run_code.py
@ -6,33 +6,65 @@
@File    : test_run_code.py
 """
 import pytest
-
+import asyncio
 from metagpt.actions.run_code import RunCode

+@pytest.mark.asyncio
+async def test_run_text():
+    action = RunCode()
+    result, errs = await RunCode.run_text('result = 1 + 1')
+    assert result == 2
+    assert errs == ""
+
+    result, errs = await RunCode.run_text('result = 1 / 0')
+    assert result == ""
+    assert "ZeroDivisionError" in errs

@pytest.mark.asyncio
-async def test_run_code():
-    code = """
-def add(a, b):
-    return a + b
-result = add(1, 2)
-"""
-    run_code = RunCode("run_code")
-
-    result = await run_code.run(code)
-
-    assert result == 3
+async def test_run_script():
+    action = RunCode()
+    
+    # Successful command
+    out, err = await RunCode.run_script(".", command=["echo", "Hello World"])
+    assert out.strip() == "Hello World"
+    assert err == ""

+    # Unsuccessful command
+    out, err = await RunCode.run_script(".", command=["python", "-c", "print(1/0)"])
+    assert "ZeroDivisionError" in err

@pytest.mark.asyncio
-async def test_run_code_with_error():
-    code = """
-def add(a, b):
-    return a + b
-result = add(1, '2')
-"""
-    run_code = RunCode("run_code")
+async def test_run():
+    action = RunCode()
+    result = await action.run(mode="text", code="print('Hello, World')")
+    assert "PASS" in result

-    result = await run_code.run(code)
+    result = await action.run(
+        mode="script",
+        code="echo 'Hello World'",
+        code_file_name="",
+        test_code="",
+        test_file_name="",
+        command=["echo", "Hello World"],
+        working_directory=".",
+        additional_python_paths=[]
+    )
+    assert "PASS" in result

-    assert "TypeError: unsupported operand type(s) for +" in result
+@pytest.mark.asyncio
+async def test_run_failure():
+    action = RunCode()
+    result = await action.run(mode="text", code="result = 1 / 0")
+    assert "FAIL" in result
+
+    result = await action.run(
+        mode="script",
+        code='python -c "print(1/0)"',
+        code_file_name="",
+        test_code="",
+        test_file_name="",
+        command=["python", "-c", "print(1/0)"],
+        working_directory=".",
+        additional_python_paths=[]
+    )
+    assert "FAIL" in result