1. RunCode -> DebugError loop done; 2. modify default k for memory search

2026-07-14 16:32:16 +02:00 · 2023-08-01 12:16:38 +08:00 · 2023-08-01 12:16:38 +08:00 · 8abdca3057
commit 8abdca3057
parent 6bf527d31e
10 changed files with 140 additions and 70 deletions
--- a/metagpt/actions/debug_error.py
+++ b/metagpt/actions/debug_error.py
@ -5,7 +5,9 @@
@Author  : alexanderwu
@File    : debug_error.py
 """
+import re
 from metagpt.actions.action import Action
+from metagpt.utils.common import CodeParser

 PROMPT_TEMPLATE = """
 NOTICE
@ -21,7 +23,7 @@ Now you should start rewriting the code:
 ## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS ONLY ONE FILE.
 """
 class DebugError(Action):
-    def __init__(self, name, context=None, llm=None):
+    def __init__(self, name="DebugError", context=None, llm=None):
        super().__init__(name, context, llm)

    # async def run(self, code, error):
@ -31,8 +33,15 @@ class DebugError(Action):
    #     return fixed_code
    
    async def run(self, context):
+        if "PASS" in context:
+            return "", "the original code works fine, no need to debug"
+        
+        file_name = re.search("## File To Rewrite:\s*(.+\\.py)", context).group(1)
+
        prompt = PROMPT_TEMPLATE.format(context=context)
        
        rsp = await self._aask(prompt)

-        return rsp
+        code = CodeParser.parse_code(block="", text=rsp)
+
+        return file_name, code
--- a/metagpt/actions/run_code.py
+++ b/metagpt/actions/run_code.py
@ -16,8 +16,25 @@ PROMPT_TEMPLATE = """
 Role: You are a senior development and qa engineer, your role is summarize the code running result.
 If the running result does not include an error, you should explicitly approve the result.
 On the other hand, if the running result indicates some error, you should point out which part, the development code or the test code, produces the error,
-and give specific instructions on fixing the errors.
+and give specific instructions on fixing the errors. Here is the code info:
+{context}
+Now you should begin your analysis
 ---
+## instruction:
+Please summarize the cause of the errors and give correction instruction
+## File To Rewrite:
+Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
+## Status:
+Determine if all of the code works fine, if so write PASS, else FAIL,
+WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
+## Send To:
+Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
+WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
+---
+You should fill in necessary instruction, status, send to, and finally return all content between the --- segment line.
+"""
+
+CONTEXT = """
 ## Development Code File Name
 {code_file_name}
 ## Development Code
@ -35,18 +52,6 @@ and give specific instructions on fixing the errors.
 ## Running Output
 standard output: {outs};
 standard errors: {errs};
-## instruction:
-Please summarize the cause of the errors and give correction instruction
-## File To Rewrite
-Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
-## Status:
-Determine if all of the code works fine, if so write PASS, else FAIL,
-WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
-## Send To:
-Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
-WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
---
-You should fill in necessary summary, status, send to, and finally return all content between the --- segment line.
 """

 class RunCode(Action):
@ -97,15 +102,20 @@ class RunCode(Action):
        elif mode == "text":
            outs, errs = await self.run_text(code=code)
        
-        logger.info(outs)
-        logger.info(errs)
-        
-        prompt = PROMPT_TEMPLATE.format(
+        logger.info(f"{outs=}")
+        logger.info(f"{errs=}")
+
+        context = CONTEXT.format(
            code=code, code_file_name=code_file_name,
            test_code=test_code, test_file_name=test_file_name,
            command=" ".join(command),
-            outs=outs, errs=errs
+            outs=outs[:500], # outs might be long but they are not important, truncate them to avoid token overflow
+            errs=errs
        )
+        
+        prompt = PROMPT_TEMPLATE.format(context=context)
        rsp = await self._aask(prompt)

-        return rsp
+        result = context + rsp
+
+        return result
--- a/metagpt/actions/write_test.py
+++ b/metagpt/actions/write_test.py
@ -30,7 +30,7 @@ you should correctly import the necessary classes based on these file locations!
 """

 class WriteTest(Action):
-    def __init__(self, name="", context=None, llm=None):
+    def __init__(self, name="WriteTest", context=None, llm=None):
        super().__init__(name, context, llm)

    async def write_code(self, prompt):
--- a/metagpt/memory/longterm_memory.py
+++ b/metagpt/memory/longterm_memory.py
@ -43,13 +43,13 @@ class LongTermMemory(Memory):
                # and ignore adding messages from recover repeatedly
                self.memory_storage.add(message)

-    def remember(self, observed: list[Message], k=10) -> list[Message]:
+    def remember(self, observed: list[Message], k=0) -> list[Message]:
        """
        remember the most similar k memories from observed Messages, return all when k=0
            1. remember the short-term memory(stm) news
            2. integrate the stm news with ltm(long-term memory) news
        """
-        stm_news = super(LongTermMemory, self).remember(observed)  # shot-term memory news
+        stm_news = super(LongTermMemory, self).remember(observed, k=k)  # shot-term memory news
        if not self.memory_storage.is_initialized:
            # memory_storage hasn't initialized, use default `remember` to get stm_news
            return stm_news
--- a/metagpt/memory/memory.py
+++ b/metagpt/memory/memory.py
@ -63,7 +63,7 @@ class Memory:
        """Return the most recent k memories, return all when k=0"""
        return self.storage[-k:]

-    def remember(self, observed: list[Message], k=10) -> list[Message]:
+    def remember(self, observed: list[Message], k=0) -> list[Message]:
        """remember the most recent k memories from observed Messages, return all when k=0"""
        already_observed = self.get(k)
        news: list[Message] = []
--- a/metagpt/roles/engineer.py
+++ b/metagpt/roles/engineer.py
@ -141,7 +141,8 @@ class Engineer(Role):
            msg = Message(content=code, role=self.profile, cause_by=type(self._rc.todo))
            self._rc.memory.add(msg)

-            code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
+            code_msg = todo + FILENAME_CODE_SEP + str(file_path)
+            code_msg_all.append(code_msg)

        logger.info(f'Done {self.get_workspace()} generating.')
        msg = Message(
@ -188,7 +189,8 @@ class Engineer(Role):
            msg = Message(content=code, role=self.profile, cause_by=WriteCode)
            self._rc.memory.add(msg)

-            code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
+            code_msg = todo + FILENAME_CODE_SEP + str(file_path)
+            code_msg_all.append(code_msg)

        logger.info(f'Done {self.get_workspace()} generating.')
        msg = Message(
--- a/metagpt/roles/qa_engineer.py
+++ b/metagpt/roles/qa_engineer.py
@ -15,7 +15,7 @@ from metagpt.logs import logger
 from metagpt.roles import Role
 from metagpt.schema import Message
 from metagpt.roles.engineer import Engineer
-from metagpt.utils.common import CodeParser
+from metagpt.utils.common import CodeParser, parse_recipient
 from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP

 class QaEngineer(Role):
@ -24,7 +24,9 @@ class QaEngineer(Role):
                 constraints="The test code you write should conform to code standard like PEP8, be modular, easy to read and maintain"):
        super().__init__(name, profile, goal, constraints)
        self._init_actions([WriteTest])
-        self._watch([WriteCode])
+        self._watch([WriteCode, WriteTest, RunCode, DebugError])
+        self.test_round = 0
+        self.test_round_allowed = 5 # hard code for 1 WriteTest round + 2 x (RunCode -> DebugError loop)
    
    @classmethod
    def parse_workspace(cls, system_design_msg: Message) -> str:
@ -38,12 +40,11 @@ class QaEngineer(Role):
            return WORKSPACE_ROOT / 'src'
        workspace = self.parse_workspace(msg)
        # project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
-        # source codes directory: workspace/{package_name}/{package_name}
        if return_proj_dir:
            return WORKSPACE_ROOT / workspace
+        # development codes directory: workspace/{package_name}/{package_name}
        return WORKSPACE_ROOT / workspace / workspace
-        
-    
+
    def write_file(self, filename: str, code: str):
        workspace = self.get_workspace() / 'tests'
        file = workspace / filename
@ -60,8 +61,12 @@ class QaEngineer(Role):
        for code_msg in code_msgs:

            # write tests
-            file_name, file_path, code_to_test = code_msg.split(FILENAME_CODE_SEP)
+            file_name, file_path = code_msg.split(FILENAME_CODE_SEP)
+            code_to_test = open(file_path, "r").read()
+            if "test" in file_name:
+                continue # Engineer might write some test files, skip testing a test file
            test_file_name = "test_" + file_name
+            test_file_path = self.get_workspace() / "tests" / test_file_name
            logger.info(f'Writing {test_file_name}..')
            test_code = await WriteTest().run(
                code_to_test=code_to_test,
@ -72,48 +77,86 @@ class QaEngineer(Role):
            )
            self.write_file(test_file_name, test_code)

-            # run tests
-            result_msg = await RunCode().run(
-                mode="script",
-                code=code_to_test,
-                code_file_name=file_name,
-                test_code=test_code,
-                test_file_name=test_file_name,
-                command=['python', f'tests/{test_file_name}'],
-                working_directory=self.get_workspace(), # workspace/package_name, will run tests/test_xxx.py here
-                additional_python_paths=[self.get_workspace(return_proj_dir=False)], # workspace/package_name/package_name,
-                                                                                     # import statement inside package code needs this
+            # prepare context for run tests in next round
+            command = ['python', f'tests/{test_file_name}']
+            file_info = {
+                "file_name": file_name, "file_path": str(file_path),
+                "test_file_name": test_file_name, "test_file_path": str(test_file_path),
+                "command": command
+            }
+            msg = Message(
+                content=str(file_info), role=self.profile, cause_by=WriteTest,
+                sent_from="QaEngineer", send_to="QaEngineer"
            )
-
-            result_msg_all.append(result_msg)
-
-        # RunCode().run(
-        #     mode="script",
-        #     working_directory=self.get_workspace(),
-        #     additional_python_paths=[self.get_workspace(return_proj_dir=False)],
-        #     command=['python', '-m', 'unittest', 'discover', '-s', 'tests']
-        # )
+            self._publish_message(msg)
+        
        logger.info(f'Done {self.get_workspace()}/tests generating.')
-        msg_content = MSG_SEP.join(result_msg_all)
-        msg = Message(content=msg_content, role=self.profile, cause_by=RunCode, send_to=QaEngineer)
-        return msg
    
+    async def _run_code(self, msg):
+        file_info = eval(msg.content)
+        code_to_test = open(file_info["file_path"], "r").read()
+        test_code = open(file_info["test_file_path"], "r").read()
+        proj_dir = self.get_workspace()
+        development_code_dir = self.get_workspace(return_proj_dir=False)
+
+        result_msg = await RunCode().run(
+            mode="script",
+            code=code_to_test,
+            code_file_name=file_info["file_name"],
+            test_code=test_code,
+            test_file_name=file_info["test_file_name"],
+            command=file_info["command"],
+            working_directory=proj_dir, # workspace/package_name, will run tests/test_xxx.py here
+            additional_python_paths=[development_code_dir], # workspace/package_name/package_name,
+                                                            # import statement inside package code needs this
+        )
+
+        recipient = parse_recipient(result_msg) # the recipient might be Engineer or myself
+        content = str(file_info) + FILENAME_CODE_SEP + result_msg
+        msg = Message(
+            content=content, role=self.profile, cause_by=RunCode,
+            sent_from="QaEngineer", send_to=recipient
+        )
+        self._publish_message(msg)
+
    async def _debug_error(self, msg):
-        # process the msg, if the code works fine, no need to debug
-
-        # else: debug and rewrite the code
-
-        pass
+        file_info, context = msg.content.split(FILENAME_CODE_SEP)
+        file_name, code = await DebugError().run(context)
+        if file_name:
+            self.write_file(file_name, code)
+            recipient = msg.sent_from # send back to the one who ran the code for another run, might be one's self
+            msg = Message(content=file_info, role=self.profile, cause_by=DebugError, sent_from="QaEngineer", send_to=recipient)
+            self._publish_message(msg)
+    
+    async def _observe(self) -> int:
+        await super()._observe()
+        self._rc.news = [msg for msg in self._rc.news \
+            if msg.send_to == "QaEngineer"] # only relevant msgs count as observed news
+        return len(self._rc.news)

    async def _act(self) -> Message:
+        if self.test_round > self.test_round_allowed:
+            result_msg = Message(
+                content=f"Exceeding {self.test_round_allowed} rounds of tests, skip (writing code counts as a round, too)",
+                role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to=""
+            )
+            return result_msg
+
        for msg in self._rc.news:
-            if msg.send_to != "QaEngineer":
-                continue
+            # Decide what to do based on observed msg type, currently defined by human,
+            # might potentially be moved to _think, that is, let the agent decides for itself
            if msg.cause_by == WriteCode:
-                # engineer wrote a code, write a test for it
+                # engineer wrote a code, time to write a test for it
                result_msg = await self._write_test(msg)
+            elif msg.cause_by in [WriteTest, DebugError]:
+                # I wrote or debugged my test code, time to run it
+                result_msg = await self._run_code(msg)
            elif msg.cause_by == RunCode:
-                # I wrote and ran my test code, fix bugs, if any
+                # I ran my test code, time to fix bugs, if any
                result_msg = await self._debug_error(msg)
-            
+        self.test_round += 1
+        result_msg = Message(
+            content=f"Round {self.test_round} of tests done",
+            role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to=""
+        )
        return result_msg
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -27,6 +27,7 @@ class Message:
    instruct_content: BaseModel = field(default=None)
    role: str = field(default='user')  # system / user / assistant
    cause_by: Type["Action"] = field(default="")
+    sent_from: str = field(default="")
    send_to: str = field(default="")

    def __str__(self):
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@ -183,7 +183,7 @@ class CodeParser:
    def parse_file_list(cls, block: str, text: str, lang: str = "") -> list[str]:
        # Regular expression pattern to find the tasks list.
        code = cls.parse_code(block, text, lang)
-        print(code)
+        # print(code)
        pattern = r'\s*(.*=.*)?(\[.*\])'

        # Extract tasks list string using regex.
@ -230,3 +230,8 @@ def print_members(module, indent=0):
            print(f'{prefix}Function: {name}')
        elif inspect.ismethod(obj):
            print(f'{prefix}Method: {name}')
+
+def parse_recipient(text):
+    pattern = "## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
+    recipient = re.search(pattern, text)
+    return recipient.group(1) if recipient else ""
--- a/tests/metagpt/actions/test_debug_error.py
+++ b/tests/metagpt/actions/test_debug_error.py
@ -135,7 +135,7 @@ FAILED (failures=1)
 ;
 ## instruction:
 The error is in the development code, specifically in the calculate_score method of the Player class. The method is not correctly handling the case where there are multiple Aces in the player's hand. The current implementation only subtracts 10 from the score once if the score is over 21 and there's an Ace in the hand. However, in the case of multiple Aces, it should subtract 10 for each Ace until the score is 21 or less.
-## File To Rewrite
+## File To Rewrite:
 player.py
 ## Status:
 FAIL
@ -151,6 +151,6 @@ async def test_debug_error():

    debug_error = DebugError("debug_error")

-    result = await debug_error.run(context=EXAMPLE_MSG)
+    file_name, rewritten_code = await debug_error.run(context=EXAMPLE_MSG_CONTENT)

-    assert "```python" in result
+    assert "class TestPlayer" in rewritten_code