diff --git a/metagpt/actions/debug_error.py b/metagpt/actions/debug_error.py index 58a3a7c77..cc64836f8 100644 --- a/metagpt/actions/debug_error.py +++ b/metagpt/actions/debug_error.py @@ -5,7 +5,9 @@ @Author : alexanderwu @File : debug_error.py """ +import re from metagpt.actions.action import Action +from metagpt.utils.common import CodeParser PROMPT_TEMPLATE = """ NOTICE @@ -21,7 +23,7 @@ Now you should start rewriting the code: ## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS ONLY ONE FILE. """ class DebugError(Action): - def __init__(self, name, context=None, llm=None): + def __init__(self, name="DebugError", context=None, llm=None): super().__init__(name, context, llm) # async def run(self, code, error): @@ -31,8 +33,15 @@ class DebugError(Action): # return fixed_code async def run(self, context): + if "PASS" in context: + return "", "the original code works fine, no need to debug" + + file_name = re.search("## File To Rewrite:\s*(.+\\.py)", context).group(1) + prompt = PROMPT_TEMPLATE.format(context=context) rsp = await self._aask(prompt) - return rsp + code = CodeParser.parse_code(block="", text=rsp) + + return file_name, code diff --git a/metagpt/actions/run_code.py b/metagpt/actions/run_code.py index b8112b935..c0e166628 100644 --- a/metagpt/actions/run_code.py +++ b/metagpt/actions/run_code.py @@ -16,8 +16,25 @@ PROMPT_TEMPLATE = """ Role: You are a senior development and qa engineer, your role is summarize the code running result. If the running result does not include an error, you should explicitly approve the result. On the other hand, if the running result indicates some error, you should point out which part, the development code or the test code, produces the error, -and give specific instructions on fixing the errors. +and give specific instructions on fixing the errors. 
Here is the code info: +{context} +Now you should begin your analysis --- +## instruction: +Please summarize the cause of the errors and give correction instruction +## File To Rewrite: +Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py +## Status: +Determine if all of the code works fine, if so write PASS, else FAIL, +WRITE ONLY ONE WORD, PASS OR FAIL, IN THIS SECTION +## Send To: +Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors, +WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION. +--- +You should fill in necessary instruction, status, send to, and finally return all content between the --- segment line. +""" + +CONTEXT = """ ## Development Code File Name {code_file_name} ## Development Code @@ -35,18 +52,6 @@ and give specific instructions on fixing the errors. ## Running Output standard output: {outs}; standard errors: {errs}; -## instruction: -Please summarize the cause of the errors and give correction instruction -## File To Rewrite -Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py -## Status: -Determine if all of the code works fine, if so write PASS, else FAIL, -WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION -## Send To: -Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors, -WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION. ---- -You should fill in necessary summary, status, send to, and finally return all content between the --- segment line. 
""" class RunCode(Action): @@ -97,15 +102,20 @@ class RunCode(Action): elif mode == "text": outs, errs = await self.run_text(code=code) - logger.info(outs) - logger.info(errs) - - prompt = PROMPT_TEMPLATE.format( + logger.info(f"{outs=}") + logger.info(f"{errs=}") + + context = CONTEXT.format( code=code, code_file_name=code_file_name, test_code=test_code, test_file_name=test_file_name, command=" ".join(command), - outs=outs, errs=errs + outs=outs[:500], # outs might be long but they are not important, truncate them to avoid token overflow + errs=errs ) + + prompt = PROMPT_TEMPLATE.format(context=context) rsp = await self._aask(prompt) - return rsp + result = context + rsp + + return result diff --git a/metagpt/actions/write_test.py b/metagpt/actions/write_test.py index e5072a048..511399f7d 100644 --- a/metagpt/actions/write_test.py +++ b/metagpt/actions/write_test.py @@ -30,7 +30,7 @@ you should correctly import the necessary classes based on these file locations! """ class WriteTest(Action): - def __init__(self, name="", context=None, llm=None): + def __init__(self, name="WriteTest", context=None, llm=None): super().__init__(name, context, llm) async def write_code(self, prompt): diff --git a/metagpt/memory/longterm_memory.py b/metagpt/memory/longterm_memory.py index 8521c046b..154fcfbda 100644 --- a/metagpt/memory/longterm_memory.py +++ b/metagpt/memory/longterm_memory.py @@ -43,13 +43,13 @@ class LongTermMemory(Memory): # and ignore adding messages from recover repeatedly self.memory_storage.add(message) - def remember(self, observed: list[Message], k=10) -> list[Message]: + def remember(self, observed: list[Message], k=0) -> list[Message]: """ remember the most similar k memories from observed Messages, return all when k=0 1. remember the short-term memory(stm) news 2. 
integrate the stm news with ltm(long-term memory) news """ - stm_news = super(LongTermMemory, self).remember(observed) # shot-term memory news + stm_news = super(LongTermMemory, self).remember(observed, k=k) # shot-term memory news if not self.memory_storage.is_initialized: # memory_storage hasn't initialized, use default `remember` to get stm_news return stm_news diff --git a/metagpt/memory/memory.py b/metagpt/memory/memory.py index 5d3b736a3..a96aaf1be 100644 --- a/metagpt/memory/memory.py +++ b/metagpt/memory/memory.py @@ -63,7 +63,7 @@ class Memory: """Return the most recent k memories, return all when k=0""" return self.storage[-k:] - def remember(self, observed: list[Message], k=10) -> list[Message]: + def remember(self, observed: list[Message], k=0) -> list[Message]: """remember the most recent k memories from observed Messages, return all when k=0""" already_observed = self.get(k) news: list[Message] = [] diff --git a/metagpt/roles/engineer.py b/metagpt/roles/engineer.py index c352f403d..63c068746 100644 --- a/metagpt/roles/engineer.py +++ b/metagpt/roles/engineer.py @@ -141,7 +141,8 @@ class Engineer(Role): msg = Message(content=code, role=self.profile, cause_by=type(self._rc.todo)) self._rc.memory.add(msg) - code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code])) + code_msg = todo + FILENAME_CODE_SEP + str(file_path) + code_msg_all.append(code_msg) logger.info(f'Done {self.get_workspace()} generating.') msg = Message( @@ -188,7 +189,8 @@ class Engineer(Role): msg = Message(content=code, role=self.profile, cause_by=WriteCode) self._rc.memory.add(msg) - code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code])) + code_msg = todo + FILENAME_CODE_SEP + str(file_path) + code_msg_all.append(code_msg) logger.info(f'Done {self.get_workspace()} generating.') msg = Message( diff --git a/metagpt/roles/qa_engineer.py b/metagpt/roles/qa_engineer.py index 57ec04be9..597249dff 100644 --- a/metagpt/roles/qa_engineer.py +++ 
b/metagpt/roles/qa_engineer.py @@ -15,7 +15,7 @@ from metagpt.logs import logger from metagpt.roles import Role from metagpt.schema import Message from metagpt.roles.engineer import Engineer -from metagpt.utils.common import CodeParser +from metagpt.utils.common import CodeParser, parse_recipient from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP class QaEngineer(Role): @@ -24,7 +24,9 @@ class QaEngineer(Role): constraints="The test code you write should conform to code standard like PEP8, be modular, easy to read and maintain"): super().__init__(name, profile, goal, constraints) self._init_actions([WriteTest]) - self._watch([WriteCode]) + self._watch([WriteCode, WriteTest, RunCode, DebugError]) + self.test_round = 0 + self.test_round_allowed = 5 # hard code for 1 WriteTest round + 2 x (RunCode -> DebugError loop) @classmethod def parse_workspace(cls, system_design_msg: Message) -> str: @@ -38,12 +40,11 @@ class QaEngineer(Role): return WORKSPACE_ROOT / 'src' workspace = self.parse_workspace(msg) # project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc. 
- # source codes directory: workspace/{package_name}/{package_name} if return_proj_dir: return WORKSPACE_ROOT / workspace + # development codes directory: workspace/{package_name}/{package_name} return WORKSPACE_ROOT / workspace / workspace - - + def write_file(self, filename: str, code: str): workspace = self.get_workspace() / 'tests' file = workspace / filename @@ -60,8 +61,12 @@ class QaEngineer(Role): for code_msg in code_msgs: # write tests - file_name, file_path, code_to_test = code_msg.split(FILENAME_CODE_SEP) + file_name, file_path = code_msg.split(FILENAME_CODE_SEP) + code_to_test = open(file_path, "r").read() + if "test" in file_name: + continue # Engineer might write some test files, skip testing a test file test_file_name = "test_" + file_name + test_file_path = self.get_workspace() / "tests" / test_file_name logger.info(f'Writing {test_file_name}..') test_code = await WriteTest().run( code_to_test=code_to_test, @@ -72,48 +77,86 @@ class QaEngineer(Role): ) self.write_file(test_file_name, test_code) - # run tests - result_msg = await RunCode().run( - mode="script", - code=code_to_test, - code_file_name=file_name, - test_code=test_code, - test_file_name=test_file_name, - command=['python', f'tests/{test_file_name}'], - working_directory=self.get_workspace(), # workspace/package_name, will run tests/test_xxx.py here - additional_python_paths=[self.get_workspace(return_proj_dir=False)], # workspace/package_name/package_name, - # import statement inside package code needs this + # prepare context for run tests in next round + command = ['python', f'tests/{test_file_name}'] + file_info = { + "file_name": file_name, "file_path": str(file_path), + "test_file_name": test_file_name, "test_file_path": str(test_file_path), + "command": command + } + msg = Message( + content=str(file_info), role=self.profile, cause_by=WriteTest, + sent_from="QaEngineer", send_to="QaEngineer" ) - - result_msg_all.append(result_msg) - - # RunCode().run( - # mode="script", - # 
working_directory=self.get_workspace(), - # additional_python_paths=[self.get_workspace(return_proj_dir=False)], - # command=['python', '-m', 'unittest', 'discover', '-s', 'tests'] - # ) + self._publish_message(msg) + logger.info(f'Done {self.get_workspace()}/tests generating.') - msg_content = MSG_SEP.join(result_msg_all) - msg = Message(content=msg_content, role=self.profile, cause_by=RunCode, send_to=QaEngineer) - return msg + async def _run_code(self, msg): + file_info = eval(msg.content) + code_to_test = open(file_info["file_path"], "r").read() + test_code = open(file_info["test_file_path"], "r").read() + proj_dir = self.get_workspace() + development_code_dir = self.get_workspace(return_proj_dir=False) + + result_msg = await RunCode().run( + mode="script", + code=code_to_test, + code_file_name=file_info["file_name"], + test_code=test_code, + test_file_name=file_info["test_file_name"], + command=file_info["command"], + working_directory=proj_dir, # workspace/package_name, will run tests/test_xxx.py here + additional_python_paths=[development_code_dir], # workspace/package_name/package_name, + # import statement inside package code needs this + ) + + recipient = parse_recipient(result_msg) # the recipient might be Engineer or myself + content = str(file_info) + FILENAME_CODE_SEP + result_msg + msg = Message( + content=content, role=self.profile, cause_by=RunCode, + sent_from="QaEngineer", send_to=recipient + ) + self._publish_message(msg) + async def _debug_error(self, msg): - # process the msg, if the code works fine, no need to debug - - # else: debug and rewrite the code - - pass + file_info, context = msg.content.split(FILENAME_CODE_SEP) + file_name, code = await DebugError().run(context) + if file_name: + self.write_file(file_name, code) + recipient = msg.sent_from # send back to the one who ran the code for another run, might be one's self + msg = Message(content=file_info, role=self.profile, cause_by=DebugError, sent_from="QaEngineer", send_to=recipient) 
+ self._publish_message(msg) + + async def _observe(self) -> int: + await super()._observe() + self._rc.news = [msg for msg in self._rc.news \ + if msg.send_to == "QaEngineer"] # only relevant msgs count as observed news + return len(self._rc.news) async def _act(self) -> Message: + if self.test_round > self.test_round_allowed: + result_msg = Message( + content=f"Exceeding {self.test_round_allowed} rounds of tests, skip (writing code counts as a round, too)", + role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to="" + ) + return result_msg + for msg in self._rc.news: - if msg.send_to != "QaEngineer": - continue + # Decide what to do based on observed msg type, currently defined by human, + # might potentially be moved to _think, that is, let the agent decides for itself if msg.cause_by == WriteCode: - # engineer wrote a code, write a test for it + # engineer wrote a code, time to write a test for it result_msg = await self._write_test(msg) + elif msg.cause_by in [WriteTest, DebugError]: + # I wrote or debugged my test code, time to run it + result_msg = await self._run_code(msg) elif msg.cause_by == RunCode: - # I wrote and ran my test code, fix bugs, if any + # I ran my test code, time to fix bugs, if any result_msg = await self._debug_error(msg) - + self.test_round += 1 + result_msg = Message( + content=f"Round {self.test_round} of tests done", + role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to="" + ) return result_msg diff --git a/metagpt/schema.py b/metagpt/schema.py index b96bbd868..64db39d0d 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -27,6 +27,7 @@ class Message: instruct_content: BaseModel = field(default=None) role: str = field(default='user') # system / user / assistant cause_by: Type["Action"] = field(default="") + sent_from: str = field(default="") send_to: str = field(default="") def __str__(self): diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 472f1e655..0cd73ec0b 100644 --- 
a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -183,7 +183,7 @@ class CodeParser: def parse_file_list(cls, block: str, text: str, lang: str = "") -> list[str]: # Regular expression pattern to find the tasks list. code = cls.parse_code(block, text, lang) - print(code) + # print(code) pattern = r'\s*(.*=.*)?(\[.*\])' # Extract tasks list string using regex. @@ -230,3 +230,8 @@ def print_members(module, indent=0): print(f'{prefix}Function: {name}') elif inspect.ismethod(obj): print(f'{prefix}Method: {name}') + +def parse_recipient(text): + pattern = "## Send To:\s*([A-Za-z]+)\s*?" # hard code for now + recipient = re.search(pattern, text) + return recipient.group(1) if recipient else "" diff --git a/tests/metagpt/actions/test_debug_error.py b/tests/metagpt/actions/test_debug_error.py index f6260f8ee..23eecb2f8 100644 --- a/tests/metagpt/actions/test_debug_error.py +++ b/tests/metagpt/actions/test_debug_error.py @@ -135,7 +135,7 @@ FAILED (failures=1) ; ## instruction: The error is in the development code, specifically in the calculate_score method of the Player class. The method is not correctly handling the case where there are multiple Aces in the player's hand. The current implementation only subtracts 10 from the score once if the score is over 21 and there's an Ace in the hand. However, in the case of multiple Aces, it should subtract 10 for each Ace until the score is 21 or less. -## File To Rewrite +## File To Rewrite: player.py ## Status: FAIL @@ -151,6 +151,6 @@ async def test_debug_error(): debug_error = DebugError("debug_error") - result = await debug_error.run(context=EXAMPLE_MSG) + file_name, rewritten_code = await debug_error.run(context=EXAMPLE_MSG_CONTENT) - assert "```python" in result + assert "class TestPlayer" in rewritten_code