Merge branch 'main' into feature/teacher

2026-04-27 01:36:29 +02:00 · 2023-08-07 10:12:15 +08:00 · 2023-08-07 10:12:15 +08:00 · 28c3bfd036
commit 28c3bfd036
parent b8901f2bb1 0601ad9c76
30 changed files with 800 additions and 129 deletions
--- a/metagpt/actions/debug_error.py
+++ b/metagpt/actions/debug_error.py
@ -5,15 +5,47 @@
@Author  : alexanderwu
@File    : debug_error.py
 """
+import re
+
+from metagpt.logs import logger
 from metagpt.actions.action import Action
+from metagpt.utils.common import CodeParser

-
+PROMPT_TEMPLATE = """
+NOTICE
+1. Role: You are a Development Engineer or QA engineer;
+2. Task: You received this message from another Development Engineer or QA engineer who ran or tested your code. 
+Based on the message, first, figure out your own role, i.e. Engineer or QaEngineer,
+then rewrite the development code or the test code based on your role, the error, and the summary, such that all bugs are fixed and the code performs well.
+Attention: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the test case or script and triple quotes.
+The message is as follows:
+{context}
+---
+Now you should start rewriting the code:
+## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS IN ONLY ONE FILE.
+"""
 class DebugError(Action):
-    def __init__(self, name, context=None, llm=None):
+    def __init__(self, name="DebugError", context=None, llm=None):
        super().__init__(name, context, llm)

-    async def run(self, code, error):
-        prompt = f"Here is a piece of Python code:\n\n{code}\n\nThe following error occurred during execution:" \
-                 f"\n\n{error}\n\nPlease try to fix the error in this code."
-        fixed_code = await self._aask(prompt)
-        return fixed_code
+    # async def run(self, code, error):
+    #     prompt = f"Here is a piece of Python code:\n\n{code}\n\nThe following error occurred during execution:" \
+    #              f"\n\n{error}\n\nPlease try to fix the error in this code."
+    #     fixed_code = await self._aask(prompt)
+    #     return fixed_code
+    
+    async def run(self, context):
+        if "PASS" in context:
+            return "", "the original code works fine, no need to debug"
+        
+        file_name = re.search("## File To Rewrite:\s*(.+\\.py)", context).group(1)
+
+        logger.info(f"Debug and rewrite {file_name}")
+
+        prompt = PROMPT_TEMPLATE.format(context=context)
+        
+        rsp = await self._aask(prompt)
+
+        code = CodeParser.parse_code(block="", text=rsp)
+
+        return file_name, code
--- a/metagpt/actions/run_code.py
+++ b/metagpt/actions/run_code.py
@ -6,20 +6,118 @@
@File    : run_code.py
 """
 import traceback
+import os
+import subprocess
+from typing import List, Tuple

+from metagpt.logs import logger
 from metagpt.actions.action import Action

+PROMPT_TEMPLATE = """
+Role: You are a senior development and qa engineer, your role is summarize the code running result.
+If the running result does not include an error, you should explicitly approve the result.
+On the other hand, if the running result indicates some error, you should point out which part, the development code or the test code, produces the error,
+and give specific instructions on fixing the errors. Here is the code info:
+{context}
+Now you should begin your analysis
+---
+## instruction:
+Please summarize the cause of the errors and give correction instruction
+## File To Rewrite:
+Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
+## Status:
+Determine if all of the code works fine, if so write PASS, else FAIL,
+WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
+## Send To:
+Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
+WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
+---
+You should fill in necessary instruction, status, send to, and finally return all content between the --- segment line.
+"""
+
+CONTEXT = """
+## Development Code File Name
+{code_file_name}
+## Development Code
+```python
+{code}
+```
+## Test File Name
+{test_file_name}
+## Test Code
+```python
+{test_code}
+```
+## Running Command
+{command}
+## Running Output
+standard output: {outs};
+standard errors: {errs};
+"""

 class RunCode(Action):
-    def __init__(self, name, context=None, llm=None):
+    def __init__(self, name="RunCode", context=None, llm=None):
        super().__init__(name, context, llm)

-    async def run(self, code):
+    @classmethod
+    async def run_text(cls, code) -> Tuple[str, str]:
        try:
            # We will store the result in this dictionary
            namespace = {}
            exec(code, namespace)
-            return namespace.get('result', None)
+            return namespace.get('result', ""), ""
        except Exception:
            # If there is an error in the code, return the error message
-            return traceback.format_exc()
+            return "", traceback.format_exc()
+
+    @classmethod
+    async def run_script(cls, working_directory, additional_python_paths=[], command=[]) -> Tuple[str, str]:
+        working_directory = str(working_directory)
+        additional_python_paths = [str(path) for path in additional_python_paths]
+
+        # Copy the current environment variables
+        env = os.environ.copy()
+
+        # Modify the PYTHONPATH environment variable
+        additional_python_paths = [working_directory] + additional_python_paths
+        additional_python_paths = ":".join(additional_python_paths)
+        env['PYTHONPATH'] = additional_python_paths + ':' + env.get('PYTHONPATH', '')
+
+        # Start the subprocess
+        process = subprocess.Popen(command, cwd=working_directory, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
+
+        try:
+            # Wait for the process to complete, with a timeout
+            stdout, stderr = process.communicate(timeout=10)
+        except subprocess.TimeoutExpired:
+            logger.info("The command did not complete within the given timeout.")
+            process.kill()  # Kill the process if it times out
+            stdout, stderr = process.communicate()
+        return stdout.decode('utf-8'), stderr.decode('utf-8')
+
+    async def run(
+        self, code, mode="script", code_file_name="", test_code="", test_file_name="", command=[], **kwargs
+    ) -> str:
+        logger.info(f"Running {' '.join(command)}")
+        if mode == "script":
+            outs, errs = await self.run_script(command=command, **kwargs)
+        elif mode == "text":
+            outs, errs = await self.run_text(code=code)
+
+        logger.info(f"{outs=}")
+        logger.info(f"{errs=}")
+
+        context = CONTEXT.format(
+            code=code, code_file_name=code_file_name,
+            test_code=test_code, test_file_name=test_file_name,
+            command=" ".join(command),
+            outs=outs[:500], # outs might be long but they are not important, truncate them to avoid token overflow
+            errs=errs[:10000] # truncate errors to avoid token overflow
+        )
+
+        prompt = PROMPT_TEMPLATE.format(context=context)
+        rsp = await self._aask(prompt)
+
+        result = context + rsp
+
+        return result
--- a/metagpt/actions/write_test.py
+++ b/metagpt/actions/write_test.py
@ -5,22 +5,45 @@
@Author  : alexanderwu
@File    : write_test.py
 """
+from metagpt.logs import logger
 from metagpt.actions.action import Action
+from metagpt.utils.common import CodeParser

+PROMPT_TEMPLATE = """
+NOTICE
+1. Role: You are a QA engineer; the main goal is to design, develop, and execute PEP8 compliant, well-structured, maintainable test cases and scripts for Python 3.9. Your focus should be on ensuring the product quality of the entire project through systematic testing.
+2. Requirement: Based on the context, develop a comprehensive test suite that adequately covers all relevant aspects of the code file under review. Your test suite will be part of the overall project QA, so please develop complete, robust, and reusable test cases.
+3. Attention1: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the test case or script.
+4. Attention2: If there are any settings in your tests, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE.
+5. Attention3: YOU MUST FOLLOW "Data structures and interface definitions". DO NOT CHANGE ANY DESIGN. Make sure your tests respect the existing design and ensure its validity.
+6. Think before writing: What should be tested and validated in this document? What edge cases could exist? What might fail?
+7. CAREFULLY CHECK THAT YOU DON'T MISS ANY NECESSARY TEST CASES/SCRIPTS IN THIS FILE.
+Attention: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the test case or script and triple quotes.
+-----
+## Given the following code, please write appropriate test cases using Python's unittest framework to verify the correctness and robustness of this code:
+```python
+{code_to_test}
+```
+Note that the code to test is at {source_file_path}, we will put your test code at {workspace}/tests/{test_file_name}, and run your test code from {workspace},
+you should correctly import the necessary classes based on these file locations!
+## {test_file_name}: Write test code with triple quoto. Do your best to implement THIS ONLY ONE FILE.
+"""

 class WriteTest(Action):
-    def __init__(self, name="", context=None, llm=None):
+    def __init__(self, name="WriteTest", context=None, llm=None):
        super().__init__(name, context, llm)
-        self.code = None
-        self.test_prompt_template = """
-        Given the following code or function:
-        {code}

-        As a test engineer, please write appropriate test cases using Python's unittest framework to verify the correctness and robustness of this code.
-        """
+    async def write_code(self, prompt):
+        code_rsp = await self._aask(prompt)
+        code = CodeParser.parse_code(block="", text=code_rsp)
+        return code

-    async def run(self, code):
-        self.code = code
-        prompt = self.test_prompt_template.format(code=self.code)
-        test_cases = await self._aask(prompt)
-        return test_cases
+    async def run(self, code_to_test, test_file_name, source_file_path, workspace):
+        prompt = PROMPT_TEMPLATE.format(
+            code_to_test=code_to_test,
+            test_file_name=test_file_name,
+            source_file_path=source_file_path,
+            workspace=workspace
+        )
+        code = await self.write_code(prompt)
+        return code
--- a/metagpt/config.py
+++ b/metagpt/config.py
@ -77,6 +77,12 @@ class Config(metaclass=Singleton):
            logger.warning("LONG_TERM_MEMORY is True")
        self.max_budget = self._get("MAX_BUDGET", 10.0)
        self.total_cost = 0.0
+        self.puppeteer_config = self._get("PUPPETEER_CONFIG","")
+        self.mmdc = self._get("MMDC","mmdc")
+        self.update_costs = self._get("UPDATE_COSTS",True)
+        self.calc_usage = self._get("CALC_USAGE",True)
+
+        

    def _init_with_config_files_and_env(self, configs: dict, yaml_file):
        """从config/key.yaml / config/config.yaml / env三处按优先级递减加载"""
--- a/metagpt/document_store/faiss_store.py
+++ b/metagpt/document_store/faiss_store.py
@ -67,6 +67,7 @@ class FaissStore(LocalStore):

        self.store = self._write(docs, metadatas)
        self.persist()
+        return self.store

    def add(self, texts: list[str], *args, **kwargs) -> list[str]:
        """FIXME: 目前add之后没有更新store"""
--- a/metagpt/memory/longterm_memory.py
+++ b/metagpt/memory/longterm_memory.py
@ -43,13 +43,13 @@ class LongTermMemory(Memory):
                # and ignore adding messages from recover repeatedly
                self.memory_storage.add(message)

-    def remember(self, observed: list[Message], k=10) -> list[Message]:
+    def remember(self, observed: list[Message], k=0) -> list[Message]:
        """
        remember the most similar k memories from observed Messages, return all when k=0
            1. remember the short-term memory(stm) news
            2. integrate the stm news with ltm(long-term memory) news
        """
-        stm_news = super(LongTermMemory, self).remember(observed)  # shot-term memory news
+        stm_news = super(LongTermMemory, self).remember(observed, k=k)  # shot-term memory news
        if not self.memory_storage.is_initialized:
            # memory_storage hasn't initialized, use default `remember` to get stm_news
            return stm_news
--- a/metagpt/memory/memory.py
+++ b/metagpt/memory/memory.py
@ -63,7 +63,7 @@ class Memory:
        """Return the most recent k memories, return all when k=0"""
        return self.storage[-k:]

-    def remember(self, observed: list[Message], k=10) -> list[Message]:
+    def remember(self, observed: list[Message], k=0) -> list[Message]:
        """remember the most recent k memories from observed Messages, return all when k=0"""
        already_observed = self.get(k)
        news: list[Message] = []
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -229,10 +229,11 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):

    def _calc_usage(self, messages: list[dict], rsp: str) -> dict:
        usage = {}
-        prompt_tokens = count_message_tokens(messages, self.model)
-        completion_tokens = count_string_tokens(rsp, self.model)
-        usage['prompt_tokens'] = prompt_tokens
-        usage['completion_tokens'] = completion_tokens
+        if CONFIG.calc_usage:
+            prompt_tokens = count_message_tokens(messages, self.model)
+            completion_tokens = count_string_tokens(rsp, self.model)
+            usage['prompt_tokens'] = prompt_tokens
+            usage['completion_tokens'] = completion_tokens
        return usage

    async def acompletion_batch(self, batch: list[list[dict]]) -> list[dict]:
@ -262,9 +263,10 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
        return results

    def _update_costs(self, usage: dict):
-        prompt_tokens = int(usage['prompt_tokens'])
-        completion_tokens = int(usage['completion_tokens'])
-        self._cost_manager.update_cost(prompt_tokens, completion_tokens, self.model)
+        if CONFIG.update_costs:
+            prompt_tokens = int(usage['prompt_tokens'])
+            completion_tokens = int(usage['completion_tokens'])
+            self._cost_manager.update_cost(prompt_tokens, completion_tokens, self.model)

    def get_costs(self) -> Costs:
        return self._cost_manager.get_costs()
--- a/metagpt/roles/engineer.py
+++ b/metagpt/roles/engineer.py
@ -16,6 +16,7 @@ from metagpt.roles import Role
 from metagpt.actions import WriteCode, WriteCodeReview, WriteTasks, WriteDesign
 from metagpt.schema import Message
 from metagpt.utils.common import CodeParser
+from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP


 async def gather_ordered_k(coros, k) -> list:
@ -60,7 +61,7 @@ class Engineer(Role):

    @classmethod
    def parse_tasks(self, task_msg: Message) -> list[str]:
-        if not task_msg.instruct_content:
+        if task_msg.instruct_content:
            return task_msg.instruct_content.dict().get("Task list")
        return CodeParser.parse_file_list(block="Task list", text=task_msg.content)

@ -70,8 +71,8 @@ class Engineer(Role):

    @classmethod
    def parse_workspace(cls, system_design_msg: Message) -> str:
-        if not system_design_msg.instruct_content:
-            return system_design_msg.instruct_content.dict().get("Python package name")
+        if system_design_msg.instruct_content:
+            return system_design_msg.instruct_content.dict().get("Python package name").strip().strip("'").strip("\"")
        return CodeParser.parse_str(block="Python package name", text=system_design_msg.content)

    def get_workspace(self) -> Path:
@ -95,6 +96,7 @@ class Engineer(Role):
        file = workspace / filename
        file.parent.mkdir(parents=True, exist_ok=True)
        file.write_text(code)
+        return file

    def recv(self, message: Message) -> None:
        self._rc.memory.add(message)
@ -126,23 +128,33 @@ class Engineer(Role):
        return msg

    async def _act_sp(self) -> Message:
+        code_msg_all = [] # gather all code info, will pass to qa_engineer for tests later
        for todo in self.todos:
-            code_rsp = await WriteCode().run(
+            code = await WriteCode().run(
                context=self._rc.history,
                filename=todo
            )
            # logger.info(todo)
            # logger.info(code_rsp)
            # code = self.parse_code(code_rsp)
-            self.write_file(todo, code_rsp)
-            msg = Message(content=code_rsp, role=self.profile, cause_by=type(self._rc.todo))
+            file_path = self.write_file(todo, code)
+            msg = Message(content=code, role=self.profile, cause_by=type(self._rc.todo))
            self._rc.memory.add(msg)

+            code_msg = todo + FILENAME_CODE_SEP + str(file_path)
+            code_msg_all.append(code_msg)
+
        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content="all done.", role=self.profile, cause_by=type(self._rc.todo))
+        msg = Message(
+            content=MSG_SEP.join(code_msg_all),
+            role=self.profile,
+            cause_by=type(self._rc.todo),
+            send_to="QaEngineer"
+        )
        return msg

    async def _act_sp_precision(self) -> Message:
+        code_msg_all = [] # gather all code info, will pass to qa_engineer for tests later
        for todo in self.todos:
            """
            # 从历史信息中挑选必须的信息，以减少prompt长度（人工经验总结）
@ -173,12 +185,20 @@ class Engineer(Role):
                except Exception as e:
                    logger.error("code review failed!", e)
                    pass
-            self.write_file(todo, code)
+            file_path = self.write_file(todo, code)
            msg = Message(content=code, role=self.profile, cause_by=WriteCode)
            self._rc.memory.add(msg)

+            code_msg = todo + FILENAME_CODE_SEP + str(file_path)
+            code_msg_all.append(code_msg)
+
        logger.info(f'Done {self.get_workspace()} generating.')
-        msg = Message(content="all done.", role=self.profile, cause_by=WriteCode)
+        msg = Message(
+            content=MSG_SEP.join(code_msg_all),
+            role=self.profile,
+            cause_by=type(self._rc.todo),
+            send_to="QaEngineer"
+        )
        return msg

    async def _act(self) -> Message:
--- a/metagpt/roles/qa_engineer.py
+++ b/metagpt/roles/qa_engineer.py
@ -5,11 +5,162 @@
@Author  : alexanderwu
@File    : qa_engineer.py
 """
-from metagpt.actions import WriteTest
-from metagpt.roles import Role
+import os
+import re
+from pathlib import Path
+from typing import Type

+from metagpt.actions import WriteTest, WriteCode, WriteDesign, RunCode, DebugError
+from metagpt.const import WORKSPACE_ROOT
+from metagpt.logs import logger
+from metagpt.roles import Role
+from metagpt.schema import Message
+from metagpt.roles.engineer import Engineer
+from metagpt.utils.common import CodeParser, parse_recipient
+from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP

 class QaEngineer(Role):
-    def __init__(self, name, profile, goal, constraints):
+    def __init__(self, name="Edward", profile="QaEngineer",
+                 goal="Write comprehensive and robust tests to ensure codes will work as expected without bugs",
+                 constraints="The test code you write should conform to code standard like PEP8, be modular, easy to read and maintain",
+                 test_round_allowed=5):
        super().__init__(name, profile, goal, constraints)
-        self._init_actions([WriteTest])
+        self._init_actions([WriteTest]) # FIXME: a bit hack here, only init one action to circumvent _think() logic, will overwrite _think() in future updates
+        self._watch([WriteCode, WriteTest, RunCode, DebugError])
+        self.test_round = 0
+        self.test_round_allowed = test_round_allowed
+    
+    @classmethod
+    def parse_workspace(cls, system_design_msg: Message) -> str:
+        if not system_design_msg.instruct_content:
+            return system_design_msg.instruct_content.dict().get("Python package name")
+        return CodeParser.parse_str(block="Python package name", text=system_design_msg.content)
+    
+    def get_workspace(self, return_proj_dir=True) -> Path:
+        msg = self._rc.memory.get_by_action(WriteDesign)[-1]
+        if not msg:
+            return WORKSPACE_ROOT / 'src'
+        workspace = self.parse_workspace(msg)
+        # project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
+        if return_proj_dir:
+            return WORKSPACE_ROOT / workspace
+        # development codes directory: workspace/{package_name}/{package_name}
+        return WORKSPACE_ROOT / workspace / workspace
+
+    def write_file(self, filename: str, code: str):
+        workspace = self.get_workspace() / 'tests'
+        file = workspace / filename
+        file.parent.mkdir(parents=True, exist_ok=True)
+        file.write_text(code)
+
+    async def _write_test(self, message: Message) -> None:
+
+        code_msgs = message.content.split(MSG_SEP)
+        result_msg_all = []
+        for code_msg in code_msgs:
+
+            # write tests
+            file_name, file_path = code_msg.split(FILENAME_CODE_SEP)
+            code_to_test = open(file_path, "r").read()
+            if "test" in file_name:
+                continue # Engineer might write some test files, skip testing a test file
+            test_file_name = "test_" + file_name
+            test_file_path = self.get_workspace() / "tests" / test_file_name
+            logger.info(f'Writing {test_file_name}..')
+            test_code = await WriteTest().run(
+                code_to_test=code_to_test,
+                test_file_name=test_file_name,
+                # source_file_name=file_name,
+                source_file_path=file_path,
+                workspace=self.get_workspace()
+            )
+            self.write_file(test_file_name, test_code)
+
+            # prepare context for run tests in next round
+            command = ['python', f'tests/{test_file_name}']
+            file_info = {
+                "file_name": file_name, "file_path": str(file_path),
+                "test_file_name": test_file_name, "test_file_path": str(test_file_path),
+                "command": command
+            }
+            msg = Message(
+                content=str(file_info), role=self.profile, cause_by=WriteTest,
+                sent_from=self.profile, send_to=self.profile
+            )
+            self._publish_message(msg)
+        
+        logger.info(f'Done {self.get_workspace()}/tests generating.')
+    
+    async def _run_code(self, msg):
+        file_info = eval(msg.content)
+        development_file_path = file_info["file_path"]
+        test_file_path = file_info["test_file_path"]
+        if not os.path.exists(development_file_path) or not os.path.exists(test_file_path):
+            return
+
+        development_code = open(development_file_path, "r").read()
+        test_code = open(test_file_path, "r").read()
+        proj_dir = self.get_workspace()
+        development_code_dir = self.get_workspace(return_proj_dir=False)
+
+        result_msg = await RunCode().run(
+            mode="script",
+            code=development_code,
+            code_file_name=file_info["file_name"],
+            test_code=test_code,
+            test_file_name=file_info["test_file_name"],
+            command=file_info["command"],
+            working_directory=proj_dir, # workspace/package_name, will run tests/test_xxx.py here
+            additional_python_paths=[development_code_dir], # workspace/package_name/package_name,
+                                                            # import statement inside package code needs this
+        )
+
+        recipient = parse_recipient(result_msg) # the recipient might be Engineer or myself
+        content = str(file_info) + FILENAME_CODE_SEP + result_msg
+        msg = Message(
+            content=content, role=self.profile, cause_by=RunCode,
+            sent_from=self.profile, send_to=recipient
+        )
+        self._publish_message(msg)
+
+    async def _debug_error(self, msg):
+        file_info, context = msg.content.split(FILENAME_CODE_SEP)
+        file_name, code = await DebugError().run(context)
+        if file_name:
+            self.write_file(file_name, code)
+            recipient = msg.sent_from # send back to the one who ran the code for another run, might be one's self
+            msg = Message(content=file_info, role=self.profile, cause_by=DebugError, sent_from=self.profile, send_to=recipient)
+            self._publish_message(msg)
+    
+    async def _observe(self) -> int:
+        await super()._observe()
+        self._rc.news = [msg for msg in self._rc.news \
+            if msg.send_to == self.profile] # only relevant msgs count as observed news
+        return len(self._rc.news)
+
+    async def _act(self) -> Message:
+        if self.test_round > self.test_round_allowed:
+            result_msg = Message(
+                content=f"Exceeding {self.test_round_allowed} rounds of tests, skip (writing code counts as a round, too)",
+                role=self.profile, cause_by=WriteTest, sent_from=self.profile, send_to=""
+            )
+            return result_msg
+
+        for msg in self._rc.news:
+            # Decide what to do based on observed msg type, currently defined by human,
+            # might potentially be moved to _think, that is, let the agent decides for itself
+            if msg.cause_by == WriteCode:
+                # engineer wrote a code, time to write a test for it
+                await self._write_test(msg)
+            elif msg.cause_by in [WriteTest, DebugError]:
+                # I wrote or debugged my test code, time to run it
+                await self._run_code(msg)
+            elif msg.cause_by == RunCode:
+                # I ran my test code, time to fix bugs, if any
+                await self._debug_error(msg)
+        self.test_round += 1
+        result_msg = Message(
+            content=f"Round {self.test_round} of tests done",
+            role=self.profile, cause_by=WriteTest, sent_from=self.profile, send_to=""
+        )
+        return result_msg
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@ -72,6 +72,7 @@ class RoleContext(BaseModel):
    state: int = Field(default=0)
    todo: Action = Field(default=None)
    watch: set[Type[Action]] = Field(default_factory=set)
+    news: list[Type[Message]] = Field(default=[])

    class Config:
        arbitrary_types_allowed = True
@ -216,15 +217,15 @@ class Role:
        
        observed = self._rc.env.memory.get_by_actions(self._rc.watch)
        
-        news = self._rc.memory.remember(observed)  # remember recent exact or similar memories
+        self._rc.news = self._rc.memory.remember(observed)  # remember recent exact or similar memories

        for i in env_msgs:
            self.recv(i)

-        news_text = [f"{i.role}: {i.content[:20]}..." for i in news]
+        news_text = [f"{i.role}: {i.content[:20]}..." for i in self._rc.news]
        if news_text:
            logger.debug(f'{self._setting} observed: {news_text}')
-        return len(news)
+        return len(self._rc.news)

    def _publish_message(self, msg):
        """如果role归属于env，那么role的消息会向env广播"""
@ -261,6 +262,8 @@ class Role:
                message = Message(message)
            if isinstance(message, Message):
                self.recv(message)
+            if isinstance(message, list):
+                self.recv(Message("\n".join(message)))
        elif not await self._observe():
            # 如果没有任何新信息，挂起等待
            logger.debug(f"{self._setting}: no news. waiting.")
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -27,6 +27,8 @@ class Message:
    instruct_content: BaseModel = field(default=None)
    role: str = field(default='user')  # system / user / assistant
    cause_by: Type["Action"] = field(default="")
+    sent_from: str = field(default="")
+    send_to: str = field(default="")

    def __str__(self):
        # prefix = '-'.join([self.role, str(self.cause_by)])
--- a/metagpt/tools/search_engine.py
+++ b/metagpt/tools/search_engine.py
@ -37,7 +37,7 @@ class SearchEngine:
        logger.info(results)
        return results

-    async def run(self, query, max_results=8):
+    async def run(self, query: str, max_results=8):
        if self.engine == SearchEngineType.SERPAPI_GOOGLE:
            api = SerpAPIWrapper()
            rsp = await api.run(query)
@ -45,10 +45,7 @@ class SearchEngine:
            rsp = SearchEngine.run_google(query, max_results)
        elif self.engine == SearchEngineType.SERPER_GOOGLE:
            api = SerperWrapper()
-            if isinstance(query, list):
-                rsp = await api.run(query)
-            elif isinstance(query, str):
-                rsp = await api.run([query])
+            rsp = await api.run(query)
        elif self.engine == SearchEngineType.CUSTOM_ENGINE:
            rsp = self.run_func(query)
        else:
@ -74,15 +71,15 @@ def google_official_search(query: str, num_results: int = 8, focus=['snippet', '
        api_key = config.google_api_key
        custom_search_engine_id = config.google_cse_id

-        service = build("customsearch", "v1", developerKey=api_key)
+        with build("customsearch", "v1", developerKey=api_key) as service:

-        result = (
-            service.cse()
-            .list(q=query, cx=custom_search_engine_id, num=num_results)
-            .execute()
-        )
-
-        # Extract the search result items from the response
+            result = (
+                service.cse()
+                .list(q=query, cx=custom_search_engine_id, num=num_results)
+                .execute()
+            )
+            logger.info(result)
+            # Extract the search result items from the response
        search_results = result.get("items", [])

        # Create a list of only the URLs from the search results
--- a/metagpt/tools/search_engine_serper.py
+++ b/metagpt/tools/search_engine_serper.py
@ -38,7 +38,8 @@ class SerperWrapper(BaseModel):

    async def run(self, query: str, **kwargs: Any) -> str:
        """Run query through Serper and parse result async."""
-        return ";".join([self._process_response(res) for res in await self.results(query)])
+        queries = query.split("\n")
+        return "\n".join([self._process_response(res) for res in await self.results(queries)])

    async def results(self, queries: list[str]) -> dict:
        """Use aiohttp to run query through Serper and return the results async."""
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@ -183,7 +183,7 @@ class CodeParser:
    def parse_file_list(cls, block: str, text: str, lang: str = "") -> list[str]:
        # Regular expression pattern to find the tasks list.
        code = cls.parse_code(block, text, lang)
-        print(code)
+        # print(code)
        pattern = r'\s*(.*=.*)?(\[.*\])'

        # Extract tasks list string using regex.
@ -230,3 +230,8 @@ def print_members(module, indent=0):
            print(f'{prefix}Function: {name}')
        elif inspect.ismethod(obj):
            print(f'{prefix}Method: {name}')
+
+def parse_recipient(text):
+    pattern = "## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
+    recipient = re.search(pattern, text)
+    return recipient.group(1) if recipient else ""
--- a/metagpt/utils/mermaid.py
+++ b/metagpt/utils/mermaid.py
@ -8,13 +8,11 @@
 import os
 import subprocess
 from pathlib import Path
-
+from metagpt.config import CONFIG
 from metagpt.const import PROJECT_ROOT
 from metagpt.logs import logger
 from metagpt.utils.common import check_cmd_exists

-IS_DOCKER = os.environ.get('AM_I_IN_A_DOCKER_CONTAINER', 'false').lower()
-

 def mermaid_to_file(mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int:
    """suffix: png/svg/pdf
@ -38,12 +36,13 @@ def mermaid_to_file(mermaid_code, output_file_without_suffix, width=2048, height
        output_file = f'{output_file_without_suffix}.{suffix}'
        # Call the `mmdc` command to convert the Mermaid code to a PNG
        logger.info(f"Generating {output_file}..")
-        if IS_DOCKER == 'true':
-            subprocess.run(['mmdc', '-p', '/app/metagpt/puppeteer-config.json', '-i',
-                           str(tmp), '-o', output_file, '-w', str(width), '-H', str(height)])
+
+        if CONFIG.puppeteer_config:
+            subprocess.run([CONFIG.mmdc, '-p', CONFIG.puppeteer_config, '-i', str(tmp), '-o',
+                            output_file, '-w', str(width), '-H', str(height)])
        else:
-            subprocess.run(['mmdc', '-i', str(tmp), '-o',
-                           output_file, '-w', str(width), '-H', str(height)])
+            subprocess.run([CONFIG.mmdc, '-i', str(tmp), '-o',
+                            output_file, '-w', str(width), '-H', str(height)])
    return 0


--- a/metagpt/utils/special_tokens.py
+++ b/metagpt/utils/special_tokens.py
@ -0,0 +1,4 @@
+# token to separate different code messages in a WriteCode Message content
+MSG_SEP = "#*000*#" 
+# token to separate file name and the actual code text in a code message
+FILENAME_CODE_SEP = "#*001*#"