update: prompt example optimization

your-username 2024-07-11 15:54:18 +08:00
commit 8e984db9ff
15 changed files with 764 additions and 703 deletions

View file

@ -27,7 +27,7 @@ class MGXEnv(Environment):
def publish_message(self, message: Message, user_defined_recipient: str = "", publicer: str = "") -> bool:
"""let the team leader take over message publishing"""
tl = self.get_role("Tim") # TeamLeader's name is Tim
tl = self.get_role("Mike") # TeamLeader's name is Mike
if user_defined_recipient:
# human user's direct chat message to a certain role
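The rename above matters because the team leader is resolved by display name at runtime. A minimal, self-contained sketch of that lookup pattern; `Role` and `Environment` here are simplified stand-ins, not the real MetaGPT classes:

```python
class Role:
    def __init__(self, name: str):
        self.name = name

class Environment:
    def __init__(self, roles: list[Role]):
        self.roles = {r.name: r for r in roles}

    def get_role(self, name: str):
        # returns None when the name matches no registered role
        return self.roles.get(name)

env = Environment([Role("Mike"), Role("Alex")])
assert env.get_role("Mike") is not None
assert env.get_role("Tim") is None  # the old name now silently resolves to nothing
```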

View file

@ -57,7 +57,7 @@ Just print the PR Patch comments in json format like **Output Format**.
"""
CODE_REVIEW_COMFIRM_SYSTEM_PROMPT = """
- You are a professional engineer with Java stack, and good at code review comment result judgement.
+ You are a professional engineer with {code_language} stack, and good at code review comment result judgement.
"""
CODE_REVIEW_COMFIRM_TEMPLATE = """
@ -132,13 +132,20 @@ class CodeReview(Action):
code = get_code_block_from_patch(
patch, str(max(1, int(code_start_line) - 5)), str(int(code_end_line) + 5)
)
code_language = "Java"
code_file_ext = cmt.get("commented_file", ".java").split(".")[-1]
if code_file_ext == ".java":
code_language = "Java"
elif code_file_ext == ".py":
code_language = "Python"
prompt = CODE_REVIEW_COMFIRM_TEMPLATE.format(
code=code,
comment=cmt.get("comment"),
desc=point.text,
example=point.yes_example + "\n" + point.no_example,
)
- resp = await self.llm.aask(prompt, system_msgs=[CODE_REVIEW_COMFIRM_SYSTEM_PROMPT])
+ system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)]
+ resp = await self.llm.aask(prompt, system_msgs=system_prompt)
if "True" in resp or "true" in resp:
new_comments.append(cmt)
logger.info(f"original comments num: {len(comments)}, confirmed comments num: {len(new_comments)}")
@ -163,7 +170,11 @@ class CodeReview(Action):
prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str)
resp = await self.llm.aask(prompt)
json_str = parse_json_code_block(resp)[0]
- comments += json.loads(json_str)
+ comment = json.loads(json_str)
+ patched_file_path = patched_file.path
+ for c in comment:
+     c["commented_file"] = patched_file_path
+ comments += comment
return comments
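The extension check above could equally be table-driven. A minimal sketch of that mapping as a standalone helper; the `EXT_TO_LANGUAGE` table and `detect_language` name are illustrative, not part of the patch:

```python
EXT_TO_LANGUAGE = {"java": "Java", "py": "Python"}

def detect_language(commented_file: str, default: str = "Java") -> str:
    # "src/main/Foo.java" -> "java" -> "Java"; unknown extensions fall back to the default
    ext = commented_file.rsplit(".", 1)[-1].lower()
    return EXT_TO_LANGUAGE.get(ext, default)

assert detect_language("src/main/Foo.java") == "Java"
assert detect_language("metagpt/utils/common.py") == "Python"
```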

View file

@ -81,17 +81,18 @@ class ModifyCode(Action):
}
resp = None
for patched_file in patch:
- patch_target_file_name = str(patched_file.target_file).split("/", maxsplit=1)[-1]
- if patch_target_file_name not in grouped_comments:
+ patch_target_file_name = str(patched_file.path).split("/")[-1]
+ if patched_file.path not in grouped_comments:
continue
comments_prompt = ""
index = 1
- for grouped_comment in grouped_comments[patch_target_file_name]:
+ for grouped_comment in grouped_comments[patched_file.path]:
comments_prompt += f"""
<comment{index}>
{grouped_comment}
</comment{index}>\n
"""
index += 1
prompt = MODIFY_CODE_PROMPT.format(patch=patched_file, comments=comments_prompt)
output_dir = (
Path(output_dir)
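For context, `grouped_comments` is now keyed by the full `patched_file.path` rather than a stripped file name. A hedged sketch of how such a grouping might be built, assuming each comment dict carries the `commented_file` key added in the CodeReview change above (`group_comments_by_file` is an illustrative name):

```python
from collections import defaultdict

def group_comments_by_file(comments: list[dict]) -> dict[str, list[dict]]:
    grouped: dict[str, list[dict]] = defaultdict(list)
    for c in comments:
        # key is the full patch path, e.g. "metagpt/utils/common.py"
        grouped[c["commented_file"]].append(c)
    return grouped
```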

File diff suppressed because it is too large.

View file

@ -51,6 +51,7 @@ Some text indicating your thoughts, such as how you should update the plan statu
```
Notice: your output JSON data section must start with **```json [**
"""
JSON_REPAIR_PROMPT = """
## json data
{json_data}
@ -61,3 +62,10 @@ Formatted JSON data
```
Help check if there are any formatting issues with the JSON data? If so, please help format it
"""
QUICK_THINK_PROMPT = """
Decide if the latest user message is a quick question.
Quick questions include common-sense, logical, math questions, greetings, or casual chat that you can answer directly, excluding software development tasks.
Respond with "#YES#, (then start your actual response to the question...)" if so, otherwise, simply respond with "#NO#".
Your response:
"""

View file

@ -89,11 +89,11 @@ Thought: We have located both the `openai_api.py` file. Let's start by opening t
->
### Fix the Bug(Require): Fix the bug in the code by editing the relevant function, class or code snippet.
- Thought: Let's edit the apow function to include the Pow case in the isinstance check.
+ Thought: Now that I've found the bug, let's fix it by editing.
{{
"command_name": "Bash.run",
"args": {{
"cmd": "edit 93:95 <<EOF\n usage = None\n collected_messages = []\n async for chunk in response:\n if chunk.usage is not None:\n usage = CompletionUsage(**chunk.usage)\n chunk_message = chunk.choices[0].delta.content or '' if chunk.choices else '' # extract the message\n finish_reason = (\n chunk.choices[0].finish_reason if chunk.choices and hasattr(chunk.choices[0], 'finish_reason') else None\n )\n log_llm_stream(chunk_message)\nEOF"
"cmd": "edit 93:95 <<EOF\n usage = None\n collected_messages = []\n async for chunk in response:\n if chunk.usage is not None:\n usage = CompletionUsage(**chunk.usage)\n chunk_message = chunk.choices[0].delta.content or '' if chunk.choices else '' # extract the message\n finish_reason = (\n chunk.choices[0].finish_reason if chunk.choices and hasattr(chunk.choices[0], 'finish_reason') else None\n )\n log_llm_stream(chunk_message)\nEOF"
}}
}}
->
@ -101,7 +101,7 @@ Thought: Due to a syntax error related to an undefined name 'Image', we need to
{{
"command_name": "Bash.run",
"args": {{
"cmd": "edit 14:14 <<EOF\nfrom PIL.Image import Image\nEOF"
"cmd": "edit 14:14 <<EOF\nfrom PIL.Image import Image\nEOF"
}}
}}
->
@ -244,5 +244,5 @@ The current bash state is:
(Open file: {{open_file}})
(Current directory: {{working_dir}})
- Avoid repeating the same command. Instead, please think about the current situation and provide the next bash command to execute in JSON format"
+ Avoid repeating the same command. Instead, please think about the current situation and provide the next bash command to execute in JSON format:"
"""

View file

@ -8,12 +8,13 @@ from typing import Callable, Dict, List, Literal, Tuple
from pydantic import model_validator
- from metagpt.actions import Action
+ from metagpt.actions import Action, UserRequirement
from metagpt.actions.di.run_command import RunCommand
from metagpt.logs import logger
from metagpt.prompts.di.role_zero import (
CMD_PROMPT,
JSON_REPAIR_PROMPT,
+ QUICK_THINK_PROMPT,
ROLE_INSTRUCTION,
)
from metagpt.roles import Role
@ -24,7 +25,7 @@ from metagpt.tools.libs.browser import Browser
from metagpt.tools.libs.editor import Editor
from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
from metagpt.tools.tool_registry import register_tool
- from metagpt.utils.common import CodeParser
+ from metagpt.utils.common import CodeParser, any_to_str
from metagpt.utils.repair_llm_raw_output import RepairType, repair_llm_raw_output
from metagpt.utils.report import ThoughtReporter
@ -183,9 +184,14 @@ class RoleZero(Role):
)
async def _react(self) -> Message:
- # NOTE: Diff 1: Each time landing here means observing news, set todo to allow news processing in _think
+ # NOTE: Diff 1: Each time landing here means news is observed, set todo to allow news processing in _think
self._set_state(0)
+ # problems solvable by quick thinking don't need a formal think-act cycle
+ quick_rsp = await self._quick_think()
+ if quick_rsp:
+     return quick_rsp
actions_taken = 0
rsp = AIMessage(content="No actions taken yet", cause_by=Action) # will be overwritten after Role _act
while actions_taken < self.rc.max_react_loop:
@ -202,6 +208,29 @@ class RoleZero(Role):
actions_taken += 1
return rsp # return output from the last action
+ async def _quick_think(self) -> Message:
+     msg = self.rc.news[-1]
+     rsp_msg = None
+     if msg.cause_by != any_to_str(UserRequirement):
+         # Agents themselves won't generate quick questions; use this rule to reduce extra LLM calls
+         return rsp_msg
+     context = self.llm.format_msg(self.get_memories(k=4) + [UserMessage(content=QUICK_THINK_PROMPT)])
+     rsp = await self.llm.aask(context)
+     pattern = r"#YES#,? ?"
+     if re.search(pattern, rsp):
+         answer = re.sub(pattern, "", rsp).strip()
+         self.rc.memory.add(AIMessage(content=answer, cause_by=RunCommand))
+         await self.reply_to_human(content=answer)
+         rsp_msg = AIMessage(
+             content="Complete run",
+             sent_from=self.name,
+             cause_by=RunCommand,
+         )
+     return rsp_msg
async def _parse_commands(self) -> Tuple[List[Dict], bool]:
"""Retrieves commands from the Large Language Model (LLM).
@ -217,6 +246,7 @@ class RoleZero(Role):
commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp)
commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON))
except json.JSONDecodeError:
+ logger.warning(f"Failed to parse JSON for: {self.command_rsp}. Trying to repair...")
commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp))
commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands))
except Exception as e:
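The `#YES#` handshake in `_quick_think` above is just a regex gate over the model's reply. A minimal sketch of that contract in isolation, using the same `r"#YES#,? ?"` pattern; the function name is illustrative:

```python
import re

YES_PATTERN = re.compile(r"#YES#,? ?")

def split_quick_answer(llm_response: str):
    """Return the quick answer when the model flagged one, else None."""
    if YES_PATTERN.search(llm_response):
        return YES_PATTERN.sub("", llm_response).strip()
    return None

assert split_quick_answer("#YES#, The answer is 14 bags.") == "The answer is 14 bags."
assert split_quick_answer("#NO#") is None
```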

View file

@ -510,15 +510,6 @@ Explanation: The requirement is about software development. Assign each tasks to
"assignee": "Alex"
}
},
- {
-     "command_name": "Plan.append_task",
-     "args": {
-         "task_id": "5",
-         "dependent_task_ids": ["4"],
-         "instruction": "Write comprehensive tests for the game logic and user interface to ensure functionality and reliability.",
-         "assignee": "Edward"
-     }
- },
{
"command_name": "TeamLeader.publish_message",
"args": {

View file

@ -24,6 +24,7 @@ from metagpt.utils.a11y_tree import (
scroll_page,
type_text,
)
+ from metagpt.utils.proxy_env import get_proxy_from_env
from metagpt.utils.report import BrowserReporter
@ -72,7 +73,7 @@ class Browser:
self.page: Optional[Page] = None
self.accessibility_tree: list = []
self.headless: bool = True
- self.proxy = None
+ self.proxy = get_proxy_from_env()
self.is_empty_page = True
self.reporter = BrowserReporter()
@ -120,7 +121,7 @@ class Browser:
await scroll_page(self.page, direction)
return await self._wait_page()
- async def goto(self, url: str, timeout: float = 30000):
+ async def goto(self, url: str, timeout: float = 90000):
"""Navigate to a specific URL."""
if self.page is None:
await self.start()
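Taken together, the two changes mean a default-constructed `Browser` now picks up the ambient proxy from the environment and waits up to 90 s on navigation. An illustrative usage sketch (the URL is arbitrary; `page.content()` usage mirrors the code_review.py tool below):

```python
import asyncio

async def main():
    # proxy now comes from get_proxy_from_env() instead of being hard-coded;
    # goto() waits up to 90000 ms by default instead of 30000 ms
    async with Browser() as browser:
        await browser.goto("https://example.com")
        html = await browser.page.content()
        print(html[:80])

asyncio.run(main())
```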

View file

@ -3,6 +3,7 @@ from pathlib import Path
from typing import Optional
import aiofiles
from bs4 import BeautifulSoup
from unidiff import PatchSet
import metagpt.ext.cr
@ -29,7 +30,7 @@ class CodeReview:
Args:
patch_path: The local path of the patch file or the url of the pull request. Example: "/data/xxx-pr-1.patch", "https://github.com/xx/XX/pull/1362"
cr_output_file: Output file path where code review comments will be saved. Example: "cr/xxx-pr-1.json"
- cr_point_file: File path for specifying code review points. Defaults to a predefined file.
+ cr_point_file: File path for specifying code review points. Set `None` to use a predefined file.
"""
patch = await self._get_patch_content(patch_path)
cr_point_file = cr_point_file if cr_point_file else Path(metagpt.ext.cr.__file__).parent / "points.json"
@ -45,7 +46,7 @@ class CodeReview:
)
comments = await CodeReview_().run(patch, cr_points)
cr_output_path.parent.mkdir(exist_ok=True, parents=True)
- async with aiofiles.open(cr_output_path, "w") as f:
+ async with aiofiles.open(cr_output_path, "w", encoding="utf-8") as f:
await f.write(json.dumps(comments, ensure_ascii=False))
await reporter.async_report(cr_output_path)
@ -65,7 +66,7 @@ class CodeReview:
output_dir: File path where code review comments are stored.
"""
patch = await self._get_patch_content(patch_path)
- async with aiofiles.open(cr_file, "r") as f:
+ async with aiofiles.open(cr_file, "r", encoding="utf-8") as f:
comments = json.loads(await f.read())
await ModifyCode(pr="").run(patch, comments, output_dir)
return f"The fixed patch files store in {output_dir}"
@ -75,12 +76,14 @@ class CodeReview:
# async with aiohttp.ClientSession(trust_env=True) as client:
# async with client.get(f"{patch_path}.diff", ) as resp:
# patch_file_content = await resp.text()
- browser = Browser()
- browser.proxy = {"server": "http://127.0.0.1:20172"}
- async with browser:
+ async with Browser() as browser:
await browser.goto(f"{patch_path}.diff")
patch_file_content = await browser.page.content()
+ if patch_file_content.startswith("<html>"):
+     soup = BeautifulSoup(patch_file_content, "html.parser")
+     pre = soup.find("pre")
+     if pre:
+         patch_file_content = pre.text
else:
async with aiofiles.open(patch_path) as f:
patch_file_content = await f.read()
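The new branch above guards against the `.diff` URL being served as an HTML page rather than raw text. Restated as a standalone helper for clarity (the helper name is illustrative):

```python
from bs4 import BeautifulSoup

def extract_patch_text(content: str) -> str:
    """If the fetched .diff came back wrapped in HTML, pull the raw patch out of <pre>."""
    if content.startswith("<html>"):
        soup = BeautifulSoup(content, "html.parser")
        pre = soup.find("pre")
        if pre:
            return pre.text
    return content
```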

View file

@ -9,7 +9,6 @@ from github.Issue import Issue
from github.PullRequest import PullRequest
from metagpt.tools.tool_registry import register_tool
from metagpt.utils.git_repository import GitBranch
@register_tool(tags=["software development", "git", "Push to remote git repository."])

View file

@ -1,6 +1,6 @@
import asyncio
from asyncio import Queue
- from asyncio.subprocess import PIPE
+ from asyncio.subprocess import PIPE, STDOUT
from typing import Optional
from metagpt.const import DEFAULT_WORKSPACE_ROOT, SWE_SETUP_PATH
@ -28,7 +28,7 @@ class Terminal:
async def _start_process(self):
# Start a persistent shell process
self.process = await asyncio.create_subprocess_exec(
- *self.shell_command, stdin=PIPE, stdout=PIPE, stderr=PIPE, executable="bash"
+ *self.shell_command, stdin=PIPE, stdout=PIPE, stderr=STDOUT, executable="bash"
)
await self._check_state()
@ -116,6 +116,8 @@ class Terminal:
tmp = b""
while True:
output = tmp + await self.process.stdout.read(1)
+ if not output:
+     continue
*lines, tmp = output.splitlines(True)
for line in lines:
line = line.decode()
@ -123,12 +125,12 @@ class Terminal:
if ix >= 0:
line = line[0:ix]
if line:
observer.report(line, "output")
await observer.async_report(line, "output")
# report stdout in real-time
cmd_output.append(line)
return "".join(cmd_output)
# log stdout in real-time
observer.report(line, "output")
await observer.async_report(line, "output")
cmd_output.append(line)
if daemon:
await self.stdout_queue.put(line)
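The read loop above carries a trailing partial line between reads; the new empty-read guard matters because unpacking `b"".splitlines(True)` raises ValueError. A minimal sketch of that buffering, isolated from the subprocess plumbing (`feed` is an illustrative name):

```python
def feed(buffer: bytes, chunk: bytes) -> tuple[list[bytes], bytes]:
    """Split accumulated bytes into complete lines plus a carry-over remainder."""
    output = buffer + chunk
    if not output:              # empty read: nothing to split yet
        return [], b""
    *lines, remainder = output.splitlines(True)
    return lines, remainder     # remainder is re-fed on the next read

buf = b""
collected = []
for chunk in (b"hel", b"lo\nwor", b"ld\n", b"$"):
    lines, buf = feed(buf, chunk)
    collected += lines
assert collected == [b"hello\n", b"world\n"]  # "world\n" flushes once more bytes arrive
```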

View file

@ -0,0 +1,19 @@
+ import os
+
+ def get_proxy_from_env():
+     proxy_config = {}
+     server = None
+     # later variables override earlier ones, so http_proxy wins when several are set
+     for i in ("ALL_PROXY", "all_proxy", "HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"):
+         if os.environ.get(i):
+             server = os.environ.get(i)
+     if server:
+         proxy_config["server"] = server
+     no_proxy = os.environ.get("NO_PROXY") or os.environ.get("no_proxy")
+     if no_proxy:
+         proxy_config["bypass"] = no_proxy
+     if not proxy_config:
+         proxy_config = None
+     return proxy_config
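Illustrative usage: the returned dict follows Playwright's proxy-settings shape (`{"server": ..., "bypass": ...}`), which is what the Browser change above consumes. The env values here are made up for the example:

```python
import os

for var in ("ALL_PROXY", "all_proxy", "HTTP_PROXY", "http_proxy"):
    os.environ.pop(var, None)  # keep the example deterministic
os.environ["HTTPS_PROXY"] = "http://127.0.0.1:7890"
os.environ["NO_PROXY"] = "localhost,127.0.0.1"

print(get_proxy_from_env())
# -> {'server': 'http://127.0.0.1:7890', 'bypass': 'localhost,127.0.0.1'}
```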

View file

@ -31,9 +31,9 @@ TOKEN_COSTS = {
"gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4o": {"prompt": 0.005, "completion": 0.015},
"gpt-4o-2024-05-13": {"prompt": 0.005, "completion": 0.015},
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
"glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
@ -147,6 +147,8 @@ FIREWORKS_GRADE_TOKEN_COSTS = {
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
TOKEN_MAX = {
"gpt-4o-2024-05-13": 128000,
"gpt-4o": 128000,
"gpt-4-0125-preview": 128000,
"gpt-4-turbo-preview": 128000,
"gpt-4-1106-preview": 128000,

View file

@ -129,9 +129,11 @@ TL_CHAT2 = """Solve the issue at this link""" # expecting clarification
TL_CHAT3 = """Who is the first man landing on Moon""" # expecting answering directly
TL_CHAT4 = """Find all zeros in the indicated finite field of the given polynomial with coefficients in that field. x^5 + 3x^3 + x^2 + 2x in Z_5""" # expecting answering directly
TL_CHAT5 = """Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.""" # expecting answering directly
TL_CHAT6 = """Statement 1 | A ring homomorphism is one to one if and only if the kernel is {{0}},. Statement 2 | Q is an ideal in R""" # expecting answering directly
TL_CHAT6 = """True or False? Statement 1 | A ring homomorphism is one to one if and only if the kernel is {{0}},. Statement 2 | Q is an ideal in R""" # expecting answering directly
TL_CHAT7 = """Jean has 30 lollipops. Jean eats 2 of the lollipops. With the remaining lollipops, Jean wants to package 2 lollipops in one bag. How many bags can Jean fill?""" # expecting answering directly
TL_CHAT9 = """What's your name?"""
TL_CHAT10 = "Hi"
TL_CHAT11 = "Tell me about your team"
if __name__ == "__main__":
# NOTE: Add access_token to test github issue fixing