Merge branch 'mgx_ops' into 'data_analyst_ldy'

# Conflicts: # metagpt/schema.py
2026-06-05 14:55:18 +02:00 · 2024-07-17 02:47:45 +00:00 · 2024-07-17 02:47:45 +00:00 · 127ef8707b
commit 127ef8707b
parent 91997aba5a de82461815
78 changed files with 2815 additions and 917 deletions
--- a/metagpt/tools/libs/browser.py
+++ b/metagpt/tools/libs/browser.py
@ -24,6 +24,7 @@ from metagpt.utils.a11y_tree import (
    scroll_page,
    type_text,
 )
+from metagpt.utils.proxy_env import get_proxy_from_env
 from metagpt.utils.report import BrowserReporter


@ -72,7 +73,7 @@ class Browser:
        self.page: Optional[Page] = None
        self.accessibility_tree: list = []
        self.headless: bool = True
-        self.proxy = None
+        self.proxy = get_proxy_from_env()
        self.is_empty_page = True
        self.reporter = BrowserReporter()

@ -120,7 +121,7 @@ class Browser:
        await scroll_page(self.page, direction)
        return await self._wait_page()

-    async def goto(self, url: str, timeout: float = 30000):
+    async def goto(self, url: str, timeout: float = 90000):
        """Navigate to a specific URL."""
        if self.page is None:
            await self.start()
--- a/metagpt/tools/libs/cr.py
+++ b/metagpt/tools/libs/cr.py
@ -3,6 +3,7 @@ from pathlib import Path
 from typing import Optional

 import aiofiles
+from bs4 import BeautifulSoup
 from unidiff import PatchSet

 import metagpt.ext.cr
@ -29,7 +30,7 @@ class CodeReview:
        Args:
            patch_path: The local path of the patch file or the url of the pull request. Example: "/data/xxx-pr-1.patch", "https://github.com/xx/XX/pull/1362"
            cr_output_file: Output file path where code review comments will be saved. Example: "cr/xxx-pr-1.json"
-            cr_point_file: File path for specifying code review points. Defaults to a predefined file.
+            cr_point_file: File path for specifying code review points. If not specified, this parameter is not passed.
        """
        patch = await self._get_patch_content(patch_path)
        cr_point_file = cr_point_file if cr_point_file else Path(metagpt.ext.cr.__file__).parent / "points.json"
@ -45,7 +46,7 @@ class CodeReview:
            )
            comments = await CodeReview_().run(patch, cr_points)
            cr_output_path.parent.mkdir(exist_ok=True, parents=True)
-            async with aiofiles.open(cr_output_path, "w") as f:
+            async with aiofiles.open(cr_output_path, "w", encoding="utf-8") as f:
                await f.write(json.dumps(comments, ensure_ascii=False))
            await reporter.async_report(cr_output_path)

@ -65,7 +66,7 @@ class CodeReview:
            output_dir: File path where code review comments are stored.
        """
        patch = await self._get_patch_content(patch_path)
-        async with aiofiles.open(cr_file, "r") as f:
+        async with aiofiles.open(cr_file, "r", encoding="utf-8") as f:
            comments = json.loads(await f.read())
        await ModifyCode(pr="").run(patch, comments, output_dir)
        return f"The fixed patch files store in {output_dir}"
@ -75,12 +76,14 @@ class CodeReview:
            # async with aiohttp.ClientSession(trust_env=True) as client:
            #     async with client.get(f"{patch_path}.diff", ) as resp:
            #         patch_file_content = await resp.text()
-            browser = Browser()
-            browser.proxy = {"server": "http://127.0.0.1:20172"}
-            async with browser:
+            async with Browser() as browser:
                await browser.goto(f"{patch_path}.diff")
                patch_file_content = await browser.page.content()
-
+                if patch_file_content.startswith("<html>"):
+                    soup = BeautifulSoup(patch_file_content, "html.parser")
+                    pre = soup.find("pre")
+                    if pre:
+                        patch_file_content = pre.text
        else:
            async with aiofiles.open(patch_path) as f:
                patch_file_content = await f.read()
--- a/metagpt/tools/libs/git.py
+++ b/metagpt/tools/libs/git.py
@ -11,7 +11,7 @@ from github.PullRequest import PullRequest
 from metagpt.tools.tool_registry import register_tool


-@register_tool(tags=["software development", "git", "Commit the changes and push to remote git repository."])
+@register_tool(tags=["software development", "git", "Push to remote git repository."])
 async def git_push(
    local_path: Union[str, Path],
    app_name: str,
@ -22,22 +22,23 @@ async def git_push(
    Pushes changes from a local Git repository to its remote counterpart.

    Args:
-        local_path (Union[str, Path]): The path to the local Git repository.
-        app_name (str): The name of the application where the repository is hosted. For example, "github", "gitlab", "bitbucket", etc.
-        comments (str, optional): The commit message to use. Defaults to "Commit".
+        local_path (Union[str, Path]): The absolute path to the local Git repository.
+        app_name (str): The name of the platform hosting the repository (e.g., "github", "gitlab", "bitbucket").
+        comments (str, optional): Comments to be associated with the push. Defaults to "Commit".
        new_branch (str, optional): The name of the new branch to create and push changes to.
            If not provided, changes will be pushed to the current branch. Defaults to "".

    Returns:
        GitBranch: The branch to which the changes were pushed.
+
    Raises:
        ValueError: If the provided local_path does not point to a valid Git repository.

    Example:
        >>> url = "https://github.com/iorisa/snake-game.git"
        >>> local_path = await git_clone(url=url)
-        >>> app_name="github"
-        >>> comments = "Archive"
+        >>> app_name = "github"
+        >>> comments = "Commit"
        >>> new_branch = "feature/new"
        >>> branch = await git_push(local_path=local_path, app_name=app_name, comments=comments, new_branch=new_branch)
        >>> base = branch.base
@ -45,8 +46,8 @@ async def git_push(
        >>> repo_name = branch.repo_name
        >>> print(f"base branch:'{base}', head branch:'{head}', repo_name:'{repo_name}'")
        base branch:'master', head branch:'feature/new', repo_name:'iorisa/snake-game'
-
    """
+
    from metagpt.tools.libs import get_env
    from metagpt.utils.git_repository import GitRepository

@ -64,9 +65,9 @@ async def git_push(
 async def git_create_pull(
    base: str,
    head: str,
-    base_repo_name: str,
    app_name: str,
-    head_repo_name: Optional[str] = None,
+    base_repo_name: str,
+    head_repo_name: str = None,
    title: Optional[str] = None,
    body: Optional[str] = None,
    issue: Optional[Issue] = None,
@ -75,14 +76,14 @@ async def git_create_pull(
    Creates a pull request on a Git repository. Use this tool in priority over Browser to create a pull request.

    Args:
-        base (str): The base branch of the pull request.
-        head (str): The head branch of the pull request.
-        base_repo_name (str): The full repository name (user/repo) where the pull request will be created.
-        app_name (str): The name of the application where the repository is hosted. For example, "github", "gitlab", "bitbucket", etc.
-        head_repo_name (Optional[str], optional): The full repository name (user/repo) where the pull request will merge from. Defaults to None.
-        title (Optional[str], optional): The title of the pull request. Defaults to None.
-        body (Optional[str], optional): The body of the pull request. Defaults to None.
-        issue (Optional[Issue], optional): The related issue of the pull request. Defaults to None.
+        base (str): The name of the base branch where the pull request will be merged.
+        head (str): The name of the branch that contains the changes for the pull request.
+        app_name (str): The name of the platform hosting the repository (e.g., "github", "gitlab", "bitbucket").
+        base_repo_name (str): The full name of the target repository (in the format "user/repo") where the pull request will be created.
+        head_repo_name (Optional[str]): The full name of the source repository (in the format "user/repo") from which the changes will be pulled.
+        title (Optional[str]): The title of the pull request. Defaults to None.
+        body (Optional[str]): The description or body content of the pull request. Defaults to None.
+        issue (Optional[Issue]): An optional issue related to the pull request. Defaults to None.

    Example:
        >>> # create pull request
--- a/metagpt/tools/libs/terminal.py
+++ b/metagpt/tools/libs/terminal.py
@ -1,8 +1,10 @@
-import subprocess
-import threading
-from queue import Queue
+import asyncio
+from asyncio import Queue
+from asyncio.subprocess import PIPE, STDOUT
+from typing import Optional

 from metagpt.const import DEFAULT_WORKSPACE_ROOT, SWE_SETUP_PATH
+from metagpt.logs import logger
 from metagpt.tools.tool_registry import register_tool
 from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter

@ -19,62 +21,54 @@ class Terminal:
    def __init__(self):
        self.shell_command = ["bash"]  # FIXME: should consider windows support later
        self.command_terminator = "\n"
-
-        # Start a persistent shell process
-        self.process = subprocess.Popen(
-            self.shell_command,
-            shell=True,
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            executable="/bin/bash",
-        )
-        self.stdout_queue = Queue()
+        self.stdout_queue = Queue(maxsize=1000)
        self.observer = TerminalReporter()
+        self.process: Optional[asyncio.subprocess.Process] = None

-        self._check_state()
+    async def _start_process(self):
+        # Start a persistent shell process
+        self.process = await asyncio.create_subprocess_exec(
+            *self.shell_command, stdin=PIPE, stdout=PIPE, stderr=STDOUT, executable="bash"
+        )
+        await self._check_state()

-    def _check_state(self):
-        """Check the state of the terminal, e.g. the current directory of the terminal process. Useful for agent to understand."""
-        print("The terminal is at:", self.run_command("pwd"))
+    async def _check_state(self):
+        """
+        Check the state of the terminal, e.g. the current directory of the terminal process. Useful for agent to understand.
+        """
+        output = await self.run_command("pwd")
+        logger.info("The terminal is at:", output)

-    def run_command(self, cmd: str, daemon=False) -> str:
+    async def run_command(self, cmd: str, daemon=False) -> str:
        """
        Executes a specified command in the terminal and streams the output back in real time.
        This command maintains state across executions, such as the current directory,
-        allowing for sequential commands to be contextually aware. The output from the
-        command execution is placed into `stdout_queue`, which can be consumed as needed.
+        allowing for sequential commands to be contextually aware.

        Args:
            cmd (str): The command to execute in the terminal.
-            daemon (bool): If True, executes the command in a background thread, allowing
-                           the main program to continue execution. The command's output is
-                           collected asynchronously in daemon mode and placed into `stdout_queue`.
-
+            daemon (bool): If True, executes the command in an asynchronous task, allowing
+                           the main program to continue execution.
        Returns:
            str: The command's output or an empty string if `daemon` is True. Remember that
-                 when `daemon` is True, the output is collected into `stdout_queue` and must
-                 be consumed from there.
-
-        Note:
-            If `stdout_queue` is not periodically consumed, it could potentially grow indefinitely,
-            consuming memory. Ensure that there's a mechanism in place to consume this queue,
-            especially during long-running or output-heavy command executions.
+                 when `daemon` is True, use the `get_stdout_output` method to get the output.
        """
+        if self.process is None:
+            await self._start_process()

        # Send the command
        self.process.stdin.write((cmd + self.command_terminator).encode())
        self.process.stdin.write(
-            (f'echo "{END_MARKER_VALUE}"{self.command_terminator}').encode()  # write EOF
+            f'echo "{END_MARKER_VALUE}"{self.command_terminator}'.encode()  # write EOF
        )  # Unique marker to signal command end
-        self.process.stdin.flush()
+        await self.process.stdin.drain()
        if daemon:
-            threading.Thread(target=self._read_and_process_output, args=(cmd,), daemon=True).start()
+            asyncio.create_task(self._read_and_process_output(cmd))
            return ""
        else:
-            return self._read_and_process_output(cmd)
+            return await self._read_and_process_output(cmd)

-    def execute_in_conda_env(self, cmd: str, env, daemon=False) -> str:
+    async def execute_in_conda_env(self, cmd: str, env, daemon=False) -> str:
        """
        Executes a given command within a specified Conda environment automatically without
        the need for manual activation. Users just need to provide the name of the Conda
@ -84,7 +78,7 @@ class Terminal:
            cmd (str): The command to execute within the Conda environment.
            env (str, optional): The name of the Conda environment to activate before executing the command.
                                 If not specified, the command will run in the current active environment.
-            daemon (bool): If True, the command is run in a background thread, similar to `run_command`,
+            daemon (bool): If True, the command is run in an asynchronous task, similar to `run_command`,
                           affecting error logging and handling in the same manner.

        Returns:
@ -96,19 +90,34 @@ class Terminal:
            to ensure the specified environment is active for the command's execution.
        """
        cmd = f"conda run -n {env} {cmd}"
-        return self.run_command(cmd, daemon=daemon)
+        return await self.run_command(cmd, daemon=daemon)

-    def _read_and_process_output(self, cmd):
-        with self.observer as observer:
+    async def get_stdout_output(self) -> str:
+        """
+        Retrieves all collected output from background running commands and returns it as a string.
+
+        Returns:
+            str: The collected output from background running commands, returned as a string.
+        """
+        output_lines = []
+        while not self.stdout_queue.empty():
+            line = await self.stdout_queue.get()
+            output_lines.append(line)
+        return "\n".join(output_lines)
+
+    async def _read_and_process_output(self, cmd, daemon=False) -> str:
+        async with self.observer as observer:
            cmd_output = []
-            observer.report(cmd + self.command_terminator, "cmd")
-            # report the comman
+            await observer.async_report(cmd + self.command_terminator, "cmd")
+            # report the command
            # Read the output until the unique marker is found.
            # We read bytes directly from stdout instead of text because when reading text,
            # '\r' is changed to '\n', resulting in excessive output.
            tmp = b""
            while True:
-                output = tmp + self.process.stdout.read(1)
+                output = tmp + await self.process.stdout.read(1)
+                if not output:
+                    continue
                *lines, tmp = output.splitlines(True)
                for line in lines:
                    line = line.decode()
@ -116,20 +125,20 @@ class Terminal:
                    if ix >= 0:
                        line = line[0:ix]
                        if line:
-                            observer.report(line, "output")
+                            await observer.async_report(line, "output")
                            # report stdout in real-time
                            cmd_output.append(line)
                        return "".join(cmd_output)
                    # log stdout in real-time
-                    observer.report(line, "output")
+                    await observer.async_report(line, "output")
                    cmd_output.append(line)
-                    self.stdout_queue.put(line)
+                    if daemon:
+                        await self.stdout_queue.put(line)

-    def close(self):
+    async def close(self):
        """Close the persistent shell process."""
        self.process.stdin.close()
-        self.process.terminate()
-        self.process.wait()
+        await self.process.wait()


@register_tool(include_functions=["run"])
@ -142,10 +151,13 @@ class Bash(Terminal):
    def __init__(self):
        """init"""
        super().__init__()
-        self.run_command(f"cd {DEFAULT_WORKSPACE_ROOT}")
-        self.run_command(f"source {SWE_SETUP_PATH}")
+        self.start_flag = False

-    def run(self, cmd) -> str:
+    async def start(self):
+        await self.run_command(f"cd {DEFAULT_WORKSPACE_ROOT}")
+        await self.run_command(f"source {SWE_SETUP_PATH}")
+
+    async def run(self, cmd) -> str:
        """
        Executes a bash command.

@ -184,9 +196,6 @@ class Bash(Terminal):
          Arguments:
              filename (str): The name of the file to create.

-        - submit
-          Submits your current code. it can only be executed once, the last action before the `end`.
-
        - search_dir_and_preview <search_term> [<dir>]
          Searches for search_term in all files in dir and gives their code preview
          with line numbers. If dir is not provided, searches in the current directory.
@ -220,6 +229,13 @@ class Bash(Terminal):
              end_line (int): The line number to end the edit at (inclusive), starting from 1.
              replacement_text (str): The text to replace the current selection with, must conform to PEP8 standards.

+        - submit
+          Submits your current code locally. it can only be executed once, the last action before the `end`.
+
        Note: Make sure to use these functions as per their defined arguments and behaviors.
        """
-        return self.run_command(cmd)
+        if not self.start_flag:
+            await self.start()
+            self.start_flag = True
+
+        return await self.run_command(cmd)
--- a/metagpt/tools/swe_agent_commands/setup_default.sh
+++ b/metagpt/tools/swe_agent_commands/setup_default.sh
@ -17,4 +17,3 @@ source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/search.sh
 source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/edit_linting.sh

 export SWE_CMD_WORK_DIR="$REPO_ROOT_DIR/workspace/swe_agent_workdir"
-#sudo chmod 777 $REPO_ROOT_DIR/workspace/swe_agent_workdir