Merge branch 'mgx_ops' into 'data_analyst_ldy'

# Conflicts:
#   metagpt/schema.py
This commit is contained in:
李丹阳 2024-07-17 02:47:45 +00:00
commit 127ef8707b
78 changed files with 2815 additions and 917 deletions

View file

@ -24,6 +24,7 @@ from metagpt.utils.a11y_tree import (
scroll_page,
type_text,
)
from metagpt.utils.proxy_env import get_proxy_from_env
from metagpt.utils.report import BrowserReporter
@ -72,7 +73,7 @@ class Browser:
self.page: Optional[Page] = None
self.accessibility_tree: list = []
self.headless: bool = True
self.proxy = None
self.proxy = get_proxy_from_env()
self.is_empty_page = True
self.reporter = BrowserReporter()
@ -120,7 +121,7 @@ class Browser:
await scroll_page(self.page, direction)
return await self._wait_page()
async def goto(self, url: str, timeout: float = 30000):
async def goto(self, url: str, timeout: float = 90000):
"""Navigate to a specific URL."""
if self.page is None:
await self.start()

View file

@ -3,6 +3,7 @@ from pathlib import Path
from typing import Optional
import aiofiles
from bs4 import BeautifulSoup
from unidiff import PatchSet
import metagpt.ext.cr
@ -29,7 +30,7 @@ class CodeReview:
Args:
patch_path: The local path of the patch file or the url of the pull request. Example: "/data/xxx-pr-1.patch", "https://github.com/xx/XX/pull/1362"
cr_output_file: Output file path where code review comments will be saved. Example: "cr/xxx-pr-1.json"
cr_point_file: File path for specifying code review points. Defaults to a predefined file.
cr_point_file: File path for specifying code review points. If not specified, do not pass this parameter.
"""
patch = await self._get_patch_content(patch_path)
cr_point_file = cr_point_file if cr_point_file else Path(metagpt.ext.cr.__file__).parent / "points.json"
@ -45,7 +46,7 @@ class CodeReview:
)
comments = await CodeReview_().run(patch, cr_points)
cr_output_path.parent.mkdir(exist_ok=True, parents=True)
async with aiofiles.open(cr_output_path, "w") as f:
async with aiofiles.open(cr_output_path, "w", encoding="utf-8") as f:
await f.write(json.dumps(comments, ensure_ascii=False))
await reporter.async_report(cr_output_path)
@ -65,7 +66,7 @@ class CodeReview:
output_dir: File path where code review comments are stored.
"""
patch = await self._get_patch_content(patch_path)
async with aiofiles.open(cr_file, "r") as f:
async with aiofiles.open(cr_file, "r", encoding="utf-8") as f:
comments = json.loads(await f.read())
await ModifyCode(pr="").run(patch, comments, output_dir)
return f"The fixed patch files store in {output_dir}"
@ -75,12 +76,14 @@ class CodeReview:
# async with aiohttp.ClientSession(trust_env=True) as client:
# async with client.get(f"{patch_path}.diff", ) as resp:
# patch_file_content = await resp.text()
browser = Browser()
browser.proxy = {"server": "http://127.0.0.1:20172"}
async with browser:
async with Browser() as browser:
await browser.goto(f"{patch_path}.diff")
patch_file_content = await browser.page.content()
if patch_file_content.startswith("<html>"):
soup = BeautifulSoup(patch_file_content, "html.parser")
pre = soup.find("pre")
if pre:
patch_file_content = pre.text
else:
async with aiofiles.open(patch_path) as f:
patch_file_content = await f.read()

View file

@ -11,7 +11,7 @@ from github.PullRequest import PullRequest
from metagpt.tools.tool_registry import register_tool
@register_tool(tags=["software development", "git", "Commit the changes and push to remote git repository."])
@register_tool(tags=["software development", "git", "Push to remote git repository."])
async def git_push(
local_path: Union[str, Path],
app_name: str,
@ -22,22 +22,23 @@ async def git_push(
Pushes changes from a local Git repository to its remote counterpart.
Args:
local_path (Union[str, Path]): The path to the local Git repository.
app_name (str): The name of the application where the repository is hosted. For example, "github", "gitlab", "bitbucket", etc.
comments (str, optional): The commit message to use. Defaults to "Commit".
local_path (Union[str, Path]): The absolute path to the local Git repository.
app_name (str): The name of the platform hosting the repository (e.g., "github", "gitlab", "bitbucket").
comments (str, optional): Comments to be associated with the push. Defaults to "Commit".
new_branch (str, optional): The name of the new branch to create and push changes to.
If not provided, changes will be pushed to the current branch. Defaults to "".
Returns:
GitBranch: The branch to which the changes were pushed.
Raises:
ValueError: If the provided local_path does not point to a valid Git repository.
Example:
>>> url = "https://github.com/iorisa/snake-game.git"
>>> local_path = await git_clone(url=url)
>>> app_name="github"
>>> comments = "Archive"
>>> app_name = "github"
>>> comments = "Commit"
>>> new_branch = "feature/new"
>>> branch = await git_push(local_path=local_path, app_name=app_name, comments=comments, new_branch=new_branch)
>>> base = branch.base
@ -45,8 +46,8 @@ async def git_push(
>>> repo_name = branch.repo_name
>>> print(f"base branch:'{base}', head branch:'{head}', repo_name:'{repo_name}'")
base branch:'master', head branch:'feature/new', repo_name:'iorisa/snake-game'
"""
from metagpt.tools.libs import get_env
from metagpt.utils.git_repository import GitRepository
@ -64,9 +65,9 @@ async def git_push(
async def git_create_pull(
base: str,
head: str,
base_repo_name: str,
app_name: str,
head_repo_name: Optional[str] = None,
base_repo_name: str,
head_repo_name: str = None,
title: Optional[str] = None,
body: Optional[str] = None,
issue: Optional[Issue] = None,
@ -75,14 +76,14 @@ async def git_create_pull(
Creates a pull request on a Git repository. Use this tool in priority over Browser to create a pull request.
Args:
base (str): The base branch of the pull request.
head (str): The head branch of the pull request.
base_repo_name (str): The full repository name (user/repo) where the pull request will be created.
app_name (str): The name of the application where the repository is hosted. For example, "github", "gitlab", "bitbucket", etc.
head_repo_name (Optional[str], optional): The full repository name (user/repo) where the pull request will merge from. Defaults to None.
title (Optional[str], optional): The title of the pull request. Defaults to None.
body (Optional[str], optional): The body of the pull request. Defaults to None.
issue (Optional[Issue], optional): The related issue of the pull request. Defaults to None.
base (str): The name of the base branch where the pull request will be merged.
head (str): The name of the branch that contains the changes for the pull request.
app_name (str): The name of the platform hosting the repository (e.g., "github", "gitlab", "bitbucket").
base_repo_name (str): The full name of the target repository (in the format "user/repo") where the pull request will be created.
head_repo_name (Optional[str]): The full name of the source repository (in the format "user/repo") from which the changes will be pulled.
title (Optional[str]): The title of the pull request. Defaults to None.
body (Optional[str]): The description or body content of the pull request. Defaults to None.
issue (Optional[Issue]): An optional issue related to the pull request. Defaults to None.
Example:
>>> # create pull request

View file

@ -1,8 +1,10 @@
import subprocess
import threading
from queue import Queue
import asyncio
from asyncio import Queue
from asyncio.subprocess import PIPE, STDOUT
from typing import Optional
from metagpt.const import DEFAULT_WORKSPACE_ROOT, SWE_SETUP_PATH
from metagpt.logs import logger
from metagpt.tools.tool_registry import register_tool
from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter
@ -19,62 +21,54 @@ class Terminal:
def __init__(self):
self.shell_command = ["bash"] # FIXME: should consider windows support later
self.command_terminator = "\n"
# Start a persistent shell process
self.process = subprocess.Popen(
self.shell_command,
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
executable="/bin/bash",
)
self.stdout_queue = Queue()
self.stdout_queue = Queue(maxsize=1000)
self.observer = TerminalReporter()
self.process: Optional[asyncio.subprocess.Process] = None
self._check_state()
async def _start_process(self):
    """Spawn the persistent shell process and report its initial state.

    The shell is launched with piped stdin and stdout, with stderr merged into
    stdout. Once the process handle is stored, `_check_state` is awaited.
    """
    pipes = {"stdin": PIPE, "stdout": PIPE, "stderr": STDOUT}
    shell = await asyncio.create_subprocess_exec(*self.shell_command, executable="bash", **pipes)
    self.process = shell
    await self._check_state()
def _check_state(self):
"""Check the state of the terminal, e.g. the current directory of the terminal process. Useful for agent to understand."""
print("The terminal is at:", self.run_command("pwd"))
async def _check_state(self):
    """Log the current working directory of the terminal process.

    Runs `pwd` through `run_command` so the agent (and the logs) can see where
    the persistent shell currently is.
    """
    output = await self.run_command("pwd")
    # Interpolate explicitly: a second positional argument is treated by the logger
    # as a format argument, and because the message has no placeholder the directory
    # would not be rendered in the log line.
    logger.info(f"The terminal is at: {output}")
def run_command(self, cmd: str, daemon=False) -> str:
async def run_command(self, cmd: str, daemon=False) -> str:
"""
Executes a specified command in the terminal and streams the output back in real time.
This command maintains state across executions, such as the current directory,
allowing for sequential commands to be contextually aware. The output from the
command execution is placed into `stdout_queue`, which can be consumed as needed.
allowing for sequential commands to be contextually aware.
Args:
cmd (str): The command to execute in the terminal.
daemon (bool): If True, executes the command in a background thread, allowing
the main program to continue execution. The command's output is
collected asynchronously in daemon mode and placed into `stdout_queue`.
daemon (bool): If True, executes the command in an asynchronous task, allowing
the main program to continue execution.
Returns:
str: The command's output or an empty string if `daemon` is True. Remember that
when `daemon` is True, the output is collected into `stdout_queue` and must
be consumed from there.
Note:
If `stdout_queue` is not periodically consumed, it could potentially grow indefinitely,
consuming memory. Ensure that there's a mechanism in place to consume this queue,
especially during long-running or output-heavy command executions.
when `daemon` is True, use the `get_stdout_output` method to get the output.
"""
if self.process is None:
await self._start_process()
# Send the command
self.process.stdin.write((cmd + self.command_terminator).encode())
self.process.stdin.write(
(f'echo "{END_MARKER_VALUE}"{self.command_terminator}').encode() # write EOF
f'echo "{END_MARKER_VALUE}"{self.command_terminator}'.encode() # write EOF
) # Unique marker to signal command end
self.process.stdin.flush()
await self.process.stdin.drain()
if daemon:
threading.Thread(target=self._read_and_process_output, args=(cmd,), daemon=True).start()
asyncio.create_task(self._read_and_process_output(cmd))
return ""
else:
return self._read_and_process_output(cmd)
return await self._read_and_process_output(cmd)
def execute_in_conda_env(self, cmd: str, env, daemon=False) -> str:
async def execute_in_conda_env(self, cmd: str, env, daemon=False) -> str:
"""
Executes a given command within a specified Conda environment automatically without
the need for manual activation. Users just need to provide the name of the Conda
@ -84,7 +78,7 @@ class Terminal:
cmd (str): The command to execute within the Conda environment.
env (str, optional): The name of the Conda environment to activate before executing the command.
If not specified, the command will run in the current active environment.
daemon (bool): If True, the command is run in a background thread, similar to `run_command`,
daemon (bool): If True, the command is run in an asynchronous task, similar to `run_command`,
affecting error logging and handling in the same manner.
Returns:
@ -96,19 +90,34 @@ class Terminal:
to ensure the specified environment is active for the command's execution.
"""
cmd = f"conda run -n {env} {cmd}"
return self.run_command(cmd, daemon=daemon)
return await self.run_command(cmd, daemon=daemon)
def _read_and_process_output(self, cmd):
with self.observer as observer:
async def get_stdout_output(self) -> str:
    """Drain `stdout_queue` and return everything collected so far.

    Returns:
        str: The queued output lines joined with a newline; an empty string
            when nothing is queued.
    """
    queue = self.stdout_queue
    drained = []
    while True:
        if queue.empty():
            break
        drained.append(await queue.get())
    return "\n".join(drained)
async def _read_and_process_output(self, cmd, daemon=False) -> str:
async with self.observer as observer:
cmd_output = []
observer.report(cmd + self.command_terminator, "cmd")
# report the comman
await observer.async_report(cmd + self.command_terminator, "cmd")
# report the command
# Read the output until the unique marker is found.
# We read bytes directly from stdout instead of text because when reading text,
# '\r' is changed to '\n', resulting in excessive output.
tmp = b""
while True:
output = tmp + self.process.stdout.read(1)
output = tmp + await self.process.stdout.read(1)
if not output:
continue
*lines, tmp = output.splitlines(True)
for line in lines:
line = line.decode()
@ -116,20 +125,20 @@ class Terminal:
if ix >= 0:
line = line[0:ix]
if line:
observer.report(line, "output")
await observer.async_report(line, "output")
# report stdout in real-time
cmd_output.append(line)
return "".join(cmd_output)
# log stdout in real-time
observer.report(line, "output")
await observer.async_report(line, "output")
cmd_output.append(line)
self.stdout_queue.put(line)
if daemon:
await self.stdout_queue.put(line)
def close(self):
async def close(self):
"""Close the persistent shell process."""
self.process.stdin.close()
self.process.terminate()
self.process.wait()
await self.process.wait()
@register_tool(include_functions=["run"])
@ -142,10 +151,13 @@ class Bash(Terminal):
def __init__(self):
"""init"""
super().__init__()
self.run_command(f"cd {DEFAULT_WORKSPACE_ROOT}")
self.run_command(f"source {SWE_SETUP_PATH}")
self.start_flag = False
def run(self, cmd) -> str:
async def start(self):
    """Prepare the shell by running the setup commands in order.

    First changes into `DEFAULT_WORKSPACE_ROOT`, then sources `SWE_SETUP_PATH`.
    """
    for setup_cmd in (f"cd {DEFAULT_WORKSPACE_ROOT}", f"source {SWE_SETUP_PATH}"):
        await self.run_command(setup_cmd)
async def run(self, cmd) -> str:
"""
Executes a bash command.
@ -184,9 +196,6 @@ class Bash(Terminal):
Arguments:
filename (str): The name of the file to create.
- submit
Submits your current code. it can only be executed once, the last action before the `end`.
- search_dir_and_preview <search_term> [<dir>]
Searches for search_term in all files in dir and gives their code preview
with line numbers. If dir is not provided, searches in the current directory.
@ -220,6 +229,13 @@ class Bash(Terminal):
end_line (int): The line number to end the edit at (inclusive), starting from 1.
replacement_text (str): The text to replace the current selection with, must conform to PEP8 standards.
- submit
Submits your current code locally. It can only be executed once, as the last action before the `end`.
Note: Make sure to use these functions as per their defined arguments and behaviors.
"""
return self.run_command(cmd)
if not self.start_flag:
await self.start()
self.start_flag = True
return await self.run_command(cmd)

View file

@ -17,4 +17,3 @@ source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/search.sh
source $REPO_ROOT_DIR/metagpt/tools/swe_agent_commands/edit_linting.sh
export SWE_CMD_WORK_DIR="$REPO_ROOT_DIR/workspace/swe_agent_workdir"
#sudo chmod 777 $REPO_ROOT_DIR/workspace/swe_agent_workdir