mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
Merge branch 'mgx_ops' into feature-terminal
This commit is contained in:
commit
73afb493de
21 changed files with 517 additions and 67 deletions
26
examples/di/use_browser.py
Normal file
26
examples/di/use_browser.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import asyncio
|
||||
|
||||
from metagpt.roles.di.data_interpreter import DataInterpreter
|
||||
|
||||
# An example requirement that showcases navigation across a documentation site.
MG_LLM_CONFIG_REQ = """
This is a link to the doc site of MetaGPT project: https://docs.deepwisdom.ai/main/en/
Check where you can go to on the site and try to find out the list of LLM APIs supported by MetaGPT.
Don't write all codes in one response, each time, just write code for one step.
"""

# An example requirement that showcases searching within a page.
# NOTE: the literal must open with exactly three quotes; a fourth quote would
# become the first character of the prompt text.
PAPER_LIST_REQ = """
At https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
find the first paper whose title includes `multiagent`, open it and summarize its abstract.
Don't write all codes in one response, each time, just write code for one step.
"""
|
||||
|
||||
|
||||
async def main():
    """Run a DataInterpreter equipped with the Browser tool on the navigation example request."""
    interpreter = DataInterpreter(tools=["Browser"], react_mode="react")
    await interpreter.run(MG_LLM_CONFIG_REQ)


# Script entry point: drive the coroutine to completion on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
|
|
@ -6,12 +6,16 @@ import asyncio
|
|||
from metagpt.roles.di.mgx import MGX
|
||||
|
||||
requirement = (
|
||||
"design a game using Gym (an open source Python library), including a graphical interface and interactive gameplay"
|
||||
# "design a game using Gym (an open source Python library), including a graphical interface and interactive gameplay"
|
||||
# "帮我把pip的源设置成:https://pypi.tuna.tsinghua.edu.cn/simple"
|
||||
# "This is a website url does not require login: https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767 please write a similar web page,developed in vue language, The package.json dependency must be generated"
|
||||
"I would like to imitate the website available at https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767. Could you please browse through it?"
|
||||
# "Create a 2048 Game"
|
||||
)
|
||||
|
||||
|
||||
async def main(requirement: str = ""):
|
||||
mgx = MGX(use_intent=True)
|
||||
mgx = MGX(use_intent=True, tools=["<all>"])
|
||||
await mgx.run(requirement)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,36 +29,38 @@ class SOPItemDef(BaseModel):
|
|||
class SOPItem(Enum):
|
||||
SOFTWARE_DEVELOPMENT = SOPItemDef(
|
||||
name="software development",
|
||||
description="Intentions related to or including software development, such as developing or building software, games, app, websites, etc. Excluding bug fixes, report any issues.",
|
||||
description="Software development intention including developing or building software, games, app, websites, etc. EXCLUDING bug fixes, report any issues, environment setup, terminal operations, and pip install.",
|
||||
sop=[
|
||||
"Writes a PRD based on software requirements.",
|
||||
"Writes a design to the project repository, based on the PRD of the project.",
|
||||
"Writes a project plan to the project repository, based on the design of the project.",
|
||||
"Writes code to implement designed features according to the project plan and adds them to the project repository.",
|
||||
# "Run QA test on the project repository.",
|
||||
"Stage and commit changes for the project repository using Git.",
|
||||
],
|
||||
)
|
||||
FIX_BUGS = SOPItemDef(
|
||||
name="fix bugs",
|
||||
description="Fix bugs in a given project.",
|
||||
sop=[
|
||||
"Fix bugs in the project repository.",
|
||||
"Stage and commit changes for the project repository using Git.",
|
||||
],
|
||||
)
|
||||
FORMAT_REPO = SOPItemDef(
|
||||
name="format repo",
|
||||
description="download repository from git and format the project to MetaGPT project",
|
||||
sop=[
|
||||
"Imports a project from a Git website and formats it to MetaGPT project format to enable incremental appending requirements.",
|
||||
"Stage and commit changes for the project repository using Git.",
|
||||
],
|
||||
# FIX_BUGS = SOPItemDef(
|
||||
# name="fix bugs",
|
||||
# description="Fix bugs in a given project.",
|
||||
# sop=[
|
||||
# "Fix bugs in the project repository.",
|
||||
# "Stage and commit changes for the project repository using Git.",
|
||||
# ],
|
||||
# )
|
||||
# FORMAT_REPO = SOPItemDef(
|
||||
# name="format repo",
|
||||
# description="download repository from git and format the project to MetaGPT project",
|
||||
# sop=[
|
||||
# "Imports a project from a Git website and formats it to MetaGPT project format to enable incremental appending requirements.",
|
||||
# "Stage and commit changes for the project repository using Git.",
|
||||
# ],
|
||||
# )
|
||||
WEB_OPERATION = SOPItemDef(
|
||||
name="web operation",
|
||||
description="web browsing, scraping, imitation and other interaction with the web",
|
||||
)
|
||||
OTHER = SOPItemDef(
|
||||
name="other",
|
||||
description="Other intentions that do not fall into the above categories, including data science, machine learning, deep learning, etc.",
|
||||
sop=[],
|
||||
description="Other intentions that do not fall into the above categories, including data science, data analysis, machine learning, deep learning and text-to-image etc.",
|
||||
)
|
||||
|
||||
@property
|
||||
|
|
@ -86,8 +88,7 @@ Intention index:
|
|||
REQ_WITH_SOP = """
|
||||
{user_requirement}
|
||||
## Knowledge
|
||||
To meet user requirements, the following standard operating procedure(SOP) must be used.
|
||||
SOP descriptions cannot be modified; user requirements can only be appended to the end of corresponding steps.
|
||||
To meet user requirements, the following standard operating procedure(SOP) must be used:
|
||||
|
||||
{sop}
|
||||
"""
|
||||
|
|
@ -97,7 +98,7 @@ class DetectIntent(Action):
|
|||
async def run(self, with_message: Message, **kwargs) -> Tuple[str, str]:
|
||||
user_requirement = with_message.content
|
||||
mappings = {i + 1: si for i, si in enumerate(SOPItem)}
|
||||
intentions = "\n".join([f"{i+1}. {si.type_name}: {si.value.description}" for i, si in enumerate(SOPItem)])
|
||||
intentions = "\n".join([f"{i + 1}. {si.type_name}: {si.value.description}" for i, si in enumerate(SOPItem)])
|
||||
prompt = DETECT_PROMPT.format(user_requirement=user_requirement, intentions=intentions)
|
||||
|
||||
rsp = await self._aask(prompt)
|
||||
|
|
@ -110,7 +111,7 @@ class DetectIntent(Action):
|
|||
|
||||
req_with_sop = (
|
||||
REQ_WITH_SOP.format(
|
||||
user_requirement=user_requirement, sop="\n".join([f"{i+1}. {v}" for i, v in enumerate(sop)])
|
||||
user_requirement=user_requirement, sop="\n".join([f"{i + 1}. {v}" for i, v in enumerate(sop)])
|
||||
)
|
||||
if sop
|
||||
else user_requirement
|
||||
|
|
@ -121,7 +122,13 @@ class DetectIntent(Action):
|
|||
|
||||
async def main():
|
||||
# Example usage of the DetectIntent action
|
||||
user_requirements = ["Develop a 2048 game.", "Run data analysis on sklearn wine dataset"]
|
||||
user_requirements = [
|
||||
"Develop a 2048 game.",
|
||||
"Run data analysis on sklearn wine dataset",
|
||||
"帮我把pip的源设置成:https://pypi.tuna.tsinghua.edu.cn/simple",
|
||||
"This is a website url does not require login: https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767 please write a similar web page,developed in vue language, The package.json dependency must be generated",
|
||||
"I would like to imitate the website available at https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767. Could you please browse through it?",
|
||||
]
|
||||
detect_intent = DetectIntent()
|
||||
|
||||
for user_requirement in user_requirements:
|
||||
|
|
|
|||
|
|
@ -24,7 +24,10 @@ from rich.panel import Panel
|
|||
from rich.syntax import Syntax
|
||||
|
||||
from metagpt.actions import Action
|
||||
from metagpt.logs import logger
|
||||
from metagpt.const import DEFAULT_WORKSPACE_ROOT
|
||||
from metagpt.logs import ToolLogItem, log_tool_output, logger
|
||||
|
||||
INSTALL_KEEPLEN = 500
|
||||
|
||||
|
||||
class ExecuteNbCode(Action):
|
||||
|
|
@ -43,7 +46,7 @@ class ExecuteNbCode(Action):
|
|||
):
|
||||
super().__init__(
|
||||
nb=nb,
|
||||
nb_client=NotebookClient(nb, timeout=timeout),
|
||||
nb_client=NotebookClient(nb, timeout=timeout, resources={"metadata": {"path": DEFAULT_WORKSPACE_ROOT}}),
|
||||
timeout=timeout,
|
||||
console=Console(),
|
||||
interaction=("ipython" if self.is_ipython() else "terminal"),
|
||||
|
|
@ -206,6 +209,11 @@ class ExecuteNbCode(Action):
|
|||
|
||||
if "!pip" in code:
|
||||
success = False
|
||||
outputs = outputs[-INSTALL_KEEPLEN:]
|
||||
|
||||
file_path = DEFAULT_WORKSPACE_ROOT / "code.ipynb"
|
||||
nbformat.write(self.nb, file_path)
|
||||
log_tool_output(ToolLogItem(name="file_path", value=file_path), tool_name="ExecuteNbCode")
|
||||
|
||||
return outputs, success
|
||||
|
||||
|
|
|
|||
|
|
@ -16,34 +16,34 @@ from metagpt.schema import Message, Plan, Task
|
|||
from metagpt.strategy.task_type import TaskType
|
||||
from metagpt.utils.common import CodeParser
|
||||
|
||||
PROMPT_TEMPLATE: str = """
|
||||
# Context:
|
||||
{context}
|
||||
# Available Task Types:
|
||||
{task_type_desc}
|
||||
# Task:
|
||||
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to {max_tasks} tasks.
|
||||
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
|
||||
If you encounter errors on the current task, revise and output the current single task only.
|
||||
Output a list of jsons following the format:
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"task_id": str = "unique identifier for a task in plan, can be an ordinal",
|
||||
"dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
|
||||
"instruction": "what you should do in this task, one short phrase or sentence.",
|
||||
"task_type": "type of this task, should be one of Available Task Types.",
|
||||
}},
|
||||
...
|
||||
]
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
class WritePlan(Action):
|
||||
PROMPT_TEMPLATE: str = """
|
||||
# Context:
|
||||
{context}
|
||||
# Available Task Types:
|
||||
{task_type_desc}
|
||||
# Task:
|
||||
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to {max_tasks} tasks.
|
||||
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
|
||||
If you encounter errors on the current task, revise and output the current single task only.
|
||||
Output a list of jsons following the format:
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"task_id": str = "unique identifier for a task in plan, can be an ordinal",
|
||||
"dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
|
||||
"instruction": "what you should do in this task, one short phrase or sentence",
|
||||
"task_type": "type of this task, should be one of Available Task Types",
|
||||
}},
|
||||
...
|
||||
]
|
||||
```
|
||||
"""
|
||||
|
||||
async def run(self, context: list[Message], max_tasks: int = 5) -> str:
|
||||
task_type_desc = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType])
|
||||
prompt = self.PROMPT_TEMPLATE.format(
|
||||
prompt = PROMPT_TEMPLATE.format(
|
||||
context="\n".join([str(ct) for ct in context]), max_tasks=max_tasks, task_type_desc=task_type_desc
|
||||
)
|
||||
rsp = await self._aask(prompt)
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ class WriteTasks(Action):
|
|||
dependencies={system_design_doc.root_relative_path},
|
||||
)
|
||||
await self._update_requirements(task_doc)
|
||||
await self.repo.resources.api_spec_and_task.save_pdf(doc=task_doc)
|
||||
return task_doc
|
||||
|
||||
async def _run_new_tasks(self, context):
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from metagpt.utils.yaml_model import YamlModel
|
|||
class MermaidConfig(YamlModel):
|
||||
"""Config for Mermaid"""
|
||||
|
||||
engine: Literal["nodejs", "ink", "playwright", "pyppeteer"] = "nodejs"
|
||||
engine: Literal["nodejs", "ink", "playwright", "pyppeteer", "none"] = "nodejs"
|
||||
path: str = "mmdc" # mmdc
|
||||
puppeteer_config: str = ""
|
||||
pyppeteer_path: str = "/usr/bin/google-chrome-stable"
|
||||
|
|
|
|||
|
|
@ -135,3 +135,6 @@ AGGREGATION = "Aggregate"
|
|||
# Timeout
|
||||
USE_CONFIG_TIMEOUT = 0 # Using llm.timeout configuration.
|
||||
LLM_API_TIMEOUT = 300
|
||||
|
||||
# Assistant alias
|
||||
ASSISTANT_ALIAS = "response"
|
||||
|
|
|
|||
|
|
@ -54,6 +54,11 @@ def log_tool_output(output: ToolLogItem | list[ToolLogItem], tool_name: str = ""
|
|||
_tool_output_log(output=output, tool_name=tool_name)
|
||||
|
||||
|
||||
async def log_tool_output_async(output: ToolLogItem | list[ToolLogItem], tool_name: str = ""):
|
||||
"""async interface for logging tool output, used when output contains async object"""
|
||||
await _tool_output_log_async(output=output, tool_name=tool_name)
|
||||
|
||||
|
||||
def set_llm_stream_logfunc(func):
|
||||
global _llm_stream_log
|
||||
_llm_stream_log = func
|
||||
|
|
@ -64,9 +69,20 @@ def set_tool_output_logfunc(func):
|
|||
_tool_output_log = func
|
||||
|
||||
|
||||
async def set_tool_output_logfunc_async(func):
|
||||
# async version
|
||||
global _tool_output_log_async
|
||||
_tool_output_log_async = func
|
||||
|
||||
|
||||
_llm_stream_log = partial(print, end="")
|
||||
|
||||
|
||||
_tool_output_log = (
|
||||
lambda *args, **kwargs: None
|
||||
) # a dummy function to avoid errors if set_tool_output_logfunc is not called
|
||||
|
||||
|
||||
async def _tool_output_log_async(*args, **kwargs):
|
||||
# async version
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -4,10 +4,11 @@
|
|||
import asyncio
|
||||
from typing import Dict
|
||||
|
||||
from metagpt.actions.di.detect_intent import DetectIntent
|
||||
from metagpt.actions.di.detect_intent import DetectIntent, SOPItem
|
||||
from metagpt.logs import logger
|
||||
from metagpt.roles.di.data_interpreter import DataInterpreter
|
||||
from metagpt.schema import Message
|
||||
from metagpt.tools.tool_recommend import BM25ToolRecommender
|
||||
|
||||
|
||||
class MGX(DataInterpreter):
|
||||
|
|
@ -18,6 +19,10 @@ class MGX(DataInterpreter):
|
|||
todo = DetectIntent(context=self.context)
|
||||
request_with_sop, sop_type = await todo.run(user_msg)
|
||||
logger.info(f"{sop_type} {request_with_sop}")
|
||||
if sop_type == SOPItem.SOFTWARE_DEVELOPMENT.type_name:
|
||||
self.tool_recommender = BM25ToolRecommender(tools=["software development"])
|
||||
else:
|
||||
self.tool_recommender = BM25ToolRecommender(tools=["<all>"])
|
||||
return request_with_sop
|
||||
|
||||
async def _plan_and_act(self) -> Message:
|
||||
|
|
@ -28,6 +33,7 @@ class MGX(DataInterpreter):
|
|||
if self.use_intent: # add mode
|
||||
user_message = Message(content=goal, role="user")
|
||||
goal = await self._detect_intent(user_message)
|
||||
|
||||
logger.info(f"Goal is {goal}")
|
||||
|
||||
await self.planner.update_plan(goal=goal)
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ class Planner(BaseModel):
|
|||
If human confirms the task result, then we deem the task completed, regardless of whether the code run succeeds;
|
||||
if auto mode, then the code run has to succeed for the task to be considered completed.
|
||||
"""
|
||||
auto_run = auto_run or self.auto_run
|
||||
auto_run = auto_run if auto_run is not None else self.auto_run
|
||||
if not auto_run:
|
||||
context = self.get_useful_memories()
|
||||
review, confirmed = await AskReview().run(
|
||||
|
|
|
|||
|
|
@ -67,6 +67,10 @@ class TaskType(Enum):
|
|||
name="email login",
|
||||
desc="For logging to an email.",
|
||||
)
|
||||
DEVELOP_SOFTWARE = TaskTypeDef(
|
||||
name="develop software",
|
||||
desc="SOP related to develop software such as Writes a PRD, Writes a design, Writes a project plan and Writes code to implement designed features according to the project plan",
|
||||
)
|
||||
|
||||
@property
|
||||
def type_name(self):
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from metagpt.tools.libs import (
|
|||
email_login,
|
||||
terminal,
|
||||
file_manager,
|
||||
browser,
|
||||
)
|
||||
from metagpt.tools.libs.software_development import (
|
||||
write_prd,
|
||||
|
|
@ -40,4 +41,5 @@ _ = (
|
|||
git_archive,
|
||||
terminal,
|
||||
file_manager,
|
||||
browser,
|
||||
) # Avoid pre-commit error
|
||||
|
|
|
|||
197
metagpt/tools/libs/browser.py
Normal file
197
metagpt/tools/libs/browser.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
from playwright.async_api import async_playwright
|
||||
|
||||
from metagpt.const import DEFAULT_WORKSPACE_ROOT
|
||||
from metagpt.logs import ToolLogItem, log_tool_output_async
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.common import encode_image
|
||||
|
||||
|
||||
@register_tool()
class Browser:
    """
    A tool for browsing the web. Don't initialize a new instance of this class if one already exists.
    Note: Combine searching, scrolling, extraction, and link finding together to achieve most effective browsing. DON'T stick to one method.
    """

    def __init__(self):
        """initiate the browser, create pages placeholder later to be managed as {page_url: page object}"""
        # Both handles stay None until start() runs, so close() can tell whether
        # there is anything to release.
        self.playwright = None
        self.browser = None

        from metagpt.config2 import config
        from metagpt.llm import LLM

        self.llm = LLM(llm_config=config.get_openai_llm())
        self.llm.model = "gpt-4-vision-preview"

        # browser status management
        self.pages = {}
        # {page_url: page_title}, captured when a page is opened so that the
        # synchronous check_all_pages() never has to await page.title().
        self._page_titles = {}
        self.current_page_url = None
        self.current_page = None

    async def start(self):
        """Starts Playwright and launches a browser. Does nothing if a browser is already running."""
        if self.browser is not None:
            # Launching again would orphan the running browser and its pages.
            return
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch()

    def _set_current_page(self, page, url):
        """Record page/url as the current page and announce the switch on stdout."""
        self.current_page = page
        self.current_page_url = url
        print("Now on page ", url)

    async def open_new_page(self, url: str):
        """open a new page in the browser, set it as the current page"""
        if self.browser is None:
            # Start lazily so that calling this before start() does not fail on None.
            await self.start()
        page = await self.browser.new_page()
        await page.goto(url)
        self.pages[url] = page
        self._page_titles[url] = await page.title()
        self._set_current_page(page, url)
        await log_tool_output_async(
            ToolLogItem(type="object", name="open_new_page", value=self.current_page), tool_name="Browser"
        )

    async def switch_page(self, url: str):
        """switch to an opened page in the browser, set it as the current page"""
        if url in self.pages:
            self._set_current_page(self.pages[url], url)
            await log_tool_output_async(
                ToolLogItem(type="object", name="switch_page", value=self.current_page), tool_name="Browser"
            )
        else:
            print(f"Page not found: {url}")

    async def search_content_all(self, search_term: str) -> list[dict]:
        """search all occurrences of search term in the current page and return the search results with their position.
        Useful if you have a keyword or sentence in mind and want to quickly narrow down the content relevant to it.

        Args:
            search_term (str): the search term

        Returns:
            list[dict]: a list of dictionaries containing the elements and their positions, e.g.
            [
                {
                    "index": ...,
                    "content": {
                        "text_block": ...,
                        "links": [
                            {"text": ..., "href": ...},
                            ...
                        ]
                    },
                    "position": {from_top: ..., from_left: ...},
                },
                ...
            ]
            Each dictionary also carries an "element_obj" entry holding the matched element, which scroll_to_search_result relies on.
        """
        locator = self.current_page.locator(f"text={search_term}")
        count = await locator.count()
        search_results = []
        for i in range(count):
            element = locator.nth(i)
            # Invisible matches are skipped, so "index" counts visible results only.
            if await element.is_visible():
                position = await element.evaluate("e => ({ from_top: e.offsetTop, from_left: e.offsetLeft })")

                # Retrieve the surrounding block of text and links with their text
                content = await element.evaluate(
                    """
                    (element) => {
                        // const block = element.closest('p, div, section, article');
                        const block = element.parentElement;
                        return {
                            text_block: block.innerText,
                            // Create an array of objects, each containing the text and href of a link
                            links: Array.from(block.querySelectorAll('a')).map(a => ({
                                text: a.innerText,
                                href: a.href
                            }))
                        };
                    }
                    """
                )

                search_results.append(
                    {"index": len(search_results), "content": content, "position": position, "element_obj": element}
                )

        print(f"Found {len(search_results)} instances of the term '{search_term}':\n\n{search_results}")

        return search_results

    async def scroll_to_search_result(self, search_results: list[dict], index: int = 0):
        """Scroll to the index-th search result, potentially for subsequent perception.
        Useful if you have located a search result, the search result does not fulfill your requirement, and you need more information around that search result. Can only be used after search_content_all.

        Args:
            search_results (list[dict]): search_results from search_content_all
            index (int, optional): the index of the search result to scroll to. Index starts from 0. Defaults to 0.
        """
        if not search_results:
            return {}
        if index >= len(search_results):
            print(f"Index {index} is out of range. Scrolling to the last instance.")
            index = len(search_results) - 1
        element = search_results[index]["element_obj"]
        await element.scroll_into_view_if_needed()
        print(f"Successfully scrolled to the {index}-th search result, consider extract more info around it.")
        await log_tool_output_async(
            ToolLogItem(type="object", name="scroll_page", value=self.current_page), tool_name="Browser"
        )

    async def find_links(self) -> list:
        """Finds all links in the current page and returns a list of dictionaries with link text and the URL.
        Useful for navigating to more pages and exploring more resources.

        Returns:
            list: A list of dictionaries, each containing 'text' and 'href' keys.
        """
        # Use a CSS selector to find all <a> elements in the page.
        links = await self.current_page.query_selector_all("a")

        # Prepare an empty list to hold link information.
        link_info = []

        # Iterate over each link element to extract its text and href attributes.
        for link in links:
            text = await link.text_content()
            href = await link.get_attribute("href")
            link_info.append({"text": text, "href": href})

        print(f"Found {len(link_info)} links:\n\n{link_info}")

        return link_info

    async def extract_info_from_view(self, instruction: str) -> str:
        """
        Extract useful info from the current page view.

        Args:
            instruction (str): explain what info needs to be extracted

        Returns:
            str: extracted info from current view
        """
        # The screenshot is written to a fixed file name, so each call overwrites the previous one.
        img_path = DEFAULT_WORKSPACE_ROOT / "screenshot_temp.png"
        await self.current_page.screenshot(path=img_path)
        rsp = await self.llm.aask(msg=instruction, images=[encode_image(img_path)])
        return rsp

    async def scroll_current_page(self, offset: int = 500):
        """scroll the current page by offset pixels, negative value means scrolling up, returning the content observed after scrolling"""
        await self.current_page.evaluate(f"window.scrollBy(0, {offset})")
        print(f"Scrolled current page by {offset} pixels. Perceive the scrolled view if needed")
        await log_tool_output_async(
            ToolLogItem(type="object", name="scroll_page", value=self.current_page), tool_name="Browser"
        )

    def check_all_pages(self) -> dict:
        """return all pages opened in the browser, a dictionary with {page_url: page_title}, useful for understanding the current browser state"""
        # page.title() is a coroutine in the async Playwright API and cannot be
        # awaited from this synchronous method, so report the titles recorded
        # when each page was opened (empty string if none was recorded).
        pages_info = {url: self._page_titles.get(url, "") for url in self.pages}
        return pages_info

    async def close(self):
        """close the browser and all pages"""
        if self.browser is not None:
            await self.browser.close()
            self.browser = None
        if self.playwright is not None:
            await self.playwright.stop()
            self.playwright = None
        # The page objects belonged to the closed browser; drop the stale references.
        self.pages = {}
        self._page_titles = {}
        self.current_page = None
        self.current_page_url = None
|
||||
|
||||
|
||||
async def get_scroll_position(page):
    """Return the current scroll offsets of a page.

    Args:
        page: an object exposing an awaitable ``evaluate(script)`` method
            (presumably a Playwright page - TODO confirm against callers).

    Returns:
        Whatever ``page.evaluate`` resolves to for the script below; the script
        itself builds an object with ``x`` (window.scrollX) and ``y`` (window.scrollY).
    """
    read_offsets_script = "() => ({ x: window.scrollX, y: window.scrollY })"
    offsets = await page.evaluate(read_offsets_script)
    return offsets
|
||||
|
|
@ -5,11 +5,12 @@ from __future__ import annotations
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from metagpt.const import BUGFIX_FILENAME, REQUIREMENT_FILENAME
|
||||
from metagpt.const import ASSISTANT_ALIAS, BUGFIX_FILENAME, REQUIREMENT_FILENAME
|
||||
from metagpt.logs import ToolLogItem, log_tool_output
|
||||
from metagpt.schema import BugFixContext, Message
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.common import any_to_str
|
||||
from metagpt.utils.project_repo import ProjectRepo
|
||||
|
||||
|
||||
@register_tool(tags=["software development", "ProductManager"])
|
||||
|
|
@ -42,22 +43,33 @@ async def write_prd(idea: str, project_path: Optional[str | Path] = None) -> Pat
|
|||
from metagpt.context import Context
|
||||
from metagpt.roles import ProductManager
|
||||
|
||||
log_tool_output(output=[ToolLogItem(name=ASSISTANT_ALIAS, value=write_prd.__name__)], tool_name=write_prd.__name__)
|
||||
|
||||
ctx = Context()
|
||||
if project_path and Path(project_path).exists():
|
||||
ctx.config.project_path = Path(project_path)
|
||||
ctx.config.inc = True
|
||||
|
||||
role = ProductManager(context=ctx)
|
||||
msg = await role.run(with_message=Message(content=idea, cause_by=UserRequirement))
|
||||
await role.run(with_message=msg)
|
||||
|
||||
outputs = [
|
||||
ToolLogItem(name="PRD File", value=str(ctx.repo.docs.prd.workdir / i))
|
||||
ToolLogItem(name="Intermedia PRD File", value=str(ctx.repo.docs.prd.workdir / i))
|
||||
for i in ctx.repo.docs.prd.changed_files.keys()
|
||||
]
|
||||
for i in ctx.repo.resources.competitive_analysis.changed_files.keys():
|
||||
outputs.append(
|
||||
outputs.extend(
|
||||
[
|
||||
ToolLogItem(name="PRD File", value=str(ctx.repo.resources.prd.workdir / i))
|
||||
for i in ctx.repo.resources.prd.changed_files.keys()
|
||||
]
|
||||
)
|
||||
outputs.extend(
|
||||
[
|
||||
ToolLogItem(name="Competitive Analysis", value=str(ctx.repo.resources.competitive_analysis.workdir / i))
|
||||
)
|
||||
for i in ctx.repo.resources.competitive_analysis.changed_files.keys()
|
||||
]
|
||||
)
|
||||
log_tool_output(output=outputs, tool_name=write_prd.__name__)
|
||||
|
||||
return ctx.repo.docs.prd.workdir
|
||||
|
|
@ -85,6 +97,10 @@ async def write_design(prd_path: str | Path) -> Path:
|
|||
from metagpt.context import Context
|
||||
from metagpt.roles import Architect
|
||||
|
||||
log_tool_output(
|
||||
output=[ToolLogItem(name=ASSISTANT_ALIAS, value=write_design.__name__)], tool_name=write_design.__name__
|
||||
)
|
||||
|
||||
ctx = Context()
|
||||
prd_path = Path(prd_path)
|
||||
project_path = (Path(prd_path) if not prd_path.is_file() else prd_path.parent) / "../.."
|
||||
|
|
@ -132,6 +148,11 @@ async def write_project_plan(system_design_path: str | Path) -> Path:
|
|||
from metagpt.context import Context
|
||||
from metagpt.roles import ProjectManager
|
||||
|
||||
log_tool_output(
|
||||
output=[ToolLogItem(name=ASSISTANT_ALIAS, value=write_project_plan.__name__)],
|
||||
tool_name=write_project_plan.__name__,
|
||||
)
|
||||
|
||||
ctx = Context()
|
||||
system_design_path = Path(system_design_path)
|
||||
project_path = (system_design_path if not system_design_path.is_file() else system_design_path.parent) / "../.."
|
||||
|
|
@ -141,9 +162,15 @@ async def write_project_plan(system_design_path: str | Path) -> Path:
|
|||
await role.run(with_message=Message(content="", cause_by=WriteDesign))
|
||||
|
||||
outputs = [
|
||||
ToolLogItem(name="Project Plan", value=str(ctx.repo.docs.task.workdir / i))
|
||||
ToolLogItem(name="Intermedia Project Plan", value=str(ctx.repo.docs.task.workdir / i))
|
||||
for i in ctx.repo.docs.task.changed_files.keys()
|
||||
]
|
||||
outputs.extend(
|
||||
[
|
||||
ToolLogItem(name="Project Plan", value=str(ctx.repo.resources.api_spec_and_task.workdir / i))
|
||||
for i in ctx.repo.resources.api_spec_and_task.changed_files.keys()
|
||||
]
|
||||
)
|
||||
log_tool_output(output=outputs, tool_name=write_project_plan.__name__)
|
||||
|
||||
return ctx.repo.docs.task.workdir
|
||||
|
|
@ -179,6 +206,10 @@ async def write_codes(task_path: str | Path, inc: bool = False) -> Path:
|
|||
from metagpt.context import Context
|
||||
from metagpt.roles import Engineer
|
||||
|
||||
log_tool_output(
|
||||
output=[ToolLogItem(name=ASSISTANT_ALIAS, value=write_codes.__name__)], tool_name=write_codes.__name__
|
||||
)
|
||||
|
||||
ctx = Context()
|
||||
ctx.config.inc = inc
|
||||
task_path = Path(task_path)
|
||||
|
|
@ -222,6 +253,10 @@ async def run_qa_test(src_path: str | Path) -> Path:
|
|||
from metagpt.environment import Environment
|
||||
from metagpt.roles import QaEngineer
|
||||
|
||||
log_tool_output(
|
||||
output=[ToolLogItem(name=ASSISTANT_ALIAS, value=run_qa_test.__name__)], tool_name=run_qa_test.__name__
|
||||
)
|
||||
|
||||
ctx = Context()
|
||||
src_path = Path(src_path)
|
||||
project_path = (src_path if not src_path.is_file() else src_path.parent) / ".."
|
||||
|
|
@ -270,6 +305,8 @@ async def fix_bug(project_path: str | Path, issue: str) -> Path:
|
|||
from metagpt.context import Context
|
||||
from metagpt.roles import Engineer
|
||||
|
||||
log_tool_output(output=[ToolLogItem(name=ASSISTANT_ALIAS, value=fix_bug.__name__)], tool_name=fix_bug.__name__)
|
||||
|
||||
ctx = Context()
|
||||
ctx.set_repo_dir(project_path)
|
||||
ctx.src_workspace = ctx.git_repo.workdir / ctx.git_repo.workdir.name
|
||||
|
|
@ -325,11 +362,21 @@ async def git_archive(project_path: str | Path) -> str:
|
|||
"""
|
||||
from metagpt.context import Context
|
||||
|
||||
log_tool_output(
|
||||
output=[ToolLogItem(name=ASSISTANT_ALIAS, value=git_archive.__name__)], tool_name=git_archive.__name__
|
||||
)
|
||||
|
||||
ctx = Context()
|
||||
ctx.set_repo_dir(project_path)
|
||||
project_dir = ProjectRepo.search_project_path(project_path)
|
||||
if not project_dir:
|
||||
ValueError(f"{project_path} is not a valid git repository.")
|
||||
ctx.set_repo_dir(project_dir)
|
||||
files = " ".join(ctx.git_repo.changed_files.keys())
|
||||
outputs = [ToolLogItem(name="cmd", value=f"git add {files}")]
|
||||
log_tool_output(output=outputs, tool_name=git_archive.__name__)
|
||||
ctx.git_repo.archive()
|
||||
|
||||
outputs = [ToolLogItem(name="Git Commit", value=str(ctx.repo.workdir))]
|
||||
outputs = [ToolLogItem(name="cmd", value="git commit -m 'Archive'")]
|
||||
log_tool_output(output=outputs, tool_name=git_archive.__name__)
|
||||
|
||||
return ctx.git_repo.log()
|
||||
|
|
@ -358,6 +405,10 @@ async def import_git_repo(url: str) -> Path:
|
|||
from metagpt.actions.import_repo import ImportRepo
|
||||
from metagpt.context import Context
|
||||
|
||||
log_tool_output(
|
||||
output=[ToolLogItem(name=ASSISTANT_ALIAS, value=import_git_repo.__name__)], tool_name=import_git_repo.__name__
|
||||
)
|
||||
|
||||
ctx = Context()
|
||||
action = ImportRepo(repo_path=url, context=ctx)
|
||||
await action.run()
|
||||
|
|
|
|||
|
|
@ -20,8 +20,7 @@ def convert_code_to_tool_schema(obj, include: list[str] = None) -> dict:
|
|||
continue
|
||||
# method_doc = inspect.getdoc(method)
|
||||
method_doc = get_class_method_docstring(obj, name)
|
||||
if method_doc:
|
||||
schema["methods"][name] = function_docstring_to_schema(method, method_doc)
|
||||
schema["methods"][name] = function_docstring_to_schema(method, method_doc)
|
||||
|
||||
elif inspect.isfunction(obj):
|
||||
schema = function_docstring_to_schema(obj, docstring)
|
||||
|
|
@ -39,7 +38,7 @@ def convert_code_to_tool_schema_ast(code: str) -> list[dict]:
|
|||
return visitor.get_tool_schemas()
|
||||
|
||||
|
||||
def function_docstring_to_schema(fn_obj, docstring) -> dict:
|
||||
def function_docstring_to_schema(fn_obj, docstring="") -> dict:
|
||||
"""
|
||||
Converts a function's docstring into a schema dictionary.
|
||||
|
||||
|
|
|
|||
|
|
@ -783,13 +783,15 @@ def load_mc_skills_code(skill_names: list[str] = None, skills_dir: Path = None)
|
|||
return skills
|
||||
|
||||
|
||||
def encode_image(image_path_or_pil: Union[Path, Image], encoding: str = "utf-8") -> str:
|
||||
def encode_image(image_path_or_pil: Union[Path, Image, str], encoding: str = "utf-8") -> str:
|
||||
"""encode image from file or PIL.Image into base64"""
|
||||
if isinstance(image_path_or_pil, Image.Image):
|
||||
buffer = BytesIO()
|
||||
image_path_or_pil.save(buffer, format="JPEG")
|
||||
bytes_data = buffer.getvalue()
|
||||
else:
|
||||
if isinstance(image_path_or_pil, str):
|
||||
image_path_or_pil = Path(image_path_or_pil)
|
||||
if not image_path_or_pil.exists():
|
||||
raise FileNotFoundError(f"{image_path_or_pil} not exists")
|
||||
with open(str(image_path_or_pil), "rb") as image_file:
|
||||
|
|
|
|||
|
|
@ -81,6 +81,8 @@ async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, widt
|
|||
from metagpt.utils.mmdc_ink import mermaid_to_file
|
||||
|
||||
return await mermaid_to_file(mermaid_code, output_file_without_suffix)
|
||||
elif engine == "none":
|
||||
return 0
|
||||
else:
|
||||
logger.warning(f"Unsupported mermaid engine: {engine}")
|
||||
return 0
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from metagpt.const import (
|
||||
CLASS_VIEW_FILE_REPO,
|
||||
|
|
@ -148,3 +149,14 @@ class ProjectRepo(FileRepository):
|
|||
@property
|
||||
def src_relative_path(self) -> Path | None:
|
||||
return self._srcs_path
|
||||
|
||||
@staticmethod
def search_project_path(filename: str | Path) -> Optional[Path]:
    """Locate the git repository root that contains ``filename``.

    The search starts at ``filename`` itself, or at its parent directory when
    ``filename`` is an existing regular file, and walks upwards through every
    ancestor directory until one containing a ``.git`` entry is found.

    Args:
        filename: A file or directory path inside (or at the root of) a
            git working tree.

    Returns:
        The resolved path of the closest ancestor directory that contains a
        ``.git`` entry, or ``None`` when no ancestor (including the
        filesystem root) contains one.
    """
    start = Path(filename)
    if start.is_file():
        start = start.parent
    start = start.resolve()

    # ``Path.parents`` always ends at the filesystem anchor, so the walk
    # terminates on every platform. Comparing against the literal "/" never
    # matched a Windows drive root (whose parent is itself) and looped forever.
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    return None
|
||||
|
|
|
|||
90
tests/metagpt/tools/libs/test_browser.py
Normal file
90
tests/metagpt/tools/libs/test_browser.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
import pytest
|
||||
|
||||
from metagpt.const import TEST_DATA_PATH
|
||||
from metagpt.tools.libs.browser import Browser, get_scroll_position
|
||||
|
||||
TEST_URL = "https://docs.deepwisdom.ai/main/en/guide/get_started/quickstart.html"
|
||||
|
||||
TEST_SCREENSHOT_PATH = TEST_DATA_PATH / "screenshot.png"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def llm_mock(rsp_cache, mocker, request):
    """Deliberately empty override of the global ``llm_mock`` fixture.

    Declaring an autouse fixture with the same name here replaces the global
    one for this module; it performs no mocking at all.
    """
    # NOTE(review): the original remark was copied from the provider tests;
    # presumably the browser tests need the un-mocked LLM as well - confirm.
    return None
|
||||
|
||||
|
||||
@pytest.fixture
def browser():
    """Hand each test its own ``Browser`` instance.

    The instance is only constructed here; the tests call ``start()`` and
    ``close()`` themselves.
    """
    yield Browser()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_open_and_switch_page(browser):
    """A newly opened page becomes current, and ``switch_page`` re-selects an earlier one."""
    await browser.start()
    try:
        await browser.open_new_page("https://baidu.com")
        await browser.open_new_page("https://tencent.com")
        assert browser.current_page_url == "https://tencent.com"

        await browser.switch_page("https://baidu.com")
        assert browser.current_page_url == "https://baidu.com"
    finally:
        # Close the browser even when an assertion above fails, so a failing
        # test does not leave the started browser behind.
        await browser.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_search(browser):
    """Search the quickstart page, scroll to the first hit and query the visible view."""
    await browser.start()
    try:
        # search all
        await browser.open_new_page(TEST_URL)
        search_term = "startup example"
        search_results = await browser.search_content_all(search_term)
        print(search_results)
        # expected search result as of 20240410:
        # [{'index': 0, 'content': {'text_block': 'Below is a breakdown of the software startup example. If you install MetaGPT with the git clone approach, simply run', 'links': [{'text': 'software startup example', 'href': 'https://github.com/geekan/MetaGPT/blob/main/metagpt/software_company.py'}]}, 'position': {'from_top': 640, 'from_left': 225}, 'element_obj': <Locator frame=<Frame name= url='https://docs.deepwisdom.ai/main/en/guide/get_started/quickstart.html'> selector='text=startup example >> nth=0'>}]
        first_result = search_results[0]["content"]
        assert "software startup example" in first_result["text_block"]
        assert first_result["links"]
        assert first_result["links"][0]["href"] == "https://github.com/geekan/MetaGPT/blob/main/metagpt/software_company.py"
        assert search_results[0]["position"]

        # scroll to search result
        await browser.scroll_to_search_result(search_results, index=0)

        # perceive current view
        rsp = await browser.extract_info_from_view("what is the command to run exactly?")
        assert "metagpt" in rsp
    finally:
        # Close the browser even when an assertion above fails, so a failing
        # test does not leave the started browser behind.
        await browser.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_find_links(browser):
    """``find_links`` returns a non-empty result for the quickstart page."""
    await browser.start()
    try:
        await browser.open_new_page(TEST_URL)
        link_info = await browser.find_links()
        assert link_info
    finally:
        # Close the browser even when the assertion above fails, so a failing
        # test does not leave the started browser behind.
        await browser.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_scroll(browser):
    """Scrolling moves the page by the given offset and does not go above the top."""
    await browser.start()
    try:
        await browser.open_new_page(TEST_URL)

        await browser.scroll_current_page(offset=-500)
        # no change if you scroll up from the top
        assert await get_scroll_position(browser.current_page) == {"x": 0, "y": 0}

        await browser.scroll_current_page(offset=500)  # scroll down
        assert await get_scroll_position(browser.current_page) == {"x": 0, "y": 500}

        await browser.scroll_current_page(offset=-200)  # scroll up
        assert await get_scroll_position(browser.current_page) == {"x": 0, "y": 300}
    finally:
        # Close the browser even when an assertion above fails, so a failing
        # test does not leave the started browser behind.
        await browser.close()
|
||||
|
|
@ -48,6 +48,14 @@ class DummyClass:
|
|||
pass
|
||||
|
||||
|
||||
# Subclass fixture: adds one method of its own on top of whatever DummyClass
# defines. The docstrings below are input data for the schema conversion
# and are kept verbatim.
class DummySubClass(DummyClass):
    """sub class docstring"""

    def sub_method(self, df: pd.DataFrame):
        """sub method"""
|
||||
|
||||
|
||||
def dummy_fn(
|
||||
df: pd.DataFrame,
|
||||
s: str,
|
||||
|
|
@ -117,6 +125,18 @@ def test_convert_code_to_tool_schema_class():
|
|||
assert schema == expected
|
||||
|
||||
|
||||
def test_convert_code_to_tool_schema_subclass():
    """The schema of a subclass lists both its own and its inherited methods."""
    method_schemas = convert_code_to_tool_schema(DummySubClass)["methods"]
    # method defined on the subclass itself
    assert "sub_method" in method_schemas
    # method inherited from the parent class
    assert "fit" in method_schemas
|
||||
|
||||
|
||||
def test_convert_code_to_tool_schema_include():
    """``include`` restricts the generated schema to the named methods."""
    method_schemas = convert_code_to_tool_schema(DummyClass, include=["fit"])["methods"]
    assert "fit" in method_schemas
    assert "transform" not in method_schemas
|
||||
|
||||
|
||||
def test_convert_code_to_tool_schema_function():
|
||||
expected = {
|
||||
"type": "function",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue