Merge branch 'mgx_ops' of https://gitlab.deepwisdomai.com/pub/MetaGPT into sd_tools

This commit is contained in:
stellahsr 2024-04-12 17:35:44 +08:00
commit 5b1a66bdec
20 changed files with 544 additions and 62 deletions

View file

@ -11,10 +11,10 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main():
web_url = "https://pytorch.org/"
prompt = f"""This is a URL of webpage: '{web_url}' .
Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Firstly, open the page and take a screenshot of the page.
Secondly, convert the image to a webpage including HTML, CSS and JS in one go.
Note: All required dependencies and environments have been fully installed and configured."""
di = DataInterpreter(tools=["GPTvGenerator"])
di = DataInterpreter(tools=["GPTvGenerator", "Browser"])
await di.run(prompt)

View file

@ -1,16 +1,15 @@
import asyncio
from metagpt.roles.di.mgx import MGX
from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
# di = DataInterpreter()
di = MGX(use_intent=False, tools=["<all>"])
di = DataInterpreter()
await di.run(requirement)
if __name__ == "__main__":
image_path = r"F:\deepWisdom\metaGPT\hsr\MetaGPT\examples\data\dog.beebf16d.jpg"
save_path = r"F:\deepWisdom\metaGPT\hsr\MetaGPT\examples\data\/image_rm_bg.png"
image_path = "/your/path/to/the/image.jpeg"
save_path = "/your/intended/save/path/for/image_rm_bg.png"
requirement = f"This is a image, you need to use python toolkit rembg to remove the background of the image and save the result. image path:{image_path}; save path:{save_path}."
asyncio.run(main(requirement))

20
examples/di/run_flask.py Normal file
View file

@ -0,0 +1,20 @@
import asyncio
from metagpt.roles.di.data_interpreter import DataInterpreter
# Requirement handed to the DataInterpreter: write a Flask service, run it inside a
# conda environment and validate it by calling its interface.
FLASK_SERVICE_REQ = """
Write a service using Flask, create a conda environment and run it, and call the service's interface for validation.
Notice: Don't write all codes in one response, each time, just write code for one step.
"""

# Backward-compatible alias. The constant used to carry this name, which is the one the
# graph-of-thoughts example uses for its own (unrelated) requirement.
USE_GOT_REPO_REQ = FLASK_SERVICE_REQ

# If you have created a conda environment, you can say:
# I have created the conda environment '{env_name}', please use this environment to execute.


async def main():
    """Run a DataInterpreter equipped with the Terminal and FileManager tools on FLASK_SERVICE_REQ."""
    di = DataInterpreter(tools=["Terminal", "FileManager"])
    await di.run(FLASK_SERVICE_REQ)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,29 @@
import asyncio
from metagpt.roles.di.data_interpreter import DataInterpreter
# Requirement: explore the MetaGPT documentation site and find the list of supported LLM APIs.
# This is the requirement that `main` below actually runs.
MG_LLM_CONFIG_REQ = """
This is a link to the doc site of MetaGPT project: https://docs.deepwisdom.ai/main/en/
Check where you can go to on the site and try to find out the list of LLM APIs supported by MetaGPT.
Don't write all codes in one response, each time, just write code for one step.
"""
# Requirement: locate a paper on the ICLR 2024 statistics page and summarize its abstract.
# The literal is opened with exactly three quotes; a fourth one would become the first
# character of the prompt sent to the model.
PAPER_LIST_REQ = """
At https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
find the first paper whose title includes `multiagent`, open it and summarize its abstract.
Don't write all codes in one response, each time, just write code for one step.
"""
# Requirement: navigate the MetaGPT GitHub repository to an issue about DataInterpreter
# and summarize it in one sentence.
DESCRIBE_GITHUB_ISSUE_REQ = """
Visit https://github.com/geekan/MetaGPT, navigate to Issues page, open the first issue related to DataInterpreter, then summarize what the issue is in one sentence.
Don't write all codes in one response, each time, just write code for one step.
"""
async def main():
    """Drive a react-mode DataInterpreter that only has the Browser tool through MG_LLM_CONFIG_REQ."""
    interpreter = DataInterpreter(tools=["Browser"], react_mode="react")
    await interpreter.run(MG_LLM_CONFIG_REQ)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -5,7 +5,8 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
USE_GOT_REPO_REQ = """
This is a link to the GOT github repo: https://github.com/spcl/graph-of-thoughts.git.
Clone it, read the README to understand the usage, install it, and finally run the quick start example.
**Note the config for LLM is at `config/config_got.json`, use this path directly.** Don't write all codes in one response, each time, just write code for one step.
**Note the config for LLM is at `config/config_got.json`, it's outside the repo path, before using it, you need to copy it into graph-of-thoughts.
** Don't write all codes in one response, each time, just write code for one step.
"""

View file

@ -7,7 +7,10 @@ from metagpt.roles.di.mgx import MGX
requirement = (
# "design a game using Gym (an open source Python library), including a graphical interface and interactive gameplay"
'Create a "2048 game"'
# "帮我把pip的源设置成https://pypi.tuna.tsinghua.edu.cn/simple"
# "This is a website url does not require login: https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767 please write a similar web page,developed in vue language, The package.json dependency must be generated"
"I would like to imitate the website available at https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767. Could you please browse through it?"
# "Create a 2048 Game"
)

View file

@ -29,44 +29,38 @@ class SOPItemDef(BaseModel):
class SOPItem(Enum):
SOFTWARE_DEVELOPMENT = SOPItemDef(
name="software development",
description="Intentions related to or including software development, such as developing or building software, games, app, websites, etc. Excluding bug fixes, report any issues, environment setup, operations and pip install.",
description="Software development intention including developing or building software, games, app, websites, etc. EXCLUDING bug fixes, report any issues, environment setup, terminal operations, and pip install.",
sop=[
"Writes a PRD based on software requirements.",
"Writes a design to the project repository, based on the PRD of the project.",
"Writes a system design to the project repository, based on the PRD of the project. Write high-level system design instead of the actual code.",
"Writes a project plan to the project repository, based on the design of the project.",
"Writes code to implement designed features according to the project plan and adds them to the project repository.",
"Stage and commit changes for the project repository using Git.",
],
)
FIX_BUGS = SOPItemDef(
name="fix bugs",
description="Fix bugs in a given project.",
sop=[
"Fix bugs in the project repository.",
"Stage and commit changes for the project repository using Git.",
],
)
FORMAT_REPO = SOPItemDef(
name="format repo",
description="download repository from git and format the project to MetaGPT project",
sop=[
"Imports a project from a Git website and formats it to MetaGPT project format to enable incremental appending requirements.",
"Stage and commit changes for the project repository using Git.",
],
)
WEBPAGE_IMITATION = SOPItemDef(
name="webpage_imitation",
description="webpage browsing, imitation and other applications etc.",
sop=[
"Utilize Selenium and WebDriver for rendering.",
"Capture a screenshot of the rendered webpage.",
"Convert image to a webpage including HTML, CSS and JS in one go.",
],
# FIX_BUGS = SOPItemDef(
# name="fix bugs",
# description="Fix bugs in a given project.",
# sop=[
# "Fix bugs in the project repository.",
# "Stage and commit changes for the project repository using Git.",
# ],
# )
# FORMAT_REPO = SOPItemDef(
# name="format repo",
# description="download repository from git and format the project to MetaGPT project",
# sop=[
# "Imports a project from a Git website and formats it to MetaGPT project format to enable incremental appending requirements.",
# "Stage and commit changes for the project repository using Git.",
# ],
# )
WEB_OPERATION = SOPItemDef(
name="web operation",
description="web browsing, scraping, imitation and other interaction with the web",
)
OTHER = SOPItemDef(
name="other",
description="Other intentions that do not fall into the above categories, including data science, machine learning, deep learning and text-to-image etc.",
sop=[],
description="Other intentions that do not fall into the above categories, including data science, data analysis, machine learning, deep learning and text-to-image etc.",
)
@property
@ -128,7 +122,13 @@ class DetectIntent(Action):
async def main():
# Example usage of the DetectIntent action
user_requirements = ["Develop a 2048 game.", "Run data analysis on sklearn wine dataset"]
user_requirements = [
"Develop a 2048 game.",
"Run data analysis on sklearn wine dataset",
"帮我把pip的源设置成https://pypi.tuna.tsinghua.edu.cn/simple",
"This is a website url does not require login: https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767 please write a similar web page,developed in vue language, The package.json dependency must be generated",
"I would like to imitate the website available at https://demosc.chinaz.net/Files/DownLoad//moban/202404/moban7767. Could you please browse through it?",
]
detect_intent = DetectIntent()
for user_requirement in user_requirements:

View file

@ -27,6 +27,8 @@ from metagpt.actions import Action
from metagpt.const import DEFAULT_WORKSPACE_ROOT
from metagpt.logs import ToolLogItem, log_tool_output, logger
INSTALL_KEEPLEN = 500
class ExecuteNbCode(Action):
"""execute notebook code block, return result to llm, and display it."""
@ -207,6 +209,7 @@ class ExecuteNbCode(Action):
if "!pip" in code:
success = False
outputs = outputs[-INSTALL_KEEPLEN:]
file_path = DEFAULT_WORKSPACE_ROOT / "code.ipynb"
nbformat.write(self.nb, file_path)

View file

@ -26,7 +26,7 @@ class ToolLogItem(BaseModel):
TOOL_LOG_END_MARKER = ToolLogItem(
type="str", name="end_marker", value="#END#"
type="str", name="end_marker", value="\x18\x19\x1B\x18"
) # A special log item to suggest the end of a stream log
@ -54,6 +54,11 @@ def log_tool_output(output: ToolLogItem | list[ToolLogItem], tool_name: str = ""
_tool_output_log(output=output, tool_name=tool_name)
async def log_tool_output_async(output: ToolLogItem | list[ToolLogItem], tool_name: str = ""):
    """async interface for logging tool output, used when output contains async object

    Awaits the module-level `_tool_output_log_async` hook, forwarding `output` and
    `tool_name` as keyword arguments.
    """
    await _tool_output_log_async(output=output, tool_name=tool_name)
def set_llm_stream_logfunc(func):
    """Rebind the module-level `_llm_stream_log` hook to `func`."""
    global _llm_stream_log
    _llm_stream_log = func
@ -64,9 +69,20 @@ def set_tool_output_logfunc(func):
_tool_output_log = func
async def set_tool_output_logfunc_async(func):
    """Rebind the module-level `_tool_output_log_async` hook to `func`.

    This setter is itself declared with `async def`, so the rebinding only takes
    place once the returned coroutine is awaited.
    """
    # async version
    global _tool_output_log_async
    _tool_output_log_async = func
_llm_stream_log = partial(print, end="")
_tool_output_log = (
lambda *args, **kwargs: None
) # a dummy function to avoid errors if set_tool_output_logfunc is not called
async def _tool_output_log_async(*args, **kwargs):
    """Default async tool-output hook: accepts any arguments and does nothing."""
    # async version
    pass

View file

@ -4,10 +4,11 @@
import asyncio
from typing import Dict
from metagpt.actions.di.detect_intent import DetectIntent
from metagpt.actions.di.detect_intent import DetectIntent, SOPItem
from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.schema import Message
from metagpt.tools.tool_recommend import BM25ToolRecommender
class MGX(DataInterpreter):
@ -18,6 +19,10 @@ class MGX(DataInterpreter):
todo = DetectIntent(context=self.context)
request_with_sop, sop_type = await todo.run(user_msg)
logger.info(f"{sop_type} {request_with_sop}")
if sop_type == SOPItem.SOFTWARE_DEVELOPMENT.type_name:
self.tool_recommender = BM25ToolRecommender(tools=["software development"])
else:
self.tool_recommender = BM25ToolRecommender(tools=["<all>"])
return request_with_sop
async def _plan_and_act(self) -> Message:

View file

@ -13,6 +13,8 @@ from metagpt.tools.libs import (
email_login,
terminal,
file_manager,
browser,
deployer,
)
from metagpt.tools.libs.software_development import (
write_prd,
@ -40,4 +42,6 @@ _ = (
git_archive,
terminal,
file_manager,
browser,
deployer,
) # Avoid pre-commit error

View file

@ -0,0 +1,217 @@
from playwright.async_api import async_playwright
from metagpt.const import DEFAULT_WORKSPACE_ROOT
from metagpt.logs import ToolLogItem, log_tool_output_async
from metagpt.tools.tool_registry import register_tool
@register_tool()
class Browser:
    """
    A tool for browsing the web. Don't initialize a new instance of this class if one already exists.
    Note: Combine searching and scrolling together to achieve most effective browsing. DON'T stick to one method.
    """

    def __init__(self):
        """initiate the browser, create pages placeholder later to be managed as {page_url: page object}"""
        # Both handles stay None until `start` has been awaited.
        self.playwright = None
        self.browser = None
        # browser status management
        self.pages = {}
        self.current_page_url = None
        self.current_page = None

    async def start(self):
        """Starts Playwright and launches a browser"""
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch()

    async def _set_current_page(self, page, url):
        """Record `page`/`url` as the current page and print what is visible on it."""
        self.current_page = page
        self.current_page_url = url
        print("Now on page ", url)
        print(await self._view())

    async def open_new_page(self, url: str):
        """open a new page in the browser and view the page"""
        # Launch lazily so that opening a page before an explicit `start` does not
        # fail with an AttributeError on `None`.
        if self.browser is None:
            await self.start()
        page = await self.browser.new_page()
        await page.goto(url)
        self.pages[url] = page
        await self._set_current_page(page, url)
        await log_tool_output_async(
            ToolLogItem(type="object", name="open_new_page", value=self.current_page), tool_name="Browser"
        )

    async def switch_page(self, url: str):
        """switch to an opened page in the browser and view the page"""
        if url in self.pages:
            await self._set_current_page(self.pages[url], url)
            await log_tool_output_async(
                ToolLogItem(type="object", name="switch_page", value=self.current_page), tool_name="Browser"
            )
        else:
            print(f"Page not found: {url}")

    async def search_content_all(self, search_term: str) -> list[dict]:
        """search all occurrences of search term in the current page and return the search results with their position.
        Useful if you have a keyword or sentence in mind and want to quickly narrow down the content relevant to it.

        Args:
            search_term (str): the search term

        Returns:
            list[dict]: a list of dictionaries containing the elements and their positions, e.g.
            [
                {
                    "index": ...,
                    "content": {
                        "text_block": ...,
                        "links": [
                            {"text": ..., "href": ...},
                            ...
                        ]
                    },
                    "position": {from_top: ..., from_left: ...},
                    "element_obj": ...,  # the matched element, consumed by scroll_to_search_result
                },
                ...
            ]
        """
        locator = self.current_page.locator(f"text={search_term}")
        count = await locator.count()
        search_results = []
        for i in range(count):
            element = locator.nth(i)
            # Only visible matches are reported, so "index" counts visible hits only.
            if await element.is_visible():
                position = await element.evaluate("e => ({ from_top: e.offsetTop, from_left: e.offsetLeft })")
                # Retrieve the surrounding block of text and links with their text
                content = await element.evaluate(SEARCH_CONTENT_JS)
                search_results.append(
                    {"index": len(search_results), "content": content, "position": position, "element_obj": element}
                )
        print(f"Found {len(search_results)} instances of the term '{search_term}':\n\n{search_results}")
        return search_results

    async def scroll_to_search_result(self, search_results: list[dict], index: int = 0):
        """Scroll to the index-th search result, potentially for subsequent perception.
        Useful if you have located a search result, the search result does not fulfill your requirement, and you need more information around that search result. Can only be used after search_content_all.

        Args:
            search_results (list[dict]): search_results from search_content_all
            index (int, optional): the index of the search result to scroll to. Index starts from 0. Defaults to 0.
        """
        if not search_results:
            return {}
        if index >= len(search_results):
            print(f"Index {index} is out of range. Scrolling to the last instance.")
            index = len(search_results) - 1
        element = search_results[index]["element_obj"]
        await element.scroll_into_view_if_needed()
        await log_tool_output_async(
            ToolLogItem(type="object", name="scroll_page", value=self.current_page), tool_name="Browser"
        )
        print(f"Successfully scrolled to the {index}-th search result")
        print(await self._view())

    # async def find_links(self) -> list:
    #     """Finds all links in the current page and returns a list of dictionaries with link text and the URL.
    #     Useful for navigating to more pages and exploring more resources.
    #     Returns:
    #         list: A list of dictionaries, each containing 'text' and 'href' keys.
    #     """
    #     # Use a CSS selector to find all <a> elements in the page.
    #     links = await self.current_page.query_selector_all("a")
    #     # Prepare an empty list to hold link information.
    #     link_info = []
    #     # Iterate over each link element to extract its text and href attributes.
    #     for link in links:
    #         text = await link.text_content()
    #         href = await link.get_attribute("href")
    #         link_info.append({"text": text, "href": href})
    #     print(f"Found {len(link_info)} links:\n\n{link_info}")
    #     return link_info

    async def screenshot(self, path: str = str(DEFAULT_WORKSPACE_ROOT / "screenshot_temp.png")):
        """Take a screenshot of the current page and save it to the specified path."""
        await self.current_page.screenshot(path=path)
        print(f"Screenshot saved to: {path}")

    async def _view(self) -> str:
        """simulate human viewing the current page, return the visible text with links"""
        visible_text_with_links = await self.current_page.evaluate(VIEW_CONTENT_JS)
        return visible_text_with_links

    async def scroll_current_page(self, offset: int = 500):
        """scroll the current page by offset pixels, negative value means scrolling up, will print out observed content after scrolling"""
        await self.current_page.evaluate(f"window.scrollBy(0, {offset})")
        await log_tool_output_async(
            ToolLogItem(type="object", name="scroll_page", value=self.current_page), tool_name="Browser"
        )
        print(f"Scrolled current page by {offset} pixels.")
        print(await self._view())

    async def check_all_pages(self) -> dict:
        """return all pages opened in the browser, a dictionary with {page_url: page_title}, useful for understanding the current browser state

        This is a coroutine and must be awaited: with the async Playwright API the page
        title is itself obtained by awaiting `page.title()`.
        """
        return {url: await page.title() for url, page in self.pages.items()}

    async def close(self):
        """close the browser and all pages"""
        try:
            if self.browser is not None:
                await self.browser.close()
        finally:
            # Stop Playwright even if closing the browser failed, then drop every
            # handle so the instance no longer points at closed pages.
            if self.playwright is not None:
                await self.playwright.stop()
            self.browser = None
            self.playwright = None
            self.pages = {}
            self.current_page = None
            self.current_page_url = None
async def get_scroll_position(page):
    """Evaluate a script on `page` that reports `window.scrollX` / `window.scrollY` as `x` / `y`."""
    scroll_offsets = await page.evaluate("() => ({ x: window.scrollX, y: window.scrollY })")
    return scroll_offsets
# JavaScript evaluated on a matched element by Browser.search_content_all: it returns the
# inner text of the element's parent together with the text and href of every <a> inside
# that parent.
SEARCH_CONTENT_JS = """
(element) => {
// const block = element.closest('p, div, section, article');
const block = element.parentElement;
return {
text_block: block.innerText,
// Create an array of objects, each containing the text and href of a link
links: Array.from(block.querySelectorAll('a')).map(a => ({
text: a.innerText,
href: a.href
}))
};
}
"""
# JavaScript evaluated in the page by Browser._view. It keeps every element under <body>
# that has a non-empty box, is not hidden through display/visibility/opacity, whose centre
# lies inside the viewport and which is the top-most element at that centre point. For each
# kept element it takes the trimmed inner text, appends the href of an enclosing <a> when
# there is one, and joins the non-empty pieces with newlines.
VIEW_CONTENT_JS = """
() => {
return Array.from(document.querySelectorAll('body *')).filter(el => {
if (!(el.offsetWidth || el.offsetHeight || el.getClientRects().length)) return false;
const style = window.getComputedStyle(el);
if (style.display === 'none' || style.visibility !== 'visible' || style.opacity === '0') return false;
const rect = el.getBoundingClientRect();
const elemCenter = {
x: rect.left + rect.width / 2,
y: rect.top + rect.height / 2
};
if (elemCenter.x < 0 || elemCenter.y < 0 || elemCenter.x > window.innerWidth || elemCenter.y > window.innerHeight) return false;
if (document.elementFromPoint(elemCenter.x, elemCenter.y) !== el) return false;
return true;
}).map(el => {
let text = el.innerText || '';
text = text.trim();
if (!text.length) return '';
const parentAnchor = el.closest('a');
if (parentAnchor && parentAnchor.href) {
return `${text} (${parentAnchor.href})`;
}
return text;
}).filter(text => text.length > 0).join("\\n");
}
"""

View file

@ -0,0 +1,11 @@
from metagpt.logs import ToolLogItem, log_tool_output
from metagpt.tools.tool_registry import register_tool
# An un-implemented tool reserved for deploying a local service to public
@register_tool()
class Deployer:
    """Deploy a local service to public. Used only for final deployment, you should NOT use it for development and testing."""

    def deploy_to_public(self, local_url: str):
        # Nothing is deployed here: the URL is only handed to the tool-output log hook.
        url_log_item = ToolLogItem(name="local_url", value=local_url)
        log_tool_output(url_log_item, tool_name="Deployer")

View file

@ -75,9 +75,9 @@ async def write_prd(idea: str, project_path: Optional[str | Path] = None) -> Pat
return ctx.repo.docs.prd.workdir
@register_tool(tags=["software development", "Architect"])
@register_tool(tags=["Design", "software development", "Architect"])
async def write_design(prd_path: str | Path) -> Path:
"""Writes a design to the project repository, based on the PRD of the project.
"""Writes a system design to the project repository, based on the PRD of the project.
Args:
prd_path (str|Path): The path to the PRD files under the project directory.
@ -86,9 +86,8 @@ async def write_design(prd_path: str | Path) -> Path:
Path: The path to the system design files under the project directory.
Example:
>>> from metagpt.tools.libs.software_development import write_prd
>>> from metagpt.tools.libs.software_development import write_design
>>> prd_path = await write_prd("Create a new feature for the application")
>>> prd_path = '/path/to/project_path/docs/prd' # Returned by `write_prd`
>>> system_design_path = await write_desgin(prd_path)
>>> print(system_design_path)
'/path/to/project_path/docs/system_design/'
@ -180,6 +179,7 @@ async def write_project_plan(system_design_path: str | Path) -> Path:
@register_tool(tags=["software development", "Engineer"])
async def write_codes(task_path: str | Path, inc: bool = False) -> Path:
"""Writes code to implement designed features according to the project plan and adds them to the project repository.
In code writing tasks, prioritize calling this tool against writing code from scratch directly.
Args:
task_path (str|Path): The path to task files under the project directory.

View file

@ -1,4 +1,6 @@
import subprocess
import threading
from queue import Queue
from metagpt.logs import TOOL_LOG_END_MARKER, ToolLogItem, log_tool_output
from metagpt.tools.tool_registry import register_tool
@ -6,7 +8,12 @@ from metagpt.tools.tool_registry import register_tool
@register_tool()
class Terminal:
"""A tool for running terminal commands. Don't initialize a new instance of this class if one already exists."""
"""
A tool for running terminal commands.
Don't initialize a new instance of this class if one already exists.
For commands that need to be executed within a Conda environment, it is recommended
to use the `execute_in_conda_env` method.
"""
def __init__(self):
self.shell_command = ["bash"] # FIXME: should consider windows support later
@ -21,27 +28,70 @@ class Terminal:
text=True,
bufsize=1, # Line buffered
)
self.stdout_queue = Queue()
def run_command(self, cmd: str) -> str:
def run_command(self, cmd: str, daemon=False) -> str:
"""
Run a command in the terminal and return the output.
When the command is being executed, stream the output to the terminal.
Maintains state across commands, such as current directory.
Executes a specified command in the terminal and streams the output back in real time.
This command maintains state across executions, such as the current directory,
allowing for sequential commands to be contextually aware. The output from the
command execution is placed into `stdout_queue`, which can be consumed as needed.
Args:
cmd (str): The command to run in the terminal.
cmd (str): The command to execute in the terminal.
daemon (bool): If True, executes the command in a background thread, allowing
the main program to continue execution. The command's output is
collected asynchronously in daemon mode and placed into `stdout_queue`.
Returns:
str: The output of the terminal command.
str: The command's output or an empty string if `daemon` is True. Remember that
when `daemon` is True, the output is collected into `stdout_queue` and must
be consumed from there.
Note:
If `stdout_queue` is not periodically consumed, it could potentially grow indefinitely,
consuming memory. Ensure that there's a mechanism in place to consume this queue,
especially during long-running or output-heavy command executions.
"""
cmd_output = []
# Send the command
self.process.stdin.write(cmd + self.command_terminator)
self.process.stdin.write(
f'echo "{TOOL_LOG_END_MARKER.value}"' + self.command_terminator
f'echo "{TOOL_LOG_END_MARKER.value}"' + self.command_terminator # write EOF
) # Unique marker to signal command end
self.process.stdin.flush()
if daemon:
threading.Thread(target=self._read_and_process_output, args=(cmd,), daemon=True).start()
return ""
else:
return self._read_and_process_output(cmd)
def execute_in_conda_env(self, cmd: str, env, daemon=False) -> str:
"""
Executes a given command within a specified Conda environment automatically without
the need for manual activation. Users just need to provide the name of the Conda
environment and the command to execute.
Args:
cmd (str): The command to execute within the Conda environment.
env (str, optional): The name of the Conda environment to activate before executing the command.
If not specified, the command will run in the current active environment.
daemon (bool): If True, the command is run in a background thread, similar to `run_command`,
affecting error logging and handling in the same manner.
Returns:
str: The command's output, or an empty string if `daemon` is True, with output processed
asynchronously in that case.
Note:
This function wraps `run_command`, prepending the necessary Conda activation commands
to ensure the specified environment is active for the command's execution.
"""
cmd = f"conda run -n {env} {cmd}"
return self.run_command(cmd, daemon=daemon)
def _read_and_process_output(self, cmd):
cmd_output = []
log_tool_output(
output=ToolLogItem(name="cmd", value=cmd + self.command_terminator), tool_name="Terminal"
) # log the command
@ -49,13 +99,20 @@ class Terminal:
# Read the output until the unique marker is found
while True:
line = self.process.stdout.readline()
if line.strip() == TOOL_LOG_END_MARKER.value:
ix = line.rfind(TOOL_LOG_END_MARKER.value)
if ix >= 0:
line = line[0:ix]
if line:
log_tool_output(
output=ToolLogItem(name="output", value=line), tool_name="Terminal"
) # log stdout in real-time
cmd_output.append(line)
log_tool_output(TOOL_LOG_END_MARKER)
break
log_tool_output(
output=ToolLogItem(name="output", value=line), tool_name="Terminal"
) # log stdout in real-time
# log stdout in real-time
log_tool_output(output=ToolLogItem(name="output", value=line), tool_name="Terminal")
cmd_output.append(line)
self.stdout_queue.put(line)
return "".join(cmd_output)

View file

@ -20,8 +20,7 @@ def convert_code_to_tool_schema(obj, include: list[str] = None) -> dict:
continue
# method_doc = inspect.getdoc(method)
method_doc = get_class_method_docstring(obj, name)
if method_doc:
schema["methods"][name] = function_docstring_to_schema(method, method_doc)
schema["methods"][name] = function_docstring_to_schema(method, method_doc)
elif inspect.isfunction(obj):
schema = function_docstring_to_schema(obj, docstring)
@ -39,7 +38,7 @@ def convert_code_to_tool_schema_ast(code: str) -> list[dict]:
return visitor.get_tool_schemas()
def function_docstring_to_schema(fn_obj, docstring) -> dict:
def function_docstring_to_schema(fn_obj, docstring="") -> dict:
"""
Converts a function's docstring into a schema dictionary.

View file

@ -783,13 +783,15 @@ def load_mc_skills_code(skill_names: list[str] = None, skills_dir: Path = None)
return skills
def encode_image(image_path_or_pil: Union[Path, Image], encoding: str = "utf-8") -> str:
def encode_image(image_path_or_pil: Union[Path, Image, str], encoding: str = "utf-8") -> str:
"""encode image from file or PIL.Image into base64"""
if isinstance(image_path_or_pil, Image.Image):
buffer = BytesIO()
image_path_or_pil.save(buffer, format="JPEG")
bytes_data = buffer.getvalue()
else:
if isinstance(image_path_or_pil, str):
image_path_or_pil = Path(image_path_or_pil)
if not image_path_or_pil.exists():
raise FileNotFoundError(f"{image_path_or_pil} not exists")
with open(str(image_path_or_pil), "rb") as image_file:

View file

@ -0,0 +1,90 @@
import pytest
from metagpt.const import TEST_DATA_PATH
from metagpt.tools.libs.browser import Browser, get_scroll_position
# Documentation page opened by the search and scroll tests.
TEST_URL = "https://docs.deepwisdom.ai/main/en/guide/get_started/quickstart.html"
# Screenshot location under the test data directory.
TEST_SCREENSHOT_PATH = TEST_DATA_PATH / "screenshot.png"
@pytest.fixture(autouse=True)
def llm_mock(rsp_cache, mocker, request):
    # An empty fixture to overwrite the global llm_mock fixture.
    # NOTE(review): the original rationale referred to the provider folder and to testing
    # the aask functions for specific models, which looks copy-pasted from another test
    # module; presumably the browser tests simply do not need LLM mocking - confirm.
    pass
@pytest.fixture
def browser():
    """Provide a fresh, not yet started Browser; each test starts and closes it itself."""
    return Browser()
@pytest.mark.asyncio
async def test_open_and_switch_page(browser):
    """Opening a page makes it current, and switch_page goes back to an already opened one."""
    await browser.start()
    try:
        await browser.open_new_page("https://baidu.com")
        await browser.open_new_page("https://tencent.com")
        assert browser.current_page_url == "https://tencent.com"

        await browser.switch_page("https://baidu.com")
        assert browser.current_page_url == "https://baidu.com"
    finally:
        # Shut the browser down even when an assertion fails, so no browser process leaks.
        await browser.close()
@pytest.mark.asyncio
async def test_search(browser):
    """search_content_all returns text, links and position; the result can then be scrolled to."""
    await browser.start()
    try:
        # search all
        await browser.open_new_page(TEST_URL)
        search_term = "startup example"
        search_results = await browser.search_content_all(search_term)
        print(search_results)
        # expected search result as of 20240410:
        # [{'index': 0, 'content': {'text_block': 'Below is a breakdown of the software startup example. If you install MetaGPT with the git clone approach, simply run', 'links': [{'text': 'software startup example', 'href': 'https://github.com/geekan/MetaGPT/blob/main/metagpt/software_company.py'}]}, 'position': {'from_top': 640, 'from_left': 225}, 'element_obj': <Locator frame=<Frame name= url='https://docs.deepwisdom.ai/main/en/guide/get_started/quickstart.html'> selector='text=startup example >> nth=0'>}]
        first_result = search_results[0]["content"]
        assert "software startup example" in first_result["text_block"]
        assert first_result["links"]
        assert (
            first_result["links"][0]["href"]
            == "https://github.com/geekan/MetaGPT/blob/main/metagpt/software_company.py"
        )
        assert search_results[0]["position"]

        # scroll to search result
        await browser.scroll_to_search_result(search_results, index=0)
    finally:
        # Shut the browser down even when an assertion fails, so no browser process leaks.
        await browser.close()
# @pytest.mark.asyncio
# async def test_find_links(browser):
# await browser.start()
# await browser.open_new_page(TEST_URL)
# link_info = await browser.find_links()
# assert link_info
# await browser.close()
@pytest.mark.asyncio
async def test_scroll(browser):
    """scroll_current_page moves the viewport by the given offset and changes the visible content."""
    await browser.start()
    try:
        await browser.open_new_page(TEST_URL)

        await browser.scroll_current_page(offset=-500)
        # no change if you scroll up from top
        assert await get_scroll_position(browser.current_page) == {"x": 0, "y": 0}
        initial_view = await browser._view()

        await browser.scroll_current_page(offset=500)  # scroll down
        assert await get_scroll_position(browser.current_page) == {"x": 0, "y": 500}
        scrolled_view = await browser._view()
        assert initial_view != scrolled_view

        await browser.scroll_current_page(offset=-200)  # scroll up
        assert await get_scroll_position(browser.current_page) == {"x": 0, "y": 300}
    finally:
        # Shut the browser down even when an assertion fails, so no browser process leaks.
        await browser.close()

View file

@ -1,3 +1,5 @@
import pytest
from metagpt.const import DATA_PATH, METAGPT_ROOT
from metagpt.tools.libs.terminal import Terminal
@ -13,3 +15,7 @@ def test_terminal():
terminal.run_command("cd data")
output = terminal.run_command("pwd")
assert output.strip() == str(DATA_PATH)
if __name__ == "__main__":
pytest.main([__file__, "-s"])

View file

@ -48,6 +48,14 @@ class DummyClass:
pass
# Fixture class for the subclass schema test: inherits from DummyClass and adds one
# method of its own.
class DummySubClass(DummyClass):
    """sub class docstring"""

    def sub_method(self, df: pd.DataFrame):
        """sub method"""
        pass
def dummy_fn(
df: pd.DataFrame,
s: str,
@ -117,6 +125,18 @@ def test_convert_code_to_tool_schema_class():
assert schema == expected
def test_convert_code_to_tool_schema_subclass():
    """The schema of a subclass lists both its own methods and the ones it inherits."""
    schema = convert_code_to_tool_schema(DummySubClass)
    assert "sub_method" in schema["methods"]  # sub class method should be included
    assert "fit" in schema["methods"]  # parent class method should be included
def test_convert_code_to_tool_schema_include():
    """Passing `include=["fit"]` keeps `fit` and leaves `transform` out of the schema's methods."""
    schema = convert_code_to_tool_schema(DummyClass, include=["fit"])
    assert "fit" in schema["methods"]
    assert "transform" not in schema["methods"]
def test_convert_code_to_tool_schema_function():
expected = {
"type": "function",