Merge branch 'data_analyst_ldy' into 'mgx_ops'

Data analyst ldy

See merge request pub/MetaGPT!189
This commit is contained in:
林义章 2024-07-15 06:55:52 +00:00
commit e2ce006d15
17 changed files with 376 additions and 198 deletions

View file

@ -5,11 +5,11 @@
# @File : __init__.py
# @Desc :
from metagpt.tools.libs import (
# data_preprocess,
# feature_engineering,
data_preprocess,
feature_engineering,
sd_engine,
gpt_v_generator,
# web_scraping,
web_scraping,
# email_login,
terminal,
editor,
@ -20,11 +20,11 @@ from metagpt.tools.libs import (
from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description
_ = (
# data_preprocess,
# feature_engineering,
data_preprocess,
feature_engineering,
sd_engine,
gpt_v_generator,
# web_scraping,
web_scraping,
# email_login,
terminal,
editor,

View file

@ -8,13 +8,15 @@ from metagpt.utils.parse_html import simplify_html
@register_tool(tags=["web scraping"])
async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None:
"""view the HTML content of current page to understand the structure. When executed, the content will be printed out
async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> str:
"""view the HTML content of current page to understand the structure.
Args:
url (str): The URL of the web page to scrape.
requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements.
keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required
Returns:
str: The HTML content of the page.
"""
async with Browser() as browser:
await browser.goto(url)
@ -36,7 +38,7 @@ async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bo
html = "\n".join(i.text for i in nodes)
mem_fs.rm_file(filename)
print(html)
return html
# async def get_elements_outerhtml(self, element_ids: list[int]):

View file

@ -104,11 +104,13 @@ class ToolRecommender(BaseModel):
return ranked_tools
async def get_recommended_tool_info(self, **kwargs) -> str:
async def get_recommended_tool_info(self, fixed: list[str] = None, **kwargs) -> str:
"""
Wrap recommended tools with their info in a string, which can be used directly in a prompt.
"""
recommended_tools = await self.recommend_tools(**kwargs)
if fixed:
recommended_tools.extend([self.tools[tool_name] for tool_name in fixed if tool_name in self.tools])
if not recommended_tools:
return ""
tool_schemas = {tool.name: tool.schemas for tool in recommended_tools}