Merge branch 'data_analyst_ldy' into 'mgx_ops'

Data analyst ldy. See merge request pub/MetaGPT!189
2026-04-29 10:56:22 +02:00 · 2024-07-15 06:55:52 +00:00 · 2024-07-15 06:55:52 +00:00 · e2ce006d15
commit e2ce006d15
parent 123850777f b214e49733
17 changed files with 376 additions and 198 deletions
--- a/metagpt/tools/libs/__init__.py
+++ b/metagpt/tools/libs/__init__.py
@@ -5,11 +5,11 @@
 # @File    : __init__.py
 # @Desc    :
 from metagpt.tools.libs import (
-    # data_preprocess,
-    # feature_engineering,
+    data_preprocess,
+    feature_engineering,
    sd_engine,
    gpt_v_generator,
-    # web_scraping,
+    web_scraping,
    # email_login,
    terminal,
    editor,
@@ -20,11 +20,11 @@ from metagpt.tools.libs import (
 from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description

 _ = (
-    # data_preprocess,
-    # feature_engineering,
+    data_preprocess,
+    feature_engineering,
    sd_engine,
    gpt_v_generator,
-    # web_scraping,
+    web_scraping,
    # email_login,
    terminal,
    editor,
--- a/metagpt/tools/libs/web_scraping.py
+++ b/metagpt/tools/libs/web_scraping.py
@@ -8,13 +8,15 @@ from metagpt.utils.parse_html import simplify_html


@register_tool(tags=["web scraping"])
-async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None:
-    """view the HTML content of current page to understand the structure. When executed, the content will be printed out
+async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> str:
+    """view the HTML content of current page to understand the structure.

    Args:
        url (str): The URL of the web page to scrape.
        requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements.
        keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required
+    Returns:
+        str: The HTML content of the page.
    """
    async with Browser() as browser:
        await browser.goto(url)
@@ -36,7 +38,7 @@ async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bo
        html = "\n".join(i.text for i in nodes)

    mem_fs.rm_file(filename)
-    print(html)
+    return html


 # async def get_elements_outerhtml(self, element_ids: list[int]):
--- a/metagpt/tools/tool_recommend.py
+++ b/metagpt/tools/tool_recommend.py
@@ -104,11 +104,13 @@ class ToolRecommender(BaseModel):

        return ranked_tools

-    async def get_recommended_tool_info(self, **kwargs) -> str:
+    async def get_recommended_tool_info(self, fixed: list[str] = None, **kwargs) -> str:
        """
        Wrap recommended tools with their info in a string, which can be used directly in a prompt.
        """
        recommended_tools = await self.recommend_tools(**kwargs)
+        if fixed:
+            recommended_tools.extend([self.tools[tool_name] for tool_name in fixed if tool_name in self.tools])
        if not recommended_tools:
            return ""
        tool_schemas = {tool.name: tool.schemas for tool in recommended_tools}