Merge branch 'main' into feat_memory

2026-07-08 16:12:16 +02:00 · 2024-03-27 10:48:31 +08:00 · 2024-03-27 10:48:31 +08:00 · d7968fdc20
commit d7968fdc20
parent 1eb141a45f 643450388a
160 changed files with 2194 additions and 2096 deletions
--- a/examples/debate.py
+++ b/examples/debate.py
@ -5,6 +5,7 @@ Author: garylin2099
@Modified By: mashenquan, 2023-11-1. In accordance with Chapter 2.1.3 of RFC 116, modify the data type of the `send_to`
        value of the `Message` object; modify the argument type of `get_by_actions`.
 """
+
 import asyncio
 import platform
 from typing import Any
@ -105,4 +106,4 @@ def main(idea: str, investment: float = 3.0, n_round: int = 10):


 if __name__ == "__main__":
-    fire.Fire(main)
+    fire.Fire(main)  # run as python debate.py --idea="TOPIC" --investment=3.0 --n_round=5
--- a/examples/di/README.md
+++ b/examples/di/README.md
@ -1,7 +1,7 @@
 # Data Interpreter (DI)

 ## What is Data Interpreter
-Data Interpreter is an agent who solves problems through codes. It understands user requirements, makes plans, writes codes for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios, please check out the examples below.
+Data Interpreter is an agent who solves data-related problems through code. It understands user requirements, makes plans, writes code for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios; please check out the examples below. For overall design and technical details, please see our [paper](https://arxiv.org/abs/2402.18679).

 ## Example List
 - Data visualization
@ -12,7 +12,9 @@ ## Example List
 - Tool usage: web page imitation
 - Tool usage: web crawling
 - Tool usage: text2image
- Tool usage: email summarization and response
+- Tool usage: email summarization and response
 - More on the way!

-Please see [here](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for detailed explanation.
+Please see the [docs](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for more explanation.
+
+We are continuously releasing code; stay tuned!
--- a/examples/di/arxiv_reader.py
+++ b/examples/di/arxiv_reader.py
@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+
+async def main():
+    template = "https://arxiv.org/list/{tag}/pastweek?skip=0&show=300"
+    tags = ["cs.ai", "cs.cl", "cs.lg", "cs.se"]
+    urls = [template.format(tag=tag) for tag in tags]
+    prompt = f"""This is a collection of arxiv urls: '{urls}' .
+Record each article, remove duplicates by title (they may have multiple tags), filter out papers related to 
+large language model / agent / llm, print top 100 and visualize the word count of the titles"""
+    di = DataInterpreter(react_mode="react", tools=["scrape_web_playwright"])
+
+    await di.run(prompt)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/examples/di/crawl_webpage.py
+++ b/examples/di/crawl_webpage.py
@ -7,13 +7,31 @@

 from metagpt.roles.di.data_interpreter import DataInterpreter

+PAPER_LIST_REQ = """
+Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
+and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
+"""
+
+ECOMMERCE_REQ = """
+Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
+**Notice: Firstly parse the web page encoding and the text HTML structure;
+The first page product name, price, product URL, and image URL must be saved in the csv;**
+"""
+
+NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
+下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整:
+1. 爬取并本地保存html结构;
+2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*;
+3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间;
+4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。
+5. 将全部结果存在本地csv中
+"""
+

 async def main():
-    prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
-    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
-    di = DataInterpreter(use_tools=True)
+    di = DataInterpreter(tools=["scrape_web_playwright"])

-    await di.run(prompt)
+    await di.run(ECOMMERCE_REQ)


 if __name__ == "__main__":
--- a/examples/di/custom_tool.py
+++ b/examples/di/custom_tool.py
@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2024/3/22 10:54
+@Author  : alexanderwu
+@File    : custom_tool.py
+"""
+
+from metagpt.roles.di.data_interpreter import DataInterpreter
+from metagpt.tools.tool_registry import register_tool
+
+
+@register_tool()
+def magic_function(arg1: str, arg2: int) -> dict:
+    """
+    The magic function that does something.
+
+    Args:
+        arg1 (str): ...
+        arg2 (int): ...
+
+    Returns:
+        dict: ...
+    """
+    return {"arg1": arg1 * 3, "arg2": arg2 * 5}
+
+
+async def main():
+    di = DataInterpreter(tools=["magic_function"])
+    await di.run("Just call the magic function with arg1 'A' and arg2 2. Tell me the result.")
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/examples/di/data_visualization.py
+++ b/examples/di/data_visualization.py
@ -1,14 +1,17 @@
 import asyncio

+from metagpt.logs import logger
 from metagpt.roles.di.data_interpreter import DataInterpreter
+from metagpt.utils.recovery_util import save_history


 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=False)
-    await di.run(requirement)
+    di = DataInterpreter()
+    rsp = await di.run(requirement)
+    logger.info(rsp)
+    save_history(role=di)


 if __name__ == "__main__":
    requirement = "Run data analysis on sklearn Iris dataset, include a plot"
-
    asyncio.run(main(requirement))
--- a/examples/di/email_summary.py
+++ b/examples/di/email_summary.py
@ -22,7 +22,7 @@ async def main():
            Firstly, Please help me fetch the latest 5 senders and full letter contents.
            Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format."""

-    di = DataInterpreter(use_tools=True)
+    di = DataInterpreter()

    await di.run(prompt)

--- a/examples/di/imitate_webpage.py
+++ b/examples/di/imitate_webpage.py
@ -12,10 +12,9 @@ async def main():
    web_url = "https://pytorch.org/"
    prompt = f"""This is a URL of webpage: '{web_url}' .
 Firstly, utilize Selenium and WebDriver for rendering. 
-Secondly, convert image to a webpage including HTML, CSS and JS in one go. 
-Finally, save webpage in a text file. 
+Secondly, convert image to a webpage including HTML, CSS and JS in one go.
 Note: All required dependencies and environments have been fully installed and configured."""
-    di = DataInterpreter(use_tools=True)
+    di = DataInterpreter(tools=["GPTvGenerator"])

    await di.run(prompt)

--- a/examples/di/machine_learning.py
+++ b/examples/di/machine_learning.py
@ -2,11 +2,21 @@ import fire

 from metagpt.roles.di.data_interpreter import DataInterpreter

+WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."

-async def main(auto_run: bool = True):
-    requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."
-    di = DataInterpreter(auto_run=auto_run)
-    await di.run(requirement)
+DATA_DIR = "path/to/your/data"
+# sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data
+SALES_FORECAST_REQ = f"""Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, the others is train dataset), include plot total sales trends, print metric and plot scatter plots of
+ground truth and predictions on validation data. Dataset is {DATA_DIR}/train.csv, the metric is weighted mean absolute error (WMAE) for test data. Notice: *print* key variables to get more information for next task step.
+"""
+
+REQUIREMENTS = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ}
+
+
+async def main(use_case: str = "wine"):
+    mi = DataInterpreter()
+    requirement = REQUIREMENTS[use_case]
+    await mi.run(requirement)


 if __name__ == "__main__":
--- a/examples/di/machine_learning_with_tools.py
+++ b/examples/di/machine_learning_with_tools.py
@ -0,0 +1,16 @@
+import asyncio
+
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+
+async def main(requirement: str):
+    role = DataInterpreter(use_reflection=True, tools=["<all>"])
+    await role.run(requirement)
+
+
+if __name__ == "__main__":
+    data_path = "your/path/to/titanic"
+    train_path = f"{data_path}/split_train.csv"
+    eval_path = f"{data_path}/split_eval.csv"
+    requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{train_path}', eval data path: '{eval_path}'."
+    asyncio.run(main(requirement))
--- a/examples/di/ml_engineer_with_tools.py
+++ b/examples/di/ml_engineer_with_tools.py
@ -1,16 +0,0 @@
-import asyncio
-
-from metagpt.roles.di.ml_engineer import MLEngineer
-
-
-async def main(requirement: str):
-    role = MLEngineer(auto_run=True, use_tools=True)
-    await role.run(requirement)
-
-
-if __name__ == "__main__":
-    data_path = "your_path_to_icr/icr-identify-age-related-conditions"
-    train_path = f"{data_path}/your_train_data.csv"
-    eval_path = f"{data_path}/your_eval_data.csv"
-    requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {train_path}, eval data path:{eval_path}."
-    asyncio.run(main(requirement))
--- a/examples/di/rm_image_background.py
+++ b/examples/di/rm_image_background.py
@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=False)
+    di = DataInterpreter()
    await di.run(requirement)


--- a/examples/di/sd_tool_usage.py
+++ b/examples/di/sd_tool_usage.py
@ -8,7 +8,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=True, goal=requirement)
+    di = DataInterpreter(tools=["SDEngine"])
    await di.run(requirement)


--- a/examples/di/solve_math_problems.py
+++ b/examples/di/solve_math_problems.py
@ -4,10 +4,11 @@ from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=False)
+    di = DataInterpreter()
    await di.run(requirement)


 if __name__ == "__main__":
    requirement = "Solve this math problem: The greatest common divisor of positive integers m and n is 6. The least common multiple of m and n is 126. What is the least possible value of m + n?"
+    # answer: 60 (m = 18, n = 42)
    asyncio.run(main(requirement))
--- a/examples/reverse_engineering.py
+++ b/examples/reverse_engineering.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import asyncio
+import shutil
+from pathlib import Path
+
+import typer
+
+from metagpt.actions.rebuild_class_view import RebuildClassView
+from metagpt.actions.rebuild_sequence_view import RebuildSequenceView
+from metagpt.context import Context
+from metagpt.llm import LLM
+from metagpt.logs import logger
+from metagpt.utils.git_repository import GitRepository
+from metagpt.utils.project_repo import ProjectRepo
+
+app = typer.Typer(add_completion=False, pretty_exceptions_show_locals=False)
+
+
+@app.command("", help="Python project reverse engineering.")
+def startup(
+    project_root: str = typer.Argument(
+        default="",
+        help="Specify the root directory of the existing project for reverse engineering.",
+    ),
+    output_dir: str = typer.Option(default="", help="Specify the output directory path for reverse engineering."),
+):
+    package_root = Path(project_root)
+    if not package_root.exists():
+        raise FileNotFoundError(f"{project_root} not exists")
+    if not _is_python_package_root(package_root):
+        raise FileNotFoundError(f'There are no "*.py" files under "{project_root}".')
+    init_file = package_root / "__init__.py"  # used by pyreverse
+    init_file_exists = init_file.exists()
+    if not init_file_exists:
+        init_file.touch()
+
+    if not output_dir:
+        output_dir = package_root / "../reverse_engineering_output"
+    logger.info(f"output dir:{output_dir}")
+    try:
+        asyncio.run(reverse_engineering(package_root, Path(output_dir)))
+    finally:
+        if not init_file_exists:
+            init_file.unlink(missing_ok=True)
+        tmp_dir = package_root / "__dot__"
+        if tmp_dir.exists():
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def _is_python_package_root(package_root: Path) -> bool:
+    for file_path in package_root.iterdir():
+        if file_path.is_file():
+            if file_path.suffix == ".py":
+                return True
+    return False
+
+
+async def reverse_engineering(package_root: Path, output_dir: Path):
+    ctx = Context()
+    ctx.git_repo = GitRepository(output_dir)
+    ctx.repo = ProjectRepo(ctx.git_repo)
+    action = RebuildClassView(name="ReverseEngineering", i_context=str(package_root), llm=LLM(), context=ctx)
+    await action.run()
+
+    action = RebuildSequenceView(name="ReverseEngineering", llm=LLM(), context=ctx)
+    await action.run()
+
+
+if __name__ == "__main__":
+    app()
--- a/examples/search_with_specific_engine.py
+++ b/examples/search_with_specific_engine.py
@ -4,21 +4,17 @@
 """
 import asyncio

+from metagpt.config2 import Config
 from metagpt.roles import Searcher
-from metagpt.tools.search_engine import SearchEngine, SearchEngineType
+from metagpt.tools.search_engine import SearchEngine


 async def main():
    question = "What are the most interesting human facts?"
-    kwargs = {"api_key": "", "cse_id": "", "proxy": None}
-    # Serper API
-    # await Searcher(search_engine=SearchEngine(engine=SearchEngineType.SERPER_GOOGLE, **kwargs)).run(question)
-    # SerpAPI
-    # await Searcher(search_engine=SearchEngine(engine=SearchEngineType.SERPAPI_GOOGLE, **kwargs)).run(question)
-    # Google API
-    # await Searcher(search_engine=SearchEngine(engine=SearchEngineType.DIRECT_GOOGLE, **kwargs)).run(question)
-    # DDG API
-    await Searcher(search_engine=SearchEngine(engine=SearchEngineType.DUCK_DUCK_GO, **kwargs)).run(question)
+
+    search = Config.default().search
+    kwargs = search.model_dump()
+    await Searcher(search_engine=SearchEngine(engine=search.api_type, **kwargs)).run(question)


 if __name__ == "__main__":