From a680a1a82fd16501c47f89c6efda0b756705a415 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Tue, 12 Mar 2024 16:11:57 +0800
Subject: [PATCH 1/2] update examples

---
 examples/di/crawl_webpage.py                  | 26 ++++++++++++++++---
 examples/di/data_visualization.py             |  2 +-
 examples/di/email_summary.py                  |  2 +-
 examples/di/imitate_webpage.py                |  2 +-
 ...ools.py => machine_learning_with_tools.py} |  4 +--
 examples/di/rm_image_background.py            |  2 +-
 examples/di/sd_tool_usage.py                  |  2 +-
 examples/di/solve_math_problems.py            |  2 +-
 metagpt/roles/di/data_interpreter.py          |  2 +-
 metagpt/tools/libs/sd_engine.py               |  1 -
 metagpt/tools/tool_recommend.py               |  3 +--
 11 files changed, 32 insertions(+), 16 deletions(-)
 rename examples/di/{ml_engineer_with_tools.py => machine_learning_with_tools.py} (88%)

diff --git a/examples/di/crawl_webpage.py b/examples/di/crawl_webpage.py
index f06b85d9b..b8226f4f4 100644
--- a/examples/di/crawl_webpage.py
+++ b/examples/di/crawl_webpage.py
@@ -7,13 +7,31 @@
 
 from metagpt.roles.di.data_interpreter import DataInterpreter
 
+PAPER_LIST_REQ = """
+Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
+and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
+"""
+
+ECOMMERCE_REQ = """
+Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
+**Notice: Firstly parse the web page encoding and the text HTML structure;
+The first page product name, price, product URL, and image URL must be saved in the csv;**
+"""
+
+NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
+下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整:
+1. 爬取并本地保存html结构;
+2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*;
+3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间;
+4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。
+5. 将全部结果存在本地csv中
+"""
+
 
 async def main():
-    prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
-    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
-    di = DataInterpreter(use_tools=True)
+    di = DataInterpreter(tools=["scrape_web_playwright"])
 
-    await di.run(prompt)
+    await di.run(ECOMMERCE_REQ)
 
 
 if __name__ == "__main__":
diff --git a/examples/di/data_visualization.py b/examples/di/data_visualization.py
index 9af72dc42..1a21ab7cb 100644
--- a/examples/di/data_visualization.py
+++ b/examples/di/data_visualization.py
@@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=False)
+    di = DataInterpreter()
     await di.run(requirement)
 
 
diff --git a/examples/di/email_summary.py b/examples/di/email_summary.py
index af081fee2..7c112767c 100644
--- a/examples/di/email_summary.py
+++ b/examples/di/email_summary.py
@@ -22,7 +22,7 @@ async def main():
             Firstly, Please help me fetch the latest 5 senders and full letter contents.
             Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format."""
 
-    di = DataInterpreter(use_tools=True)
+    di = DataInterpreter()
 
     await di.run(prompt)
 
diff --git a/examples/di/imitate_webpage.py b/examples/di/imitate_webpage.py
index e2c99b874..b00157d9d 100644
--- a/examples/di/imitate_webpage.py
+++ b/examples/di/imitate_webpage.py
@@ -15,7 +15,7 @@ Firstly, utilize Selenium and WebDriver for rendering.
 Secondly, convert image to a webpage including HTML, CSS and JS in one go. 
 Finally, save webpage in a text file. 
 Note: All required dependencies and environments have been fully installed and configured."""
-    di = DataInterpreter(use_tools=True)
+    di = DataInterpreter(tools=["GPTvGenerator"])
 
     await di.run(prompt)
 
diff --git a/examples/di/ml_engineer_with_tools.py b/examples/di/machine_learning_with_tools.py
similarity index 88%
rename from examples/di/ml_engineer_with_tools.py
rename to examples/di/machine_learning_with_tools.py
index 6119ad843..42c0ef55b 100644
--- a/examples/di/ml_engineer_with_tools.py
+++ b/examples/di/machine_learning_with_tools.py
@@ -1,10 +1,10 @@
 import asyncio
 
-from metagpt.roles.di.ml_engineer import MLEngineer
+from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
 async def main(requirement: str):
-    role = MLEngineer(auto_run=True, use_tools=True)
+    role = DataInterpreter(tools=["<all>"])
     await role.run(requirement)
 
 
diff --git a/examples/di/rm_image_background.py b/examples/di/rm_image_background.py
index b74a79eeb..cb7900a0a 100644
--- a/examples/di/rm_image_background.py
+++ b/examples/di/rm_image_background.py
@@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=False)
+    di = DataInterpreter()
     await di.run(requirement)
 
 
diff --git a/examples/di/sd_tool_usage.py b/examples/di/sd_tool_usage.py
index 69c7df5bd..b373a6251 100644
--- a/examples/di/sd_tool_usage.py
+++ b/examples/di/sd_tool_usage.py
@@ -8,7 +8,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=True, goal=requirement)
+    di = DataInterpreter(tools=["SDEngine"])
     await di.run(requirement)
 
 
diff --git a/examples/di/solve_math_problems.py b/examples/di/solve_math_problems.py
index 35a69c953..ae2c1b942 100644
--- a/examples/di/solve_math_problems.py
+++ b/examples/di/solve_math_problems.py
@@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
 async def main(requirement: str = ""):
-    di = DataInterpreter(use_tools=False)
+    di = DataInterpreter()
     await di.run(requirement)
 
 
diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py
index 0a149e901..7eb5b9e8a 100644
--- a/metagpt/roles/di/data_interpreter.py
+++ b/metagpt/roles/di/data_interpreter.py
@@ -9,7 +9,7 @@ from metagpt.actions.di.ask_review import ReviewConst
 from metagpt.actions.di.execute_nb_code import ExecuteNbCode
 from metagpt.actions.di.write_analysis_code import CheckData, WriteCodeWithTools
 from metagpt.logs import logger
-from metagpt.prompts.mi.write_analysis_code import DATA_INFO
+from metagpt.prompts.di.write_analysis_code import DATA_INFO
 from metagpt.roles import Role
 from metagpt.schema import Message, Task, TaskResult
 from metagpt.strategy.task_type import TaskType
diff --git a/metagpt/tools/libs/sd_engine.py b/metagpt/tools/libs/sd_engine.py
index 0e3042f5c..b62e39db8 100644
--- a/metagpt/tools/libs/sd_engine.py
+++ b/metagpt/tools/libs/sd_engine.py
@@ -14,7 +14,6 @@ import requests
 from aiohttp import ClientSession
 from PIL import Image, PngImagePlugin
 
-#
 from metagpt.const import SD_OUTPUT_FILE_REPO, SOURCE_ROOT
 from metagpt.logs import logger
 from metagpt.tools.tool_registry import register_tool
diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py
index fcdbc4254..9e9bf4a01 100644
--- a/metagpt/tools/tool_recommend.py
+++ b/metagpt/tools/tool_recommend.py
@@ -57,8 +57,7 @@ class RecommendTool(Action):
 class ToolRecommender(BaseModel):
     """
     The default ToolRecommender:
-    1. Recall: If plan exists, use exact match between task type and tool type to recall tools;
-               If plan doesn't exist (e.g. we use ReAct), return all user-specified tools;
+    1. Recall: To be implemented in subclasses. Recall tools based on the given context and plan.
     2. Rank: Use LLM to select final candidates from recalled set.
     """
 

From e960ac8dc8eafca456e902890d4825b7622a2677 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Tue, 12 Mar 2024 16:43:24 +0800
Subject: [PATCH 2/2] update requirement and example, recover legacy code

---
 ...h_tools.py => machine_learning_complex.py} |  2 +-
 metagpt/strategy/task_type.py                 | 10 +++++----
 metagpt/tools/tool_recommend.py               | 22 ++++++++++++++++++-
 requirements.txt                              |  2 ++
 4 files changed, 30 insertions(+), 6 deletions(-)
 rename examples/di/{machine_learning_with_tools.py => machine_learning_complex.py} (92%)

diff --git a/examples/di/machine_learning_with_tools.py b/examples/di/machine_learning_complex.py
similarity index 92%
rename from examples/di/machine_learning_with_tools.py
rename to examples/di/machine_learning_complex.py
index 42c0ef55b..42059ac4f 100644
--- a/examples/di/machine_learning_with_tools.py
+++ b/examples/di/machine_learning_complex.py
@@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
 async def main(requirement: str):
-    role = DataInterpreter(tools=["<all>"])
+    role = DataInterpreter(use_reflection=True, tools=["<all>"])
     await role.run(requirement)
 
 
diff --git a/metagpt/strategy/task_type.py b/metagpt/strategy/task_type.py
index 28a86f100..9eeeb79ce 100644
--- a/metagpt/strategy/task_type.py
+++ b/metagpt/strategy/task_type.py
@@ -19,29 +19,31 @@ class TaskTypeDef(BaseModel):
 
 
 class TaskType(Enum):
+    """By identifying specific types of tasks, we can inject human priors (guidance) to help task solving"""
+
     EDA = TaskTypeDef(
         name="eda",
         desc="For performing exploratory data analysis",
         guidance=EDA_PROMPT,
     )
     DATA_PREPROCESS = TaskTypeDef(
-        name="data_preprocess",
+        name="data preprocessing",
         desc="For preprocessing dataset in a data analysis or machine learning task ONLY,"
         "general data operation doesn't fall into this type",
         guidance=DATA_PREPROCESS_PROMPT,
     )
     FEATURE_ENGINEERING = TaskTypeDef(
-        name="feature_engineering",
+        name="feature engineering",
         desc="Only for creating new columns for input data.",
         guidance=FEATURE_ENGINEERING_PROMPT,
     )
     MODEL_TRAIN = TaskTypeDef(
-        name="model_train",
+        name="model train",
         desc="Only for training model.",
         guidance=MODEL_TRAIN_PROMPT,
     )
     MODEL_EVALUATE = TaskTypeDef(
-        name="model_evaluate",
+        name="model evaluate",
         desc="Only for evaluating model.",
         guidance=MODEL_EVALUATE_PROMPT,
     )
diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py
index 9e9bf4a01..9b00a7379 100644
--- a/metagpt/tools/tool_recommend.py
+++ b/metagpt/tools/tool_recommend.py
@@ -62,7 +62,7 @@ class ToolRecommender(BaseModel):
     """
 
     tools: dict[str, Tool] = {}
-    force: bool = False
+    force: bool = False  # whether to forcibly recommend the specified tools
 
     @field_validator("tools", mode="before")
     @classmethod
@@ -145,6 +145,26 @@ class ToolRecommender(BaseModel):
         return list(valid_tools.values())[:topk]
 
 
+class TypeMatchToolRecommender(ToolRecommender):
+    """
+    A legacy ToolRecommender using task type matching at the recall stage:
+    1. Recall: Find tools based on exact match between task type and tool tag;
+    2. Rank: LLM rank, the same as the default ToolRecommender.
+    """
+
+    async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
+        if not plan:
+            return list(self.tools.values())[:topk]
+
+        # find tools based on exact match between task type and tool tag
+        task_type = plan.current_task.task_type
+        candidate_tools = TOOL_REGISTRY.get_tools_by_tag(task_type)
+        # walk self.tools in insertion order (not via a set) so the topk cut-off is deterministic
+        recalled_tools = [candidate_tools[name] for name in self.tools if name in candidate_tools]
+
+        return recalled_tools[:topk]
+
+
 class BM25ToolRecommender(ToolRecommender):
     """
     A ToolRecommender using BM25 at the recall stage:
diff --git a/requirements.txt b/requirements.txt
index 64b174913..d0ee8c95c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -71,3 +71,5 @@ Pillow
 imap_tools==1.5.0  # Used by metagpt/tools/libs/email_login.py
 qianfan==0.3.2
 dashscope==1.14.1
+rank-bm25==0.2.2  # for tool recommendation
+jieba==0.42.1  # for tool recommendation
\ No newline at end of file