From a680a1a82fd16501c47f89c6efda0b756705a415 Mon Sep 17 00:00:00 2001 From: yzlin Date: Tue, 12 Mar 2024 16:11:57 +0800 Subject: [PATCH 1/2] update examples --- examples/di/crawl_webpage.py | 26 ++++++++++++++++--- examples/di/data_visualization.py | 2 +- examples/di/email_summary.py | 2 +- examples/di/imitate_webpage.py | 2 +- ...ools.py => machine_learning_with_tools.py} | 4 +-- examples/di/rm_image_background.py | 2 +- examples/di/sd_tool_usage.py | 2 +- examples/di/solve_math_problems.py | 2 +- metagpt/roles/di/data_interpreter.py | 2 +- metagpt/tools/libs/sd_engine.py | 1 - metagpt/tools/tool_recommend.py | 3 +-- 11 files changed, 32 insertions(+), 16 deletions(-) rename examples/di/{ml_engineer_with_tools.py => machine_learning_with_tools.py} (88%) diff --git a/examples/di/crawl_webpage.py b/examples/di/crawl_webpage.py index f06b85d9b..b8226f4f4 100644 --- a/examples/di/crawl_webpage.py +++ b/examples/di/crawl_webpage.py @@ -7,13 +7,31 @@ from metagpt.roles.di.data_interpreter import DataInterpreter +PAPER_LIST_REQ = """" +Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/, +and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables* +""" + +ECOMMERCE_REQ = """ +Get products data from website https://scrapeme.live/shop/ and save it as a csv file. +**Notice: Firstly parse the web page encoding and the text HTML structure; +The first page product name, price, product URL, and image URL must be saved in the csv;** +""" + +NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**; +下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整: +1. 爬取并本地保存html结构; +2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*; +3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间; +4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。 +5. 将全部结果存在本地csv中 +""" + async def main(): - prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/, - and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*""" - di = DataInterpreter(use_tools=True) + di = DataInterpreter(tools=["scrape_web_playwright"]) - await di.run(prompt) + await di.run(ECOMMERCE_REQ) if __name__ == "__main__": diff --git a/examples/di/data_visualization.py b/examples/di/data_visualization.py index 9af72dc42..1a21ab7cb 100644 --- a/examples/di/data_visualization.py +++ b/examples/di/data_visualization.py @@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter async def main(requirement: str = ""): - di = DataInterpreter(use_tools=False) + di = DataInterpreter() await di.run(requirement) diff --git a/examples/di/email_summary.py b/examples/di/email_summary.py index af081fee2..7c112767c 100644 --- a/examples/di/email_summary.py +++ b/examples/di/email_summary.py @@ -22,7 +22,7 @@ async def main(): Firstly, Please help me fetch the latest 5 senders and full letter contents. Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format.""" - di = DataInterpreter(use_tools=True) + di = DataInterpreter() await di.run(prompt) diff --git a/examples/di/imitate_webpage.py b/examples/di/imitate_webpage.py index e2c99b874..b00157d9d 100644 --- a/examples/di/imitate_webpage.py +++ b/examples/di/imitate_webpage.py @@ -15,7 +15,7 @@ Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a text file. Note: All required dependencies and environments have been fully installed and configured.""" - di = DataInterpreter(use_tools=True) + di = DataInterpreter(tools=["GPTvGenerator"]) await di.run(prompt) diff --git a/examples/di/ml_engineer_with_tools.py b/examples/di/machine_learning_with_tools.py similarity index 88% rename from examples/di/ml_engineer_with_tools.py rename to examples/di/machine_learning_with_tools.py index 6119ad843..42c0ef55b 100644 --- a/examples/di/ml_engineer_with_tools.py +++ b/examples/di/machine_learning_with_tools.py @@ -1,10 +1,10 @@ import asyncio -from metagpt.roles.di.ml_engineer import MLEngineer +from metagpt.roles.di.data_interpreter import DataInterpreter async def main(requirement: str): - role = MLEngineer(auto_run=True, use_tools=True) + role = DataInterpreter(tools=[""]) await role.run(requirement) diff --git a/examples/di/rm_image_background.py b/examples/di/rm_image_background.py index b74a79eeb..cb7900a0a 100644 --- a/examples/di/rm_image_background.py +++ b/examples/di/rm_image_background.py @@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter async def main(requirement: str = ""): - di = DataInterpreter(use_tools=False) + di = DataInterpreter() await di.run(requirement) diff --git a/examples/di/sd_tool_usage.py b/examples/di/sd_tool_usage.py index 69c7df5bd..b373a6251 100644 --- a/examples/di/sd_tool_usage.py +++ b/examples/di/sd_tool_usage.py @@ -8,7 +8,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter async def main(requirement: str = ""): - di = DataInterpreter(use_tools=True, goal=requirement) + di = DataInterpreter(tools=["SDEngine"]) await di.run(requirement) diff --git a/examples/di/solve_math_problems.py b/examples/di/solve_math_problems.py index 35a69c953..ae2c1b942 100644 --- a/examples/di/solve_math_problems.py +++ b/examples/di/solve_math_problems.py @@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter async def main(requirement: str = ""): - di = DataInterpreter(use_tools=False) + di = DataInterpreter() await di.run(requirement) diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py index 0a149e901..7eb5b9e8a 100644 --- a/metagpt/roles/di/data_interpreter.py +++ b/metagpt/roles/di/data_interpreter.py @@ -9,7 +9,7 @@ from metagpt.actions.di.ask_review import ReviewConst from metagpt.actions.di.execute_nb_code import ExecuteNbCode from metagpt.actions.di.write_analysis_code import CheckData, WriteCodeWithTools from metagpt.logs import logger -from metagpt.prompts.mi.write_analysis_code import DATA_INFO +from metagpt.prompts.di.write_analysis_code import DATA_INFO from metagpt.roles import Role from metagpt.schema import Message, Task, TaskResult from metagpt.strategy.task_type import TaskType diff --git a/metagpt/tools/libs/sd_engine.py b/metagpt/tools/libs/sd_engine.py index 0e3042f5c..b62e39db8 100644 --- a/metagpt/tools/libs/sd_engine.py +++ b/metagpt/tools/libs/sd_engine.py @@ -14,7 +14,6 @@ import requests from aiohttp import ClientSession from PIL import Image, PngImagePlugin -# from metagpt.const import SD_OUTPUT_FILE_REPO, SOURCE_ROOT from metagpt.logs import logger from metagpt.tools.tool_registry import register_tool diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py index fcdbc4254..9e9bf4a01 100644 --- a/metagpt/tools/tool_recommend.py +++ b/metagpt/tools/tool_recommend.py @@ -57,8 +57,7 @@ class RecommendTool(Action): class ToolRecommender(BaseModel): """ The default ToolRecommender: - 1. Recall: If plan exists, use exact match between task type and tool type to recall tools; - If plan doesn't exist (e.g. we use ReAct), return all user-specified tools; + 1. Recall: To be implemented in subclasses. Recall tools based on the given context and plan. 2. Rank: Use LLM to select final candidates from recalled set. """ From e960ac8dc8eafca456e902890d4825b7622a2677 Mon Sep 17 00:00:00 2001 From: yzlin Date: Tue, 12 Mar 2024 16:43:24 +0800 Subject: [PATCH 2/2] update requirement and example, recover legacy code --- ...h_tools.py => machine_learning_complex.py} | 2 +- metagpt/strategy/task_type.py | 10 +++++---- metagpt/tools/tool_recommend.py | 22 ++++++++++++++++++- requirements.txt | 2 ++ 4 files changed, 30 insertions(+), 6 deletions(-) rename examples/di/{machine_learning_with_tools.py => machine_learning_complex.py} (92%) diff --git a/examples/di/machine_learning_with_tools.py b/examples/di/machine_learning_complex.py similarity index 92% rename from examples/di/machine_learning_with_tools.py rename to examples/di/machine_learning_complex.py index 42c0ef55b..42059ac4f 100644 --- a/examples/di/machine_learning_with_tools.py +++ b/examples/di/machine_learning_complex.py @@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter async def main(requirement: str): - role = DataInterpreter(tools=[""]) + role = DataInterpreter(use_reflection=True, tools=[""]) await role.run(requirement) diff --git a/metagpt/strategy/task_type.py b/metagpt/strategy/task_type.py index 28a86f100..9eeeb79ce 100644 --- a/metagpt/strategy/task_type.py +++ b/metagpt/strategy/task_type.py @@ -19,29 +19,31 @@ class TaskTypeDef(BaseModel): class TaskType(Enum): + """By identifying specific types of tasks, we can inject human priors (guidance) to help task solving""" + EDA = TaskTypeDef( name="eda", desc="For performing exploratory data analysis", guidance=EDA_PROMPT, ) DATA_PREPROCESS = TaskTypeDef( - name="data_preprocess", + name="data preprocessing", desc="For preprocessing dataset in a data analysis or machine learning task ONLY," "general data operation doesn't fall into this type", guidance=DATA_PREPROCESS_PROMPT, ) FEATURE_ENGINEERING = TaskTypeDef( - name="feature_engineering", + name="feature engineering", desc="Only for creating new columns for input data.", guidance=FEATURE_ENGINEERING_PROMPT, ) MODEL_TRAIN = TaskTypeDef( - name="model_train", + name="model train", desc="Only for training model.", guidance=MODEL_TRAIN_PROMPT, ) MODEL_EVALUATE = TaskTypeDef( - name="model_evaluate", + name="model evaluate", desc="Only for evaluating model.", guidance=MODEL_EVALUATE_PROMPT, ) diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py index 9e9bf4a01..9b00a7379 100644 --- a/metagpt/tools/tool_recommend.py +++ b/metagpt/tools/tool_recommend.py @@ -62,7 +62,7 @@ class ToolRecommender(BaseModel): """ tools: dict[str, Tool] = {} - force: bool = False + force: bool = False # whether to forcedly recommend the specified tools @field_validator("tools", mode="before") @classmethod @@ -145,6 +145,26 @@ class ToolRecommender(BaseModel): return list(valid_tools.values())[:topk] +class TypeMatchToolRecommender(ToolRecommender): + """ + A legacy ToolRecommender using task type matching at the recall stage: + 1. Recall: Find tools based on exact match between task type and tool tag; + 2. Rank: LLM rank, the same as the default ToolRecommender. + """ + + async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]: + if not plan: + return list(self.tools.values())[:topk] + + # find tools based on exact match between task type and tool tag + task_type = plan.current_task.task_type + candidate_tools = TOOL_REGISTRY.get_tools_by_tag(task_type) + candidate_tool_names = set(self.tools.keys()) & candidate_tools.keys() + recalled_tools = [candidate_tools[tool_name] for tool_name in candidate_tool_names] + + return recalled_tools[:topk] + + class BM25ToolRecommender(ToolRecommender): """ A ToolRecommender using BM25 at the recall stage: diff --git a/requirements.txt b/requirements.txt index 64b174913..d0ee8c95c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,3 +71,5 @@ Pillow imap_tools==1.5.0 # Used by metagpt/tools/libs/email_login.py qianfan==0.3.2 dashscope==1.14.1 +rank-bm25==0.2.2 # for tool recommendation +jieba==0.42.1 # for tool recommendation \ No newline at end of file