From 0271cd7f7715351ce4c9dd4fd38ad37a6064934f Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Fri, 15 Mar 2024 15:30:42 +0800
Subject: [PATCH 01/12] fix repeated api call bug

---
 metagpt/provider/openai_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py
index 8f3b71c42..e99707ab9 100644
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@@ -142,7 +142,7 @@ class OpenAILLM(BaseLLM):
     async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
         """when streaming, print each token in place."""
         if stream:
-            await self._achat_completion_stream(messages, timeout=timeout)
+            return await self._achat_completion_stream(messages, timeout=timeout)
 
         rsp = await self._achat_completion(messages, timeout=timeout)
         return self.get_choice_text(rsp)

From 804dc169f461ad20400bce148193d483f93d6ffc Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Fri, 15 Mar 2024 21:25:30 +0800
Subject: [PATCH 02/12] Add examples of paper reproduction

---
 examples/di/README.md               | 84 ++++++++++++++++++++++++++++-
 examples/di/requirements_prompt.py  | 69 ++++++++++++++++++++++++
 examples/di/run_ml_benchmark.py     | 21 ++++++++
 examples/di/run_open_ended_tasks.py | 21 ++++++++
 4 files changed, 193 insertions(+), 2 deletions(-)
 create mode 100644 examples/di/requirements_prompt.py
 create mode 100644 examples/di/run_ml_benchmark.py
 create mode 100644 examples/di/run_open_ended_tasks.py

diff --git a/examples/di/README.md b/examples/di/README.md
index f63795f13..3dbdc328b 100644
--- a/examples/di/README.md
+++ b/examples/di/README.md
@@ -12,9 +12,89 @@ ## Example List
 - Tool usage: web page imitation
 - Tool usage: web crawling
 - Tool usage: text2image
-- Tool usage: email summarization and response\
+- Tool usage: email summarization and response
 - More on the way!
 
 Please see the [docs](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for more explanation.
 
-We are continuously releasing codes, stay tuned!
+## Paper Experiments
+
+Before running the experiments, download the [di_dataset](https://drive.google.com/drive/folders/17SpI9WL9kzd260q2DArbXKNcqhidjA7s?usp=sharing) and place it in the data directory (`DATA_PATH` by default, or any other path you prefer). Note that `DATA_PATH` is defined as `METAGPT_ROOT / "data"`.
+
+To reproduce the results in the paper, run the following commands:
+
+```
+python run_ml_benchmark.py --task_name 05_titanic
+python run_open_ended_tasks.py --task_name 14_image_background_removal --data_dir directory_to_di_dataset --use_reflection True
+```
+
+The very simple `run_ml_benchmark.py` and `run_open_ended_tasks.py` scripts implement the pipeline of the Data Interpreter.
+
+Some key arguments:
+
+- `--task_name`: required, the name of the task that combines the ID and the dataset name or the scenario name, e.g., `05_titanic` or `14_image_background_removal`.
+- `--data_dir`: optional, the directory that stores the `di_dataset` (default is `DATA_PATH`).
+- `--use_reflection`: optional, the flag to use reflection or not (default is True).
+
+## Data Interpreter Benchmark
+
+### Data Interpreter Dataset Structure
+
+di_dataset
+
+- ml_benchmark
+    - 05_titanic
+    - 06_house-prices-advanced-regression-techniques
+    - 07_santander-customer-transaction-prediction
+    - 08_icr-identify-age-related-conditions
+    - 09_santander-value-prediction-challenge
+- open_ended_tasks
+    - 01_invoice_ocr
+    - 02_invoice_ocr
+    - 03_invoice_ocr
+    - 14_image_background_removal
+    - 16_image_2_code_generation
+    - 17_image_2_code_generation
+
+### ML-Benchmark Dataset and Requirements
+
+ML-Benchmark contains 9 typical machine learning datasets.
+
+| ID | Dataset Name                                  | User Requirement                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+|----|-----------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 01 | Iris                                          | Run data analysis on sklearn Iris dataset, include a plot                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| 02 | Diabetes                                      | Run data analysis on sklearn Diabetes dataset, include a plot                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| 03 | Wine recognition                              | Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class with 20% as test set, and show prediction accuracy                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| 04 | Breast Cancer Wisconsin (Diagnostic)          | Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| 05 | Titanic                                       | This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/05_titanic/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/05_titanic/split_eval.csv'.                                                                                                                                                                               |
+| 06 | House Prices - Advanced Regression Techniques | This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/06_house-prices-advanced-regression-techniques/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/06_house-prices-advanced-regression-techniques/split_eval.csv'.      |
+| 07 | Santander Customer Transaction Prediction     | This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/07_santander-customer-transaction-prediction/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/07_santander-customer-transaction-prediction/split_eval.csv' .                                                                                    |
+| 08 | ICR - Identifying Age-Related Conditions      | This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions. The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/08_icr-identify-age-related-conditions/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/08_icr-identify-age-related-conditions/split_eval.csv' . |
+| 09 | Santander Value Prediction Challenge          | This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/09_santander-value-prediction-challenge/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/09_santander-value-prediction-challenge/split_eval.csv' .                                                                                                     |
+
+### Open-Ended Tasks Dataset and Requirements
+
+The Open-Ended Tasks dataset contains 20 moderately challenging open-ended tasks that require the Data Interpreter to understand user requirements, plan and decompose tasks, and generate and execute code.
+
+| ID | Scenario                           | Scenario Description                                                                                                                                    | User Requirement                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+|----|------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 1  | Invoice OCR                        | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is an English invoice image. Your goal is to perform OCR on the image, extract the total amount from ocr result and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{DATA_PATH}/open_ended_tasks/01_invoice_ocr.png'                                                                                                                                                                      |
+| 2  | Invoice OCR                        | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is a Chinese invoice image. Your goal is to perform OCR on the image and only output the recognized text word results, nothing else is needed, then extract the total amount and receipt ID starting with 'No' from ocr text words results and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{DATA_PATH}/open_ended_tasks/02_invoice_ocr.jpg'                                           |
+| 3  | Invoice OCR                        | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is an invoice image for OCR. Your goal is to perform OCR on the image, extract the total amount and save it into an Excel table format, using PaddleOCR with lang='en' The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{DATA_PATH}/open_ended_tasks/03_invoice_ocr.jpg'                                                                                                                                                   |
+| 4  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | Get data from `paperlist` table in <https://papercopic.com/statistics/iclr-statistics/iclr-2024-statistics/>, and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*                                                                                                                                                                                                                                                   |
+| 5  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 获取https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html的cpi数据, 请按照这个计划一步一步执行: 1. 检测目标网页的编码类型和html结构. 2.爬取网页, 将网页正文内容去重，并转换为段落清晰适合阅读的纯文本, 并保存到target.txt. 3.设计多个正则匹配表达式来匹配target.txt中关键语句, 使用try-except语句组合各个正则匹配, 注意网页文本是中文. 4.最后使用中文总结概括关键语句回答用户的请求. **注意: 如果是代码块, 请将代码块的关键变量结果打印出来; 如果是网页文本就打印前200个字符.**                                                                                                                                                                       |
+| 6  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 爬取电子商务网站https://scrapeme.live/shop/中的商品数据并保存为csv文件。**注意: 第一步要先解析网页编码和html结构; csv中保存商品名称、价格、url、图片网址;**                                                                                                                                                                                                                                                                                                                                                                                    |
+| 7  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 从36kr创投平台https://pitchhub.36kr.com/financing-flash所有初创企业融资的信息, **注意: 这是⼀个中⽂⽹站**; 下⾯是⼀个⼤致流程, 你会根据每⼀步的运⾏结果对当前计划中的任务做出适当调整: 1. 爬取并本地保存html结构; 2. 直接打印第7个*快讯*关键词后2000个字符的html内容, 作为*快讯的html内容示例*; 3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*快讯*的标题、链接、时间; 4. 筛选最近3天的初创企业融资*快讯*, 以list[dict]形式打印前5个。5. 将全部结果存在本地csv中                                                                                                                                                                                |
+| 8  | Email reply                        | Filter through my emails and respond to them as necessary                                                                                               | You are an agent that automatically reads and replies to emails. I will give you your Outlook email account and password. You need to check the content of the latest email and return it to me. If the email address suffix of this email is [@communication.microsoft.com](http://@communication.microsoft.com), please automatically reply with "I've received your email and will reply as soon as possible. Thank you!" Email account: <englishgpt@outlook.com> Email Password: xxxx |
+| 9  | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://medium.com/>. Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a text file. All required dependencies and environments have been fully installed and configured.                                                                                                                                                                             |
+| 10 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://pytorch.org/>. Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                           |
+| 11 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://www.kaggle.com/>. Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                    |
+| 12 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://chat.openai.com/auth/login>. Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                         |
+| 13 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://deepmind.google/technologies/gemini/#introduction>. Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                  |
+| 14 | Image Background Removal           | Remove the background of a given image                                                                                                                  | This is an image, you need to use python toolkit rembg remove the background of the image. image path:'{DATA_PATH}/open_ended_tasks/14_image_background_removal.jpg'; save path:'{DATA_PATH}/open_ended_tasks/14_image_background_removal.jpg'                                                                                                                                                                                                                                            |
+| 15 | Text2Img                           | Use SD tools to generate images                                                                                                                         | I want to generate an image of a beautiful girl using the stable diffusion text2image tool, sd_url = "http://your.sd.service.ip:port"                                                                                                                                                                                                                                                                                                                                                     |
+| 16 | Image2Code Generation              | Web code generation                                                                                                                                     | This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{DATA_PATH}/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                          |
+| 17 | Image2Code Generation              | Web code generation                                                                                                                                     | This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{DATA_PATH}/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                          |
+| 18 | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied                                                                   |
+| 19 | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | You are a professional game developer, please use pyxel software to create a simple jumping game. The game needs to include a character that can move left and right on the screen. When the player presses the spacebar, the character should jump. Please ensure that the game is easy to operate, with clear graphics, and complies with the functional limitations of pyxel software. Note: pyxel environment already satisfied                                                       |
+| 20 | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | Make a mouse click game that click button as many times as possible in 30 seconds using pyxel. Note: pyxel environment already satisfied                                                                                                                                                                                                                                                                                                                                                  |
diff --git a/examples/di/requirements_prompt.py b/examples/di/requirements_prompt.py
new file mode 100644
index 000000000..d833ff45b
--- /dev/null
+++ b/examples/di/requirements_prompt.py
@@ -0,0 +1,69 @@
+# ML-Benchmark requirements
+IRIS_REQ = "Run data analysis on sklearn Iris dataset, include a plot"
+DIABETES_REQ = "Run data analysis on sklearn diabetes dataset, include a plot"
+WINES_RECOGNITION_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class with 20% as test set, and show prediction accuracy"
+BREAST_CANCER_WISCONSIN_REQ = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
+TITANIC_REQ = "This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/05_titanic/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/05_titanic/split_eval.csv'."
+HOUSE_PRICES_ADVANCED_REGRESSION_TECHNIQUES_REQ = "This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/06_house-prices-advanced-regression-techniques/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/06_house-prices-advanced-regression-techniques/split_eval.csv'."
+SANTANDER_CUSTOMER_TRANSACTION_PREDICTION_REQ = "This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/07_santander-customer-transaction-prediction/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/07_santander-customer-transaction-prediction/split_eval.csv' ."
+ICR_IDENTITY_AGE_RELATED_CONDITIONS_REQ = "This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions. The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/08_icr-identify-age-related-conditions/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/08_icr-identify-age-related-conditions/split_eval.csv' ."
+SANTANDER_VALUE_PREDICTION_CHALLENGE_REQ = "This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/09_santander-value-prediction-challenge/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/09_santander-value-prediction-challenge/split_eval.csv' ."
+
+# Open-Ended Tasks requirements
+INVOICE_OCR_REQ_01 = "This is an English invoice image. Your goal is to perform OCR on the image, extract the total amount from ocr result and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/01_invoice_ocr.png'"
+INVOICE_OCR_REQ_02 = "This is a Chinese invoice image. Your goal is to perform OCR on the image and only output the recognized text word results, nothing else is needed, then extract the total amount and receipt ID starting with 'No' from ocr text words results and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/02_invoice_ocr.jpg'"
+INVOICE_OCR_REQ_03 = "This is an invoice image for OCR. Your goal is to perform OCR on the image, extract the total amount and save it into an Excel table format, using PaddleOCR with lang='en'. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/03_invoice_ocr.jpg'"
+WEB_SEARCH_AND_CRAWLING_REQ_04 = "Get data from `paperlist` table in <https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/>, and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables"
+WEB_SEARCH_AND_CRAWLING_REQ_05 = "获取https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html的cpi数据, 请按照这个计划一步一步执行: 1. 检测目标网页的编码类型和html结构. 2.爬取网页, 将网页正文内容去重，并转换为段落清晰适合阅读的纯文本, 并保存到target.txt. 3.设计多个正则匹配表达式来匹配target.txt中关键语句, 使用try-except语句组合各个正则匹配, 注意网页文本是中文. 4.最后使用中文总结概括关键语句回答用户的请求. **注意: 如果是代码块, 请将代码块的关键变量结果打印出来; 如果是网页文本就打印前200个字符."
+WEB_SEARCH_AND_CRAWLING_REQ_06 = (
+    """爬取电子商务网站https://scrapeme.live/shop/ 中的商品数据并保存为csv文件。**注意: 第一步要先解析网页编码和html结构; csv中保存商品名称、价格、url、图片网址;** """
+)
+WEB_SEARCH_AND_CRAWLING_REQ_07 = "从36kr创投平台https://pitchhub.36kr.com/financing-flash所有初创企业融资的信息, **注意: 这是⼀个中⽂⽹站**; 下⾯是⼀个⼤致流程, 你会根据每⼀步的运⾏结果对当前计划中的任务做出适当调整: 1. 爬取并本地保存html结构; 2. 直接打印第7个*快讯*关键词后2000个字符的html内容, 作为*快讯的html内容示例*; 3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*快讯*的标题、链接、时间; 4. 筛选最近3天的初创企业融资*快讯*, 以list[dict]形式打印前5个。5. 将全部结果存在本地csv中"
+EMAIL_REPLY_REQ_08 = """You are an agent that automatically reads and replies to emails. I will give you your Outlook email account and password. You need to check the content of the latest email and return it to me. If the email address suffix of this email is [@communication.microsoft.com](http://@communication.microsoft.com), please automatically reply with "I've received your email and will reply as soon as possible. Thank you!" Email account: <englishgpt@outlook.com> Email Password: xxxx"""
+WEB_PAGE_IMITATION_REQ_09 = "This is a URL of webpage: https://medium.com/ . Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a text file. All required dependencies and environments have been fully installed and configured."
+WEB_PAGE_IMITATION_REQ_10 = "This is a URL of webpage: https://pytorch.org/ . Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
+WEB_PAGE_IMITATION_REQ_11 = "This is a URL of webpage: https://www.kaggle.com/ . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
+WEB_PAGE_IMITATION_REQ_12 = "This is a URL of webpage: https://chat.openai.com/auth/login . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
+WEB_PAGE_IMITATION_REQ_13 = "This is a URL of webpage: https://deepmind.google/technologies/gemini/#introduction . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
+IMAGE_BACKGROUND_REMOVAL_REQ_14 = "This is an image, you need to use python toolkit rembg remove the background of the image. image path:'{data_dir}/di_dataset/open_ended_tasks/14_image_background_removal.jpg'; save path:'{data_dir}/di_dataset/open_ended_tasks/14_image_background_removal_result.jpg'"
+TEXT2IMG_REQ_15 = """I want to generate an image of a beautiful girl using the stable diffusion text2image tool, sd_url = 'http://your.sd.service.ip:port'"""
+IMAGE2CODE_GENERATION_REQ_16 = "This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/di_dataset/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured."
+IMAGE2CODE_GENERATION_REQ_17 = "This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/di_dataset/open_ended_tasks/17_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured."
+GENERATE_GAMES_USING_EXISTING_REPO_REQ_18 = "Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied"
+GENERATE_GAMES_USING_EXISTING_REPO_REQ_19 = "You are a professional game developer, please use pyxel software to create a simple jumping game. The game needs to include a character that can move left and right on the screen. When the player presses the spacebar, the character should jump. Please ensure that the game is easy to operate, with clear graphics, and complies with the functional limitations of pyxel software. Note: pyxel environment already satisfied"
+GENERATE_GAMES_USING_EXISTING_REPO_REQ_20 = "Make a mouse click game that click button as many times as possible in 30 seconds using pyxel. Note: pyxel environment already satisfied"
+
+ML_BENCHMARK_REQUIREMENTS = {
+    "01_iris": IRIS_REQ,
+    "02_diabetes": DIABETES_REQ,
+    "03_wines_recognition": WINES_RECOGNITION_REQ,
+    "04_breast_cancer_wisconsin": BREAST_CANCER_WISCONSIN_REQ,
+    "05_titanic": TITANIC_REQ,
+    "06_house-prices-advanced-regression-techniques": HOUSE_PRICES_ADVANCED_REGRESSION_TECHNIQUES_REQ,
+    "07_santander-customer-transaction-prediction": SANTANDER_CUSTOMER_TRANSACTION_PREDICTION_REQ,
+    "08_icr-identify-age-related-conditions": ICR_IDENTITY_AGE_RELATED_CONDITIONS_REQ,
+    "09_santander-value-prediction-challenge": SANTANDER_VALUE_PREDICTION_CHALLENGE_REQ,
+}
+
+OPEN_ENDED_TASKS_REQUIREMENTS = {
+    "01_invoice_ocr": INVOICE_OCR_REQ_01,
+    "02_invoice_ocr": INVOICE_OCR_REQ_02,
+    "03_invoice_ocr": INVOICE_OCR_REQ_03,
+    "04_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_04,
+    "05_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_05,
+    "06_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_06,
+    "07_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_07,
+    "08_email_reply": EMAIL_REPLY_REQ_08,
+    "09_web_page_imitation": WEB_PAGE_IMITATION_REQ_09,
+    "10_web_page_imitation": WEB_PAGE_IMITATION_REQ_10,
+    "11_web_page_imitation": WEB_PAGE_IMITATION_REQ_11,
+    "12_web_page_imitation": WEB_PAGE_IMITATION_REQ_12,
+    "13_web_page_imitation": WEB_PAGE_IMITATION_REQ_13,
+    "14_image_background_removal": IMAGE_BACKGROUND_REMOVAL_REQ_14,
+    "15_text2img": TEXT2IMG_REQ_15,
+    "16_image_2_code_generation": IMAGE2CODE_GENERATION_REQ_16,
+    "17_image_2_code_generation": IMAGE2CODE_GENERATION_REQ_17,
+    "18_generate_games_using_existing_repo": GENERATE_GAMES_USING_EXISTING_REPO_REQ_18,
+    "19_generate_games_using_existing_repo": GENERATE_GAMES_USING_EXISTING_REPO_REQ_19,
+    "20_generate_games_using_existing_repo": GENERATE_GAMES_USING_EXISTING_REPO_REQ_20,
+}
diff --git a/examples/di/run_ml_benchmark.py b/examples/di/run_ml_benchmark.py
new file mode 100644
index 000000000..ead2638f9
--- /dev/null
+++ b/examples/di/run_ml_benchmark.py
@@ -0,0 +1,21 @@
+import os
+
+import fire
+
+from examples.di.requirements_prompt import ML_BENCHMARK_REQUIREMENTS
+from metagpt.const import DATA_PATH
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+
+# Ensure ML-Benchmark dataset has been downloaded before using this example.
+async def main(task_name, data_dir=DATA_PATH, use_reflection=True):
+    if data_dir != DATA_PATH and not os.path.exists(os.path.join(data_dir, "di_dataset/ml_benchmark")):
+        raise FileNotFoundError(f"ML-Benchmark dataset not found in {data_dir}.")
+
+    requirement = ML_BENCHMARK_REQUIREMENTS[task_name].format(data_dir=data_dir)
+    di = DataInterpreter(use_reflection=use_reflection)
+    await di.run(requirement)
+
+
+if __name__ == "__main__":
+    fire.Fire(main)
diff --git a/examples/di/run_open_ended_tasks.py b/examples/di/run_open_ended_tasks.py
new file mode 100644
index 000000000..19f0703fc
--- /dev/null
+++ b/examples/di/run_open_ended_tasks.py
@@ -0,0 +1,21 @@
+import os
+
+import fire
+
+from examples.di.requirements_prompt import OPEN_ENDED_TASKS_REQUIREMENTS
+from metagpt.const import DATA_PATH
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+
+# Ensure Open-Ended Tasks dataset has been downloaded before using this example.
+async def main(task_name, data_dir=DATA_PATH, use_reflection=True):
+    if data_dir != DATA_PATH and not os.path.exists(os.path.join(data_dir, "di_dataset/open_ended_tasks")):
+        raise FileNotFoundError(f"Open-ended task dataset not found in {data_dir}.")
+
+    requirement = OPEN_ENDED_TASKS_REQUIREMENTS[task_name].format(data_dir=data_dir)
+    di = DataInterpreter(use_reflection=use_reflection)
+    await di.run(requirement)
+
+
+if __name__ == "__main__":
+    fire.Fire(main)

From e53a0acc8e40c29410e3114c14ba5279e89141dd Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 18 Mar 2024 22:00:51 +0800
Subject: [PATCH 03/12] register tools from path

---
 metagpt/tools/tool_convert.py    |  2 +-
 metagpt/tools/tool_registry.py   | 56 +++++++++++++++++++++++++++++---
 metagpt/utils/parse_docstring.py |  2 +-
 3 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
index 42c65b9e7..d366bef41 100644
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@@ -7,7 +7,7 @@ PARSER = GoogleDocstringParser
 
 def convert_code_to_tool_schema(obj, include: list[str] = None):
     docstring = inspect.getdoc(obj)
-    assert docstring, "no docstring found for the objects, skip registering"
+    # assert docstring, "no docstring found for the objects, skip registering"
 
     if inspect.isclass(obj):
         schema = {"type": "class", "description": remove_spaces(docstring), "methods": {}}
diff --git a/metagpt/tools/tool_registry.py b/metagpt/tools/tool_registry.py
index 11269cb0f..e3d270b79 100644
--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@@ -7,10 +7,10 @@
 """
 from __future__ import annotations
 
+import importlib.util
 import inspect
 import os
 from collections import defaultdict
-from typing import Union
 
 import yaml
 from pydantic import BaseModel
@@ -127,15 +127,63 @@ def make_schema(tool_source_object, include, path):
     return schema
 
 
-def validate_tool_names(tools: Union[list[str], str]) -> str:
+def validate_tool_names(tools: list[str]) -> dict[str, Tool]:
     assert isinstance(tools, list), "tools must be a list of str"
     valid_tools = {}
     for key in tools:
-        # one can define either tool names or tool type names, take union to get the whole set
-        if TOOL_REGISTRY.has_tool(key):
+        # one can define either tool names OR tool tags OR tool path, take union to get the whole set
+        # if tool paths are provided, they will be registered on the fly
+        if os.path.isdir(key) or os.path.isfile(key):
+            valid_tools.update(register_tools_from_path(key))
+        elif TOOL_REGISTRY.has_tool(key):
             valid_tools.update({key: TOOL_REGISTRY.get_tool(key)})
         elif TOOL_REGISTRY.has_tool_tag(key):
             valid_tools.update(TOOL_REGISTRY.get_tools_by_tag(key))
         else:
             logger.warning(f"invalid tool name or tool type name: {key}, skipped")
     return valid_tools
+
+
+def load_module_from_file(filepath):
+    module_name = os.path.splitext(os.path.basename(filepath))[0]
+    spec = importlib.util.spec_from_file_location(module_name, filepath)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def register_tools_from_file(file_path) -> dict[str, Tool]:
+    registered_tools = {}
+    module = load_module_from_file(file_path)
+    for name, obj in inspect.getmembers(module):
+        if inspect.isclass(obj) or inspect.isfunction(obj):
+            if obj.__module__ == module.__name__:
+                # excluding imported classes and functions, register only those defined in the file
+                if "metagpt" in file_path:
+                    # split to handle ../metagpt/metagpt/tools/... where only metagpt/tools/... is needed
+                    file_path = "metagpt" + file_path.split("metagpt")[-1]
+
+                TOOL_REGISTRY.register_tool(
+                    tool_name=name,
+                    tool_path=file_path,
+                    tool_code="",  # inspect.getsource(obj) will result in TypeError, skip it for now
+                    tool_source_object=obj,
+                )
+                registered_tools.update({name: TOOL_REGISTRY.get_tool(name)})
+
+    return registered_tools
+
+
+def register_tools_from_path(path) -> dict[str, Tool]:
+    tools_registered = {}
+    if os.path.isfile(path) and path.endswith(".py"):
+        # Path is a Python file
+        tools_registered.update(register_tools_from_file(path))
+    elif os.path.isdir(path):
+        # Path is a directory
+        for root, _, files in os.walk(path):
+            for file in files:
+                if file.endswith(".py"):
+                    file_path = os.path.join(root, file)
+                    tools_registered.update(register_tools_from_file(file_path))
+    return tools_registered
diff --git a/metagpt/utils/parse_docstring.py b/metagpt/utils/parse_docstring.py
index 63c0e6890..5df4d6671 100644
--- a/metagpt/utils/parse_docstring.py
+++ b/metagpt/utils/parse_docstring.py
@@ -3,7 +3,7 @@ from typing import Tuple
 
 
 def remove_spaces(text):
-    return re.sub(r"\s+", " ", text).strip()
+    return re.sub(r"\s+", " ", text).strip() if text else ""
 
 
 class DocstringParser:

From 735c6128183f3a6acb3ffee5c0387275aed6d14c Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Tue, 19 Mar 2024 11:15:24 +0800
Subject: [PATCH 04/12] fix bug of old version param

---
 metagpt/tools/libs/data_preprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/tools/libs/data_preprocess.py b/metagpt/tools/libs/data_preprocess.py
index aa9070689..ef02d8f3a 100644
--- a/metagpt/tools/libs/data_preprocess.py
+++ b/metagpt/tools/libs/data_preprocess.py
@@ -171,7 +171,7 @@ class OneHotEncode(DataPreprocessTool):
 
     def __init__(self, features: list):
         self.features = features
-        self.model = OneHotEncoder(handle_unknown="ignore", sparse=False)
+        self.model = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
 
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
         ts_data = self.model.transform(df[self.features])

From cdc9015ec606df741cd92432f3e9a2de9cc348b6 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Tue, 19 Mar 2024 17:53:26 +0800
Subject: [PATCH 05/12] fix critical bug: human prior not injected

---
 metagpt/strategy/planner.py                   |  5 ++-
 metagpt/strategy/task_type.py                 |  7 ++++
 tests/data/rsp_cache.json                     |  9 ++++-
 .../metagpt/roles/di/test_data_interpreter.py |  1 -
 tests/metagpt/strategy/test_planner.py        | 37 +++++++++++++++++++
 5 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 tests/metagpt/strategy/test_planner.py

diff --git a/metagpt/strategy/planner.py b/metagpt/strategy/planner.py
index 0fc9cf77f..fbf784837 100644
--- a/metagpt/strategy/planner.py
+++ b/metagpt/strategy/planner.py
@@ -164,8 +164,9 @@ class Planner(BaseModel):
         code_written = "\n\n".join(code_written)
         task_results = [task.result for task in finished_tasks]
         task_results = "\n\n".join(task_results)
-        task_type_name = self.current_task.task_type.upper()
-        guidance = TaskType[task_type_name].value.guidance if hasattr(TaskType, task_type_name) else ""
+        task_type_name = self.current_task.task_type
+        task_type = TaskType.get_type(task_type_name)
+        guidance = task_type.guidance if task_type else ""
 
         # combine components in a prompt
         prompt = PLAN_STATUS.format(
diff --git a/metagpt/strategy/task_type.py b/metagpt/strategy/task_type.py
index 7c88817cc..d21705c16 100644
--- a/metagpt/strategy/task_type.py
+++ b/metagpt/strategy/task_type.py
@@ -71,3 +71,10 @@ class TaskType(Enum):
     @property
     def type_name(self):
         return self.value.name
+
+    @classmethod
+    def get_type(cls, type_name):
+        for member in cls:
+            if member.type_name == type_name:
+                return member.value
+        return None
diff --git a/tests/data/rsp_cache.json b/tests/data/rsp_cache.json
index 8f0567c56..565241779 100644
--- a/tests/data/rsp_cache.json
+++ b/tests/data/rsp_cache.json
@@ -420,5 +420,12 @@
     "user: \n## User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n## Context\n\n## Current Plan\n[\n    {\n        \"task_id\": \"1\",\n        \"dependent_task_ids\": [],\n        \"instruction\": \"Perform exploratory data analysis on the sklearn Wine recognition dataset including summary statistics and a plot.\",\n        \"task_type\": \"eda\",\n        \"code\": \"import numpy as np\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nfrom sklearn.datasets import load_wine\\n\\n# Load the wine dataset\\nwine = load_wine()\\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\\n                       columns=wine['feature_names'] + ['target'])\\n\\n# Summary statistics for numerical features\\nnumerical_summary = wine_df.describe()\\n\\n# Summary statistics for categorical features\\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\\n\\n# Correlation matrix for numerical features\\ncorrelation_matrix = wine_df.corr()\\n\\n# Plotting a histogram for each numerical feature\\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\\nplt.tight_layout()\\nplt.show()\\n\\n# Displaying the summary statistics\\nprint(\\\"Numerical Summary:\\\\n\\\", numerical_summary)\\nprint(\\\"\\\\nCategorical Summary:\\\\n\\\", categorical_summary)\\nprint(\\\"\\\\nCorrelation Matrix:\\\\n\\\", correlation_matrix)\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"2\",\n        \"dependent_task_ids\": [\n            \"1\"\n        ],\n        \"instruction\": \"Preprocess the dataset by splitting it into training and validation sets with a 80-20 split.\",\n        \"task_type\": \"data preprocessing\",\n        \"code\": \"from sklearn.model_selection import train_test_split\\n\\n# Split the data into 
training and validation sets\\nX_train, X_val, y_train, y_val = train_test_split(\\n    wine.data, wine.target, test_size=0.2, random_state=42\\n)\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"3\",\n        \"dependent_task_ids\": [\n            \"2\"\n        ],\n        \"instruction\": \"Train a model using the training set to predict wine class.\",\n        \"task_type\": \"model train\",\n        \"code\": \"from sklearn.ensemble import RandomForestClassifier\\nfrom sklearn.metrics import accuracy_score\\n\\n# Initialize the model\\nmodel = RandomForestClassifier(random_state=42)\\n\\n# Train the model\\nmodel.fit(X_train, y_train)\\n\\n# Predict the validation set\\ny_pred = model.predict(X_val)\\n\\n# Calculate the accuracy\\naccuracy = accuracy_score(y_val, y_pred)\\nprint(f\\\"Validation Accuracy: {accuracy:.2f}\\\")\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"4\",\n        \"dependent_task_ids\": [\n            \"3\"\n        ],\n        \"instruction\": \"Evaluate the model on the validation set and show the validation accuracy.\",\n        \"task_type\": \"model evaluate\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        \"is_finished\": false\n    }\n]\n## Current Task\n{\"task_id\":\"4\",\"dependent_task_ids\":[\"3\"],\"instruction\":\"Evaluate the model on the validation set and show the validation accuracy.\",\"task_type\":\"model evaluate\",\"code\":\"\",\"result\":\"\",\"is_success\":false,\"is_finished\":false}\n\nassistant: # Since the model has already been trained and predictions have been made on the validation set,\n# we only need to evaluate the model using the predictions and the true labels from the validation set.\n\n# The accuracy has already been calculated and printed in the previous 
task.\n# However, to follow the task guidance and to ensure the output is only related to the current task,\n# we will calculate and print the validation accuracy again.\n\nvalidation_accuracy = accuracy_score(y_val, y_pred)\nprint(f\"Validation Accuracy: {validation_accuracy:.2f}\")\n\nuser: a successful runThis is a <task> review. Please review output from metagpt.actions.di.execute_nb_code.ExecuteNbCode\nIf you want to change, add, delete a task or merge tasks in the plan, say 'change task task_id or current task, ... (things to change)' If you confirm the output from the current task and wish to continue, type: confirmIf you think user requirement has been fulfilled completedly, you can finish the process by typing: finish\nIf you want to terminate the process, type: exit\nPlease type your review below:\n": "confirm",
     "As a data scientist, you need to help user to achieve their goal step by step in a continuous Jupyter notebook. Since it is a notebook environment, don't use asyncio.run. Instead, use await if you need to call an async function.#SYSTEM_MSG_END#\n# User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n\n# Plan Status\n\n\n# Tool Info\n\n\n# Constraints\n- Take on Current Task if it is in Plan Status, otherwise, tackle User Requirement directly.\n- Ensure the output new code is executable in the same Jupyter notebook as the previous executed code.\n- Always prioritize using pre-defined tools for the same functionality.\n\n# Output\nWhile some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:\n```python\nyour code\n```\n#MSG_SEP#Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.": "To achieve the user requirement, we will follow these steps:\n\n1. Load the Wine recognition dataset from `sklearn.datasets`.\n2. Perform a basic data analysis to understand the dataset.\n3. Visualize the data to gain insights.\n4. Split the dataset into training and validation sets.\n5. Train a classification model using the training set.\n6. 
Evaluate the model on the validation set and report the accuracy.\n\nLet's start by loading the dataset and performing the initial data analysis:\n\n```python\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score\n\n# Load the wine dataset\nwine = load_wine()\nX = wine.data\ny = wine.target\nfeature_names = wine.feature_names\nclass_names = wine.target_names\n\n# Convert to a DataFrame for better visualization and analysis\nwine_df = pd.DataFrame(X, columns=feature_names)\nwine_df['target'] = y\n\n# Basic data analysis\nprint(wine_df.describe())\n\n# Plotting the data\n# Let's plot the distribution of alcohol content for each class\nfor class_idx, class_name in enumerate(class_names):\n    plt.hist(wine_df[wine_df['target'] == class_idx]['alcohol'], label=class_name, alpha=0.5)\nplt.title('Alcohol content distribution by wine class')\nplt.xlabel('Alcohol content')\nplt.ylabel('Frequency')\nplt.legend()\nplt.show()\n\n# Split the dataset into training and validation sets (80% train, 20% validation)\nX_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# Train a RandomForestClassifier\nmodel = RandomForestClassifier(random_state=42)\nmodel.fit(X_train, y_train)\n\n# Predict on the validation set\ny_pred = model.predict(X_val)\n\n# Calculate the accuracy on the validation set\naccuracy = accuracy_score(y_val, y_pred)\nprint(f'Validation accuracy: {accuracy:.2f}')\n```\n\nThis code will load the dataset, perform a basic analysis, plot the distribution of alcohol content for each wine class, split the data, train a RandomForestClassifier, and finally, evaluate its performance on the validation set.",
     "\n# User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n# Context\n[user: Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy., assistant: import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score\n\n# Load the wine dataset\nwine = load_wine()\nX = wine.data\ny = wine.target\nfeature_names = wine.feature_names\nclass_names = wine.target_names\n\n# Convert to a DataFrame for better visualization and analysis\nwine_df = pd.DataFrame(X, columns=feature_names)\nwine_df['target'] = y\n\n# Basic data analysis\nprint(wine_df.describe())\n\n# Plotting the data\n# Let's plot the distribution of alcohol content for each class\nfor class_idx, class_name in enumerate(class_names):\n    plt.hist(wine_df[wine_df['target'] == class_idx]['alcohol'], label=class_name, alpha=0.5)\nplt.title('Alcohol content distribution by wine class')\nplt.xlabel('Alcohol content')\nplt.ylabel('Frequency')\nplt.legend()\nplt.show()\n\n# Split the dataset into training and validation sets (80% train, 20% validation)\nX_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# Train a RandomForestClassifier\nmodel = RandomForestClassifier(random_state=42)\nmodel.fit(X_train, y_train)\n\n# Predict on the validation set\ny_pred = model.predict(X_val)\n\n# Calculate the accuracy on the validation set\naccuracy = accuracy_score(y_val, y_pred)\nprint(f'Validation accuracy: {accuracy:.2f}')\n, user: a successful run]\n\nOutput a json following the format:\n```json\n{\n    \"thoughts\": str = \"Thoughts on current situation, reflect 
on how you should proceed to fulfill the user requirement\",\n    \"state\": bool = \"Decide whether you need to take more actions to complete the user requirement. Return true if you think so. Return false if you think the requirement has been completely fulfilled.\"\n}\n```\n": "```json\n{\n    \"thoughts\": \"The user has requested a data analysis on the sklearn Wine recognition dataset, including a plot and training a model to predict wine class with validation accuracy. The provided code snippet appears to fulfill the user's requirements. It loads the dataset, performs basic data analysis, creates a plot of alcohol content distribution by wine class, splits the data into training and validation sets, trains a RandomForestClassifier, predicts on the validation set, and prints the validation accuracy. Assuming the code runs successfully without errors, the user's requirements should be met.\",\n    \"state\": false\n}\n```",
-    "\n## User Requirement:\nconduct feature engineering, add new features on the dataset\n\n## Task\nRecommend up to 5 tools from 'Available Tools' that can help solve the 'User Requirement'. \n\n## Available Tools:\n{'PolynomialExpansion': 'Add polynomial and interaction features from selected numeric columns to input DataFrame.', 'scrape_web_playwright': 'Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright. ', 'FillMissingValue': 'Completing missing values with simple strategies.'}\n\n## Tool Selection and Instructions:\n- Select tools most relevant to completing the 'User Requirement'.\n- If you believe that no tools are suitable, indicate with an empty list.\n- Only list the names of the tools, not the full schema of each tool.\n- Ensure selected tools are listed in 'Available Tools'.\n- Output a json list of tool names:\n```json\n[\"tool_name1\", \"tool_name2\", ...]\n```\n": "```json\n[\"PolynomialExpansion\", \"FillMissingValue\"]\n```"
+    "\n## User Requirement:\nconduct feature engineering, add new features on the dataset\n\n## Task\nRecommend up to 5 tools from 'Available Tools' that can help solve the 'User Requirement'. \n\n## Available Tools:\n{'PolynomialExpansion': 'Add polynomial and interaction features from selected numeric columns to input DataFrame.', 'scrape_web_playwright': 'Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright. ', 'FillMissingValue': 'Completing missing values with simple strategies.'}\n\n## Tool Selection and Instructions:\n- Select tools most relevant to completing the 'User Requirement'.\n- If you believe that no tools are suitable, indicate with an empty list.\n- Only list the names of the tools, not the full schema of each tool.\n- Ensure selected tools are listed in 'Available Tools'.\n- Output a json list of tool names:\n```json\n[\"tool_name1\", \"tool_name2\", ...]\n```\n": "```json\n[\"PolynomialExpansion\", \"FillMissingValue\"]\n```",
+    "As a data scientist, you need to help user to achieve their goal step by step in a continuous Jupyter notebook. Since it is a notebook environment, don't use asyncio.run. Instead, use await if you need to call an async function.#SYSTEM_MSG_END#\n# User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n\n# Plan Status\n\n## Finished Tasks\n### code\n```python\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nwine = load_wine()\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\n                       columns=wine['feature_names'] + ['target'])\nnumerical_summary = wine_df.describe()\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\ncorrelation_matrix = wine_df.corr()\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\nplt.tight_layout()\nplt.show()\nprint(\"Numerical Summary:\\n\", numerical_summary)\nprint(\"\\nCategorical Summary:\\n\", categorical_summary)\nprint(\"\\nCorrelation Matrix:\\n\", correlation_matrix)\n```\n\n### execution result\na successful run\n\n## Current Task\nPreprocess the dataset by splitting it into training and validation sets with a 80-20 split.\n\n## Task Guidance\nWrite complete code for 'Current Task'. 
And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.\nSpecifically, \nThe current task is about data preprocessing, please note the following:\n- Monitor data types per column, applying appropriate methods.\n- Ensure operations are on existing dataset columns.\n- Avoid writing processed data to files.\n- Avoid any change to label column, such as standardization, etc.\n- Prefer alternatives to one-hot encoding for categorical data.\n- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.\n- Each step do data preprocessing to train, must do same for test separately at the same time.\n- Always copy the DataFrame before processing it and use the copy to process.\n\n\n\n# Tool Info\n\n\n# Constraints\n- Take on Current Task if it is in Plan Status, otherwise, tackle User Requirement directly.\n- Ensure the output new code is executable in the same Jupyter notebook as the previous executed code.\n- Always prioritize using pre-defined tools for the same functionality.\n\n# Output\nWhile some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. 
Output code in the following format:\n```python\nyour code\n```\n#MSG_SEP#\n# Latest Data Info\nLatest data info after previous tasks:\na successful run\n": "```python\nfrom sklearn.model_selection import train_test_split\n\n# Split the data into training and validation sets (80-20 split)\nX_train, X_val, y_train, y_val = train_test_split(\n    wine_df.drop('target', axis=1),  # features\n    wine_df['target'],               # target variable\n    test_size=0.2,                   # 20% for validation\n    random_state=42                  # seed for reproducibility\n)\n\n# Output the shapes of the resulting data splits\nprint(f\"Training set shape: {X_train.shape}\")\nprint(f\"Validation set shape: {X_val.shape}\")\nprint(f\"Training target shape: {y_train.shape}\")\nprint(f\"Validation target shape: {y_val.shape}\")\n```",
+    "\n# Background\nCheck latest data info to guide subsequent tasks.\n\n## Finished Tasks\n```python\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nwine = load_wine()\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\n                       columns=wine['feature_names'] + ['target'])\nnumerical_summary = wine_df.describe()\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\ncorrelation_matrix = wine_df.corr()\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\nplt.tight_layout()\nplt.show()\nprint(\"Numerical Summary:\\n\", numerical_summary)\nprint(\"\\nCategorical Summary:\\n\", categorical_summary)\nprint(\"\\nCorrelation Matrix:\\n\", correlation_matrix)\n\nfrom sklearn.model_selection import train_test_split\nX_train, X_val, y_train, y_val = train_test_split(\n    wine_df.drop('target', axis=1),\n    wine_df['target'],\n    test_size=0.2,\n    random_state=42\n)\nprint(f\"Training set shape: {X_train.shape}\")\nprint(f\"Validation set shape: {X_val.shape}\")\nprint(f\"Training target shape: {y_train.shape}\")\nprint(f\"Validation target shape: {y_val.shape}\")\n```end\n\n# Task\nCheck code in finished tasks, print key variables to guide your following actions.\nSpecifically, if it is a data analysis or machine learning task, print the the latest column information using the following code, with DataFrame variable from 'Finished Tasks' in place of df:\n```python\nfrom metagpt.tools.libs.data_preprocess import get_column_info\n\ncolumn_info = get_column_info(df)\nprint(\"column_info\")\nprint(column_info)\n```end\nOtherwise, print out any key variables you see fit. 
Return an empty string if you think there is no important data to check.\n\n# Constraints:\n- Your code is to be added to a new cell in jupyter.\n\n# Instruction\nOutput code following the format:\n```python\nyour code\n```\n": "```python\nfrom metagpt.tools.libs.data_preprocess import get_column_info\n\ncolumn_info = get_column_info(wine_df)\nprint(\"column_info\")\nprint(column_info)\n```",
+    "As a data scientist, you need to help user to achieve their goal step by step in a continuous Jupyter notebook. Since it is a notebook environment, don't use asyncio.run. Instead, use await if you need to call an async function.#SYSTEM_MSG_END#\n# User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n\n# Plan Status\n\n## Finished Tasks\n### code\n```python\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nwine = load_wine()\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\n                       columns=wine['feature_names'] + ['target'])\nnumerical_summary = wine_df.describe()\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\ncorrelation_matrix = wine_df.corr()\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\nplt.tight_layout()\nplt.show()\nprint(\"Numerical Summary:\\n\", numerical_summary)\nprint(\"\\nCategorical Summary:\\n\", categorical_summary)\nprint(\"\\nCorrelation Matrix:\\n\", correlation_matrix)\n\nfrom sklearn.model_selection import train_test_split\nX_train, X_val, y_train, y_val = train_test_split(\n    wine_df.drop('target', axis=1),\n    wine_df['target'],\n    test_size=0.2,\n    random_state=42\n)\nprint(f\"Training set shape: {X_train.shape}\")\nprint(f\"Validation set shape: {X_val.shape}\")\nprint(f\"Training target shape: {y_train.shape}\")\nprint(f\"Validation target shape: {y_val.shape}\")\n```\n\n### execution result\na successful run\n\na successful run\n\n## Current Task\nTrain a model using the training set to predict wine class.\n\n## Task Guidance\nWrite complete code for 'Current Task'. 
And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.\nSpecifically, \nThe current task is about training a model, please ensure high performance:\n- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.\n- If non-numeric columns exist, perform label encode together with all steps.\n- Use the data from previous task result directly, do not mock or reload data yourself.\n- Set suitable hyperparameters for the model, make metrics as high as possible.\n\n\n\n# Tool Info\n\n\n# Constraints\n- Take on Current Task if it is in Plan Status, otherwise, tackle User Requirement directly.\n- Ensure the output new code is executable in the same Jupyter notebook as the previous executed code.\n- Always prioritize using pre-defined tools for the same functionality.\n\n# Output\nWhile some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:\n```python\nyour code\n```\n#MSG_SEP#\n# Latest Data Info\nLatest data info after previous tasks:\na successful run\n": "```python\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score\n\n# Initialize the RandomForestClassifier\nmodel = RandomForestClassifier(n_estimators=100, random_state=42)\n\n# Train the model on the training data\nmodel.fit(X_train, y_train)\n\n# Predict the wine classes on the validation set\ny_val_pred = model.predict(X_val)\n\n# Calculate the accuracy on the validation set\nval_accuracy = accuracy_score(y_val, y_val_pred)\n\nprint(f\"Validation Accuracy: {val_accuracy:.4f}\")\n```",
+    "As a data scientist, you need to help user to achieve their goal step by step in a continuous Jupyter notebook. Since it is a notebook environment, don't use asyncio.run. Instead, use await if you need to call an async function.#SYSTEM_MSG_END#\n# User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n\n# Plan Status\n\n## Finished Tasks\n### code\n```python\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import load_wine\nwine = load_wine()\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\n                       columns=wine['feature_names'] + ['target'])\nnumerical_summary = wine_df.describe()\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\ncorrelation_matrix = wine_df.corr()\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\nplt.tight_layout()\nplt.show()\nprint(\"Numerical Summary:\\n\", numerical_summary)\nprint(\"\\nCategorical Summary:\\n\", categorical_summary)\nprint(\"\\nCorrelation Matrix:\\n\", correlation_matrix)\n\nfrom sklearn.model_selection import train_test_split\nX_train, X_val, y_train, y_val = train_test_split(\n    wine_df.drop('target', axis=1),\n    wine_df['target'],\n    test_size=0.2,\n    random_state=42\n)\nprint(f\"Training set shape: {X_train.shape}\")\nprint(f\"Validation set shape: {X_val.shape}\")\nprint(f\"Training target shape: {y_train.shape}\")\nprint(f\"Validation target shape: {y_val.shape}\")\n\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score\nmodel = RandomForestClassifier(n_estimators=100, random_state=42)\nmodel.fit(X_train, y_train)\ny_val_pred = model.predict(X_val)\nval_accuracy = accuracy_score(y_val, y_val_pred)\nprint(f\"Validation Accuracy: {val_accuracy:.4f}\")\n```\n\n### execution result\na successful run\n\na successful 
run\n\na successful run\n\n## Current Task\nEvaluate the model on the validation set and show the validation accuracy.\n\n## Task Guidance\nWrite complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.\nSpecifically, \nThe current task is about evaluating a model, please note the following:\n- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.\n- Use trained model from previous task result directly, do not mock or reload model yourself.\n\n\n\n# Tool Info\n\n\n# Constraints\n- Take on Current Task if it is in Plan Status, otherwise, tackle User Requirement directly.\n- Ensure the output new code is executable in the same Jupyter notebook as the previous executed code.\n- Always prioritize using pre-defined tools for the same functionality.\n\n# Output\nWhile some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:\n```python\nyour code\n```\n": "```python\n# Evaluate the model on the validation set and show the validation accuracy\nval_accuracy = accuracy_score(y_val, y_val_pred)\nprint(f\"Validation Accuracy: {val_accuracy:.4f}\")\n```",
+    "user: \n## User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n## Context\n\n## Current Plan\n[\n    {\n        \"task_id\": \"1\",\n        \"dependent_task_ids\": [],\n        \"instruction\": \"Perform exploratory data analysis on the sklearn Wine recognition dataset including summary statistics and a plot.\",\n        \"task_type\": \"eda\",\n        \"code\": \"import numpy as np\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nfrom sklearn.datasets import load_wine\\n\\n# Load the wine dataset\\nwine = load_wine()\\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\\n                       columns=wine['feature_names'] + ['target'])\\n\\n# Summary statistics for numerical features\\nnumerical_summary = wine_df.describe()\\n\\n# Summary statistics for categorical features\\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\\n\\n# Correlation matrix for numerical features\\ncorrelation_matrix = wine_df.corr()\\n\\n# Plotting a histogram for each numerical feature\\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\\nplt.tight_layout()\\nplt.show()\\n\\n# Displaying the summary statistics\\nprint(\\\"Numerical Summary:\\\\n\\\", numerical_summary)\\nprint(\\\"\\\\nCategorical Summary:\\\\n\\\", categorical_summary)\\nprint(\\\"\\\\nCorrelation Matrix:\\\\n\\\", correlation_matrix)\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"2\",\n        \"dependent_task_ids\": [\n            \"1\"\n        ],\n        \"instruction\": \"Preprocess the dataset by splitting it into training and validation sets with a 80-20 split.\",\n        \"task_type\": \"data preprocessing\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        
\"is_finished\": false\n    },\n    {\n        \"task_id\": \"3\",\n        \"dependent_task_ids\": [\n            \"2\"\n        ],\n        \"instruction\": \"Train a model using the training set to predict wine class.\",\n        \"task_type\": \"model train\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        \"is_finished\": false\n    },\n    {\n        \"task_id\": \"4\",\n        \"dependent_task_ids\": [\n            \"3\"\n        ],\n        \"instruction\": \"Evaluate the model on the validation set and show the validation accuracy.\",\n        \"task_type\": \"model evaluate\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        \"is_finished\": false\n    }\n]\n## Current Task\n{\"task_id\":\"2\",\"dependent_task_ids\":[\"1\"],\"instruction\":\"Preprocess the dataset by splitting it into training and validation sets with a 80-20 split.\",\"task_type\":\"data preprocessing\",\"code\":\"\",\"result\":\"\",\"is_success\":false,\"is_finished\":false}\n\nuser: \n# Latest Data Info\nLatest data info after previous tasks:\na successful run\n\nassistant: from sklearn.model_selection import train_test_split\n\n# Split the data into training and validation sets (80-20 split)\nX_train, X_val, y_train, y_val = train_test_split(\n    wine_df.drop('target', axis=1),  # features\n    wine_df['target'],               # target variable\n    test_size=0.2,                   # 20% for validation\n    random_state=42                  # seed for reproducibility\n)\n\n# Output the shapes of the resulting data splits\nprint(f\"Training set shape: {X_train.shape}\")\nprint(f\"Validation set shape: {X_val.shape}\")\nprint(f\"Training target shape: {y_train.shape}\")\nprint(f\"Validation target shape: {y_val.shape}\")\n\nuser: a successful runThis is a <task> review. 
Please review output from metagpt.actions.di.execute_nb_code.ExecuteNbCode\nIf you want to change, add, delete a task or merge tasks in the plan, say 'change task task_id or current task, ... (things to change)' If you confirm the output from the current task and wish to continue, type: confirmIf you think user requirement has been fulfilled completedly, you can finish the process by typing: finish\nIf you want to terminate the process, type: exit\nPlease type your review below:\n": "confirm",
+    "user: \n## User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n## Context\n\n## Current Plan\n[\n    {\n        \"task_id\": \"1\",\n        \"dependent_task_ids\": [],\n        \"instruction\": \"Perform exploratory data analysis on the sklearn Wine recognition dataset including summary statistics and a plot.\",\n        \"task_type\": \"eda\",\n        \"code\": \"import numpy as np\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nfrom sklearn.datasets import load_wine\\n\\n# Load the wine dataset\\nwine = load_wine()\\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\\n                       columns=wine['feature_names'] + ['target'])\\n\\n# Summary statistics for numerical features\\nnumerical_summary = wine_df.describe()\\n\\n# Summary statistics for categorical features\\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\\n\\n# Correlation matrix for numerical features\\ncorrelation_matrix = wine_df.corr()\\n\\n# Plotting a histogram for each numerical feature\\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\\nplt.tight_layout()\\nplt.show()\\n\\n# Displaying the summary statistics\\nprint(\\\"Numerical Summary:\\\\n\\\", numerical_summary)\\nprint(\\\"\\\\nCategorical Summary:\\\\n\\\", categorical_summary)\\nprint(\\\"\\\\nCorrelation Matrix:\\\\n\\\", correlation_matrix)\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"2\",\n        \"dependent_task_ids\": [\n            \"1\"\n        ],\n        \"instruction\": \"Preprocess the dataset by splitting it into training and validation sets with a 80-20 split.\",\n        \"task_type\": \"data preprocessing\",\n        \"code\": \"from sklearn.model_selection import train_test_split\\n\\n# Split the data into 
training and validation sets (80-20 split)\\nX_train, X_val, y_train, y_val = train_test_split(\\n    wine_df.drop('target', axis=1),  # features\\n    wine_df['target'],               # target variable\\n    test_size=0.2,                   # 20% for validation\\n    random_state=42                  # seed for reproducibility\\n)\\n\\n# Output the shapes of the resulting data splits\\nprint(f\\\"Training set shape: {X_train.shape}\\\")\\nprint(f\\\"Validation set shape: {X_val.shape}\\\")\\nprint(f\\\"Training target shape: {y_train.shape}\\\")\\nprint(f\\\"Validation target shape: {y_val.shape}\\\")\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"3\",\n        \"dependent_task_ids\": [\n            \"2\"\n        ],\n        \"instruction\": \"Train a model using the training set to predict wine class.\",\n        \"task_type\": \"model train\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        \"is_finished\": false\n    },\n    {\n        \"task_id\": \"4\",\n        \"dependent_task_ids\": [\n            \"3\"\n        ],\n        \"instruction\": \"Evaluate the model on the validation set and show the validation accuracy.\",\n        \"task_type\": \"model evaluate\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        \"is_finished\": false\n    }\n]\n## Current Task\n{\"task_id\":\"3\",\"dependent_task_ids\":[\"2\"],\"instruction\":\"Train a model using the training set to predict wine class.\",\"task_type\":\"model train\",\"code\":\"\",\"result\":\"\",\"is_success\":false,\"is_finished\":false}\n\nuser: \n# Latest Data Info\nLatest data info after previous tasks:\na successful run\n\nassistant: from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score\n\n# Initialize the RandomForestClassifier\nmodel = 
RandomForestClassifier(n_estimators=100, random_state=42)\n\n# Train the model on the training data\nmodel.fit(X_train, y_train)\n\n# Predict the wine classes on the validation set\ny_val_pred = model.predict(X_val)\n\n# Calculate the accuracy on the validation set\nval_accuracy = accuracy_score(y_val, y_val_pred)\n\nprint(f\"Validation Accuracy: {val_accuracy:.4f}\")\n\nuser: a successful runThis is a <task> review. Please review output from metagpt.actions.di.execute_nb_code.ExecuteNbCode\nIf you want to change, add, delete a task or merge tasks in the plan, say 'change task task_id or current task, ... (things to change)' If you confirm the output from the current task and wish to continue, type: confirmIf you think user requirement has been fulfilled completedly, you can finish the process by typing: finish\nIf you want to terminate the process, type: exit\nPlease type your review below:\n": "confirm",
+    "user: \n## User Requirement\nRun data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\n## Context\n\n## Current Plan\n[\n    {\n        \"task_id\": \"1\",\n        \"dependent_task_ids\": [],\n        \"instruction\": \"Perform exploratory data analysis on the sklearn Wine recognition dataset including summary statistics and a plot.\",\n        \"task_type\": \"eda\",\n        \"code\": \"import numpy as np\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\nfrom sklearn.datasets import load_wine\\n\\n# Load the wine dataset\\nwine = load_wine()\\nwine_df = pd.DataFrame(data=np.c_[wine['data'], wine['target']],\\n                       columns=wine['feature_names'] + ['target'])\\n\\n# Summary statistics for numerical features\\nnumerical_summary = wine_df.describe()\\n\\n# Summary statistics for categorical features\\ncategorical_summary = wine_df.select_dtypes(include=['object', 'category']).describe()\\n\\n# Correlation matrix for numerical features\\ncorrelation_matrix = wine_df.corr()\\n\\n# Plotting a histogram for each numerical feature\\nwine_df.hist(bins=15, figsize=(15, 10), layout=(4, 4))\\nplt.tight_layout()\\nplt.show()\\n\\n# Displaying the summary statistics\\nprint(\\\"Numerical Summary:\\\\n\\\", numerical_summary)\\nprint(\\\"\\\\nCategorical Summary:\\\\n\\\", categorical_summary)\\nprint(\\\"\\\\nCorrelation Matrix:\\\\n\\\", correlation_matrix)\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"2\",\n        \"dependent_task_ids\": [\n            \"1\"\n        ],\n        \"instruction\": \"Preprocess the dataset by splitting it into training and validation sets with a 80-20 split.\",\n        \"task_type\": \"data preprocessing\",\n        \"code\": \"from sklearn.model_selection import train_test_split\\n\\n# Split the data into 
training and validation sets (80-20 split)\\nX_train, X_val, y_train, y_val = train_test_split(\\n    wine_df.drop('target', axis=1),  # features\\n    wine_df['target'],               # target variable\\n    test_size=0.2,                   # 20% for validation\\n    random_state=42                  # seed for reproducibility\\n)\\n\\n# Output the shapes of the resulting data splits\\nprint(f\\\"Training set shape: {X_train.shape}\\\")\\nprint(f\\\"Validation set shape: {X_val.shape}\\\")\\nprint(f\\\"Training target shape: {y_train.shape}\\\")\\nprint(f\\\"Validation target shape: {y_val.shape}\\\")\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"3\",\n        \"dependent_task_ids\": [\n            \"2\"\n        ],\n        \"instruction\": \"Train a model using the training set to predict wine class.\",\n        \"task_type\": \"model train\",\n        \"code\": \"from sklearn.ensemble import RandomForestClassifier\\nfrom sklearn.metrics import accuracy_score\\n\\n# Initialize the RandomForestClassifier\\nmodel = RandomForestClassifier(n_estimators=100, random_state=42)\\n\\n# Train the model on the training data\\nmodel.fit(X_train, y_train)\\n\\n# Predict the wine classes on the validation set\\ny_val_pred = model.predict(X_val)\\n\\n# Calculate the accuracy on the validation set\\nval_accuracy = accuracy_score(y_val, y_val_pred)\\n\\nprint(f\\\"Validation Accuracy: {val_accuracy:.4f}\\\")\\n\",\n        \"result\": \"a successful run\",\n        \"is_success\": true,\n        \"is_finished\": true\n    },\n    {\n        \"task_id\": \"4\",\n        \"dependent_task_ids\": [\n            \"3\"\n        ],\n        \"instruction\": \"Evaluate the model on the validation set and show the validation accuracy.\",\n        \"task_type\": \"model evaluate\",\n        \"code\": \"\",\n        \"result\": \"\",\n        \"is_success\": false,\n        
\"is_finished\": false\n    }\n]\n## Current Task\n{\"task_id\":\"4\",\"dependent_task_ids\":[\"3\"],\"instruction\":\"Evaluate the model on the validation set and show the validation accuracy.\",\"task_type\":\"model evaluate\",\"code\":\"\",\"result\":\"\",\"is_success\":false,\"is_finished\":false}\n\nassistant: # Evaluate the model on the validation set and show the validation accuracy\nval_accuracy = accuracy_score(y_val, y_val_pred)\nprint(f\"Validation Accuracy: {val_accuracy:.4f}\")\n\nuser: a successful runThis is a <task> review. Please review output from metagpt.actions.di.execute_nb_code.ExecuteNbCode\nIf you want to change, add, delete a task or merge tasks in the plan, say 'change task task_id or current task, ... (things to change)' If you confirm the output from the current task and wish to continue, type: confirmIf you think user requirement has been fulfilled completedly, you can finish the process by typing: finish\nIf you want to terminate the process, type: exit\nPlease type your review below:\n": "confirm"
 }
\ No newline at end of file
diff --git a/tests/metagpt/roles/di/test_data_interpreter.py b/tests/metagpt/roles/di/test_data_interpreter.py
index d25e5a099..e5cc5b29b 100644
--- a/tests/metagpt/roles/di/test_data_interpreter.py
+++ b/tests/metagpt/roles/di/test_data_interpreter.py
@@ -25,7 +25,6 @@ async def test_interpreter(mocker, auto_run):
 @pytest.mark.asyncio
 async def test_interpreter_react_mode(mocker):
     mocker.patch("metagpt.actions.di.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
-    mocker.patch("builtins.input", return_value="confirm")
 
     requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."
 
diff --git a/tests/metagpt/strategy/test_planner.py b/tests/metagpt/strategy/test_planner.py
new file mode 100644
index 000000000..ff1c6da3f
--- /dev/null
+++ b/tests/metagpt/strategy/test_planner.py
@@ -0,0 +1,37 @@
+from metagpt.schema import Plan, Task
+from metagpt.strategy.planner import Planner
+from metagpt.strategy.task_type import TaskType
+
+MOCK_TASK_MAP = {  # fixture tasks, keyed by task_id
+    "1": Task(  # finished task that carries both code and result
+        task_id="1",
+        instruction="test instruction for finished task",
+        task_type=TaskType.EDA.type_name,
+        dependent_task_ids=[],
+        code="some finished test code",
+        result="some finished test result",
+        is_finished=True,
+    ),
+    "2": Task(  # task with no code/result set; depends on task "1"
+        task_id="2",
+        instruction="test instruction for current task",
+        task_type=TaskType.DATA_PREPROCESS.type_name,
+        dependent_task_ids=["1"],
+    ),
+}
+MOCK_PLAN = Plan(  # plan built from the fixture tasks above
+    goal="test goal",
+    tasks=list(MOCK_TASK_MAP.values()),
+    task_map=MOCK_TASK_MAP,
+    current_task_id="2",  # task "2" is the plan's current task
+)
+
+
+def test_planner_get_plan_status():
+    planner = Planner(plan=MOCK_PLAN)  # planner wrapping the module-level fixture plan
+    status = planner.get_plan_status()
+
+    assert "some finished test code" in status  # code of finished task "1"
+    assert "some finished test result" in status  # result of finished task "1"
+    assert "test instruction for current task" in status  # instruction of current task "2"
+    assert TaskType.DATA_PREPROCESS.value.guidance in status  # current task guidance

From 8d4567ea199b8864623d7dd26fb4cc8af8246060 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Tue, 19 Mar 2024 19:36:59 +0800
Subject: [PATCH 06/12] support tool recommender spec during init

---
 metagpt/roles/di/data_interpreter.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py
index a8534b710..1943b4234 100644
--- a/metagpt/roles/di/data_interpreter.py
+++ b/metagpt/roles/di/data_interpreter.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import json
-from typing import Literal, Union
+from typing import Literal
 
 from pydantic import Field, model_validator
 
@@ -39,7 +39,7 @@ class DataInterpreter(Role):
     use_plan: bool = True
     use_reflection: bool = False
     execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True)
-    tools: Union[str, list[str]] = []  # Use special symbol ["<all>"] to indicate use of all registered tools
+    tools: list[str] = []  # Use special symbol ["<all>"] to indicate use of all registered tools
     tool_recommender: ToolRecommender = None
     react_mode: Literal["plan_and_act", "react"] = "plan_and_act"
     max_react_loop: int = 10  # used for react mode
@@ -50,7 +50,7 @@ class DataInterpreter(Role):
         self.use_plan = (
             self.react_mode == "plan_and_act"
         )  # create a flag for convenience, overwrite any passed-in value
-        if self.tools:
+        if self.tools and not self.tool_recommender:
             self.tool_recommender = BM25ToolRecommender(tools=self.tools)
         self.set_actions([WriteAnalysisCode])
         self._set_state(0)
@@ -104,7 +104,7 @@ class DataInterpreter(Role):
         plan_status = self.planner.get_plan_status() if self.use_plan else ""
 
         # tool info
-        if self.tools:
+        if self.tool_recommender:
             context = (
                 self.working_memory.get()[-1].content if self.working_memory.get() else ""
             )  # thoughts from _think stage in 'react' mode

From bc0d7b0620071f30f22823ef9522da8eabe66c26 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Wed, 20 Mar 2024 22:05:31 +0800
Subject: [PATCH 07/12] Update README.md and add tool_recommend

---
 examples/di/README.md               | 106 +++++++++++++++-------------
 examples/di/requirements_prompt.py  |  66 ++++++++---------
 examples/di/run_ml_benchmark.py     |   3 +-
 examples/di/run_open_ended_tasks.py |   3 +-
 4 files changed, 92 insertions(+), 86 deletions(-)

diff --git a/examples/di/README.md b/examples/di/README.md
index 3dbdc328b..e335a2617 100644
--- a/examples/di/README.md
+++ b/examples/di/README.md
@@ -17,84 +17,92 @@ ## Example List
 
 Please see the [docs](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for more explanation.
 
-## Paper Experiments
+## Experiments in the Paper
 
-Before running the experiments, download the [di_dataset](https://drive.google.com/drive/folders/17SpI9WL9kzd260q2DArbXKNcqhidjA7s?usp=sharing) and place it in the specified path (default `DATA_PATH` or any other path you prefer). Note that the `DATA_PATH = METAGPT_ROOT / "data"`.
+Before running the experiments, download the [di_dataset](https://drive.google.com/drive/folders/17SpI9WL9kzd260q2DArbXKNcqhidjA7s?usp=sharing) and place it in the specified path (default: `DATA_PATH`, where `DATA_PATH = METAGPT_ROOT / "data"`).
 
 To reproduce the results in the paper, run the following commands:
 
 ```
-python run_ml_benchmark.py --task_name 05_titanic
+python run_ml_benchmark.py --task_name 04_titanic
+```
+```
 python run_open_ended_tasks.py --task_name 14_image_background_removal --data_dir directory_to_di_dataset --use_reflection True
 ```
 
-The very simple `run_ml_benchmark.py` and `run_open_ended_tasks.py` scripts implement the pipeline of the Data Interpreter.
+The `run_ml_benchmark.py` and `run_open_ended_tasks.py` scripts implement the pipeline of the Data Interpreter.
 
 Some key arguments:
 
-- `--task_name`: required, the name of the task that combines the ID and the dataset name or the scenario name, e.g., `05_titanic` or `14_image_background_removal`.
+- `--task_name`: required, specifies the task to run, e.g., `04_titanic` or `14_image_background_removal`. Refer to the tables below for available task names.
 - `--data_dir`: optional, the directory that stores the `di_dataset` (default is `DATA_PATH`).
 - `--use_reflection`: optional, the flag to use reflection or not (default is True).
 
-## Data Interpreter Benchmark
-
 ### Data Interpreter Dataset Structure
 
 di_dataset
 
 - ml_benchmark
-    - 05_titanic
-    - 06_house-prices-advanced-regression-techniques
-    - 07_santander-customer-transaction-prediction
-    - 08_icr-identify-age-related-conditions
-    - 09_santander-value-prediction-challenge
+    - 04_titanic
+    - 05_house-prices-advanced-regression-techniques
+    - 06_santander-customer-transaction-prediction
+    - 07_icr-identify-age-related-conditions
+    - 08_santander-value-prediction-challenge
 - open_ended_tasks
-    - 01_invoice_ocr
-    - 02_invoice_ocr
-    - 03_invoice_ocr
+    - 01_ocr
+    - 02_ocr
+    - 03_ocr
     - 14_image_background_removal
     - 16_image_2_code_generation
     - 17_image_2_code_generation
 
 ### ML-Benchmark Dataset and Requirements
 
-ML-Benchmark contains 9 typical machine learning datasets.
+ML-Benchmark contains 8 typical machine learning datasets.
 
-| ID | Dataset Name                                  | User Requirement                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
-|----|-----------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 01 | Iris                                          | Run data analysis on sklearn Iris dataset, include a plot                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
-| 02 | Diabetes                                      | Run data analysis on sklearn Diabetes dataset, include a plot                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| 03 | Wine recognition                              | Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class with 20% as test set, and show prediction accuracy                                                                                                                                                                                                                                                                                                                                                                                                                                  |
-| 04 | Breast Cancer Wisconsin (Diagnostic)          | Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy                                                                                                                                                                                                                                                                                                                                                                                                                                   |
-| 05 | Titanic                                       | This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/05_titanic/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/05_titanic/split_eval.csv'.                                                                                                                                                                               |
-| 06 | House Prices - Advanced Regression Techniques | This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/06_house-prices-advanced-regression-techniques/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/06_house-prices-advanced-regression-techniques/split_eval.csv'.      |
-| 07 | Santander Customer Transaction Prediction     | This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/07_santander-customer-transaction-prediction/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/07_santander-customer-transaction-prediction/split_eval.csv' .                                                                                    |
-| 08 | ICR - Identifying Age-Related Conditions      | This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions. The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/08_icr-identify-age-related-conditions/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/08_icr-identify-age-related-conditions/split_eval.csv' . |
-| 09 | Santander Value Prediction Challenge          | This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: '{DATA_PATH}/ml_benchmark/09_santander-value-prediction-challenge/split_train.csv', eval data path: '{DATA_PATH}/ml_benchmark/09_santander-value-prediction-challenge/split_eval.csv' .                                                                                                     |
+| ID | Task Name             | Dataset Name       | User Requirement                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+|----|-----------------------|--------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 01 | 01_iris               | Iris               | Run data analysis on sklearn Iris dataset, include a plot                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| 02 | 02_wines_recognition  | Wine recognition   | Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class with 20% as test set, and show prediction accuracy                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| 03 | 03_breast_cancer      | Breast Cancer      | Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| 04 | 04_titanic            | Titanic            | This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_dir}/ml_benchmark/04_titanic/split_train.csv', eval data path: '{data_dir}/ml_benchmark/04_titanic/split_eval.csv'.                                                                                                                                                                               |
+| 05 | 05_house_prices       | House Prices       | This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_dir}/ml_benchmark/05_house-prices-advanced-regression-techniques/split_train.csv', eval data path: '{data_dir}/ml_benchmark/05_house-prices-advanced-regression-techniques/split_eval.csv'.      |
+| 06 | 06_santander_customer | Santander Customer | This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC on the eval data. Train data path: '{data_dir}/ml_benchmark/06_santander-customer-transaction-prediction/split_train.csv', eval data path: '{data_dir}/ml_benchmark/06_santander-customer-transaction-prediction/split_eval.csv'.                                                                                     |
+| 07 | 07_icr_identify       | ICR - Identifying  | This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions. The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_dir}/ml_benchmark/07_icr-identify-age-related-conditions/split_train.csv', eval data path: '{data_dir}/ml_benchmark/07_icr-identify-age-related-conditions/split_eval.csv'.  |
+| 08 | 08_santander_value    | Santander Value    | This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: '{data_dir}/ml_benchmark/08_santander-value-prediction-challenge/split_train.csv', eval data path: '{data_dir}/ml_benchmark/08_santander-value-prediction-challenge/split_eval.csv'.                                                                                                      |
+
+**Note**:
+1. `data_dir` is the directory where the di_dataset is stored.
 
 ### Open-Ended Tasks Dataset and Requirements
 
 Open-Ended Tasks have collected and designed 20 moderately challenging open-ended tasks, requiring Data Interpreters to understand user requirements, plan and decompose tasks, and generate and execute code.
 
-| ID | Scenario                           | Scenario Description                                                                                                                                    | User Requirement                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
-|----|------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 1  | Invoice OCR                        | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is an English invoice image. Your goal is to perform OCR on the image, extract the total amount from ocr result and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{DATA_PATH}/open_ended_tasks/01_invoice_ocr.png                                                                                                                                                                       |
-| 2  | Invoice OCR                        | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is a Chinese invoice image. Your goal is to perform OCR on the image and only output the recognized text word results, nothing else is needed, then extract the total amount and receipt ID starting with 'No' from ocr text words results and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{DATA_PATH}/open_ended_tasks/02_invoice_ocr.jpg'                                           |
-| 3  | Invoice OCR                        | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is an invoice image for OCR. Your goal is to perform OCR on the image, extract the total amount and save it into an Excel table format, using PaddleOCR with lang='en' The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{DATA_PATH}/open_ended_tasks/03_invoice_ocr.jpg'                                                                                                                                                   |
-| 4  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | Get data from `paperlist` table in <https://papercopic.com/statistics/iclr-statistics/iclr-2024-statistics/,> and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*                                                                                                                                                                                                                                                   |
-| 5  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 获取https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html的cpi数据, 请按照这个计划一步一步执行: 1. 检测目标网页的编码类型和html结构. 2.爬取网页, 将网页正文内容去重，并转换为段落清晰适合阅读的纯文本, 并保存到target.txt. 3.设计多个正则匹配表达式来匹配target.txt中关键语句, 使用try-except语句组合各个正则匹配, 注意网页文本是中文. 4.最后使用中文总结概括关键语句回答用户的请求. **注意: 如果是代码块, 请将代码块的关键变量结果打印出来; 如果是网页文本就打印前200个字符.**                                                                                                                                                                       |
-| 6  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 爬取电子商务网站https://scrapeme.live/shop/中的商品数据并保存为csv文件。**注意: 第一步要先解析网页编码和html结构; csv中保存商品名称、价格、url、图片网址;**                                                                                                                                                                                                                                                                                                                                                                                    |
-| 7  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 从36kr创投平台https://pitchhub.36kr.com/financing-flash所有初创企业融资的信息, **注意: 这是⼀个中⽂⽹站**; 下⾯是⼀个⼤致流程, 你会根据每⼀步的运⾏结果对当前计划中的任务做出适当调整: 1. 爬取并本地保存html结构; 2. 直接打印第7个*快讯*关键词后2000个字符的html内容, 作为*快讯的html内容示例*; 3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*快讯*的标题、链接、时间; 4. 筛选最近3天的初创企业融资*快讯*, 以list[dict]形式打印前5个。5. 将全部结果存在本地csv中                                                                                                                                                                                |
-| 8  | Email reply                        | Filter through my emails and respond to them as necessary                                                                                               | You are an agent that automatically reads and replies to emails. I will give you your Outlook email account and password. You need to check the content of the latest email and return it to me. If the email address suffix of this email is [@communication.microsoft.com](http://@communication.microsoft.com), please automatically reply with "I've received your email and will reply as soon as possible. Thank you!" Email account: <englishgpt@outlook.com> Email Password: xxxx |
-| 9  | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://medium.com/.> Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a text file. All required dependencies and environments have been fully installed and configured.                                                                                                                                                                             |
-| 10 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://pytorch.org/.> Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                           |
-| 11 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://www.kaggle.com/.> Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                    |
-| 12 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: [https://chat.openai.com/auth/login](https://chat.openai.com/auth/login.).[.](https://www.kaggle.com/.) Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.       |
-| 13 | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: <https://deepmind.google/technologies/gemini/#introduction>. Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                  |
-| 14 | Image Background Removal           | Remove the background of a given image                                                                                                                  | This is an image, you need to use python toolkit rembg remove the background of the image. image path:'{DATA_PATH}/open_ended_tasks/14_image_background_removal.jpg'; save path:'{DATA_PATH}/open_ended_tasks/14_image_background_removal.jpg'                                                                                                                                                                                                                                            |
-| 15 | Text2Img                           | Use SD tools to generate images                                                                                                                         | I want to generate an image of a beautiful girl using the stable diffusion text2image tool, sd_url = "http://your.sd.service.ip:port"                                                                                                                                                                                                                                                                                                                                                     |
-| 16 | Image2Code Generation              | Web code generation                                                                                                                                     | This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{DATA_PATH}/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                          |
-| 17 | Image2Code Generation              | Web code generation                                                                                                                                     | This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{DATA_PATH}/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                          |
-| 18 | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied                                                                   |
-| 19 | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | You are a professional game developer, please use pyxel software to create a simple jumping game. The game needs to include a character that can move left and right on the screen. When the player presses the spacebar, the character should jump. Please ensure that the game is easy to operate, with clear graphics, and complies with the functional limitations of pyxel software. Note: pyxel environment already satisfied                                                       |
-| 20 | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | Make a mouse click game that click button as many times as possible in 30 seconds using pyxel. Note: pyxel environment already satisfied                                                                                                                                                                                                                                                                                                                                                  |
+| ID | Task Name                   | Scenario                           | Scenario Description                                                                                                                                    | User Requirement                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+|----|-----------------------------|------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 01 | 01_ocr                      | OCR                                | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is an English invoice image. Your goal is to perform OCR on the image, extract the total amount from ocr result and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/open_ended_tasks/01_ocr.png                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| 02 | 02_ocr                      | OCR                                | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is a Chinese invoice image. Your goal is to perform OCR on the image and only output the recognized text word results, nothing else is needed, then extract the total amount and receipt ID starting with 'No' from ocr text words results and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/open_ended_tasks/02_ocr.jpg'                                                                                                                                                                                                                                                                                                                                                                    |
+| 03 | 03_ocr                      | OCR                                | Scan all the necessary fields and amounts from the given file and then create an Excel sheet with the extracted data.                                   | This is an invoice image for OCR. Your goal is to perform OCR on the image, extract the total amount and save it into an Excel table format, using PaddleOCR with lang='en' The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/open_ended_tasks/03_ocr.jpg'                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+| 04 | 04_web_search_and_crawling  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | Get data from `paperlist` table in https://papercopic.com/statistics/iclr-statistics/iclr-2024-statistics/ , and save it to a csv file. paper title must include `multiagent` or `large language model`. **notice: print key variables**                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| 05 | 05_web_search_and_crawling  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | Obtain the CPI data from https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html, please follow this plan step by step: 1. Detect the encoding type and HTML structure of the target webpage. 2. Crawl the webpage, de-duplicate the body content, convert it to a clear paragraph suitable for reading as plain text, and save it to target.txt. 3. Design multiple regular expressions to match key sentences in target.txt, use try-except statements to combine the various regular expression matches, note that the webpage text is in Chinese. 4. Finally, use a Chinese summary to summarize the key sentences to answer the user's request. **Note: If it is a code block, print out the key variable results of the code block; if it is webpage text, print the first 200 characters.** |
+| 06 | 06_web_search_and_crawling  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | Get products data from website https://scrapeme.live/shop/ and save it as a csv file. Notice: Firstly parse the web page encoding and the text HTML structure; The first page product name, price, product URL, and image URL must be saved in the csv;                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| 07 | 07_web_search_and_crawling  | Web search and crawling            | Crawling and organizing web form information                                                                                                            | 从36kr创投平台https://pitchhub.36kr.com/financing-flash所有初创企业融资的信息, **注意: 这是⼀个中⽂⽹站**; 下⾯是⼀个⼤致流程, 你会根据每⼀步的运⾏结果对当前计划中的任务做出适当调整: 1. 爬取并本地保存html结构; 2. 直接打印第7个**快讯**关键词后2000个字符的html内容, 作为**快讯的html内容示例**; 3. 反思**快讯的html内容示例**中的规律, 设计正则匹配表达式**来获取快讯**的标题、链接、时间; 4. 筛选最近3天的初创企业融资**快讯**, 以list[dict]形式打印前5个。5. 将全部结果存在本地csv中                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
+| 08 | 08_email_reply              | Email reply                        | Filter through my emails and respond to them as necessary                                                                                               | You are an agent that automatically reads and replies to emails. I will give you your Outlook email account and password. You need to check the content of the latest email and return it to me. If the email address suffix of this email is @xxx.xxx, please automatically reply with "I've received your email and will reply as soon as possible. Thank you!" Email account: xxx@xxx.xxx Email Password: xxxx                                                                                                                                                                                                                                                                                                                                                                                         |
+| 09 | 09_web_page_imitation       | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: https://medium.com/ .  Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a text file. All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
+| 10 | 10_web_page_imitation       | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: https://pytorch.org/ .  Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| 11 | 11_web_page_imitation       | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: https://www.kaggle.com/ . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                                                     |
+| 12 | 12_web_page_imitation       | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: https://chat.openai.com/auth/login . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                                          |
+| 13 | 13_web_page_imitation       | Web page imitation                 | Using Selenium and WebDriver to access a webpage and convert it to an image, with the assistance of GPT-4V to mimic the creation of a one-page website. | This is a URL of webpage: https://deepmind.google/technologies/gemini/#introduction . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                   |
+| 14 | 14_image_background_removal | Image Background Removal           | Remove the background of a given image                                                                                                                  | This is an image, you need to use python toolkit rembg remove the background of the image. image path:'{data_dir}/open_ended_tasks/14_image_background_removal.jpg'; save path:'{data_dir}/open_ended_tasks/14_image_background_removal.jpg'                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
+| 15 | 15_text2img                 | Text2Img                           | Use SD tools to generate images                                                                                                                         | I want to generate an image of a beautiful girl using the stable diffusion text2image tool, sd_url = "http://your.sd.service.ip:port"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
+| 16 | 16_image_2_code_generation  | Image2Code Generation              | Web code generation                                                                                                                                     | This is a image. First, convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| 17 | 17_image_2_code_generation  | Image2Code Generation              | Web code generation                                                                                                                                     | This is a image. First, convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| 18 | 18_generate_games           | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied                                                                                                                                                                                                                                                                                                                                                                                   |
+| 19 | 19_generate_games           | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | You are a professional game developer, please use pyxel software to create a simple jumping game. The game needs to include a character that can move left and right on the screen. When the player presses the spacebar, the character should jump. Please ensure that the game is easy to operate, with clear graphics, and complies with the functional limitations of pyxel software. Note: pyxel environment already satisfied                                                                                                                                                                                                                                                                                                                                                                       |
+| 20 | 20_generate_games           | Generate games using existing repo | Game tool usage (pyxel)                                                                                                                                 | Make a mouse click game that click button as many times as possible in 30 seconds using pyxel. Note: pyxel environment already satisfied                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+
+**Note**:
+1. `data_dir` is the directory where the di_dataset is stored.
+2. The specific email account and password need to be replaced with the actual email account and password in `requirements_prompt.py`.
+3. The specific sd_url needs to be replaced with the actual sd_url in `requirements_prompt.py`.
+4. Code related to "Generate games using existing repo" and the Math benchmark is being integrated. Stay tuned.
diff --git a/examples/di/requirements_prompt.py b/examples/di/requirements_prompt.py
index d833ff45b..04a0414b1 100644
--- a/examples/di/requirements_prompt.py
+++ b/examples/di/requirements_prompt.py
@@ -1,25 +1,22 @@
 # ML-Benchmark requirements
 IRIS_REQ = "Run data analysis on sklearn Iris dataset, include a plot"
-DIABETES_REQ = "Run data analysis on sklearn diabetes dataset, include a plot"
 WINES_RECOGNITION_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class with 20% as test set, and show prediction accuracy"
 BREAST_CANCER_WISCONSIN_REQ = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
-TITANIC_REQ = "This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/05_titanic/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/05_titanic/split_eval.csv'."
-HOUSE_PRICES_ADVANCED_REGRESSION_TECHNIQUES_REQ = "This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/06_house-prices-advanced-regression-techniques/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/06_house-prices-advanced-regression-techniques/split_eval.csv'."
-SANTANDER_CUSTOMER_TRANSACTION_PREDICTION_REQ = "This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/07_santander-customer-transaction-prediction/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/07_santander-customer-transaction-prediction/split_eval.csv' ."
-ICR_IDENTITY_AGE_RELATED_CONDITIONS_REQ = "This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions. The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/08_icr-identify-age-related-conditions/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/08_icr-identify-age-related-conditions/split_eval.csv' ."
-SANTANDER_VALUE_PREDICTION_CHALLENGE_REQ = "This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/09_santander-value-prediction-challenge/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/09_santander-value-prediction-challenge/split_eval.csv' ."
+TITANIC_REQ = "This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/04_titanic/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/04_titanic/split_eval.csv'."
+HOUSE_PRICES_ADVANCED_REGRESSION_TECHNIQUES_REQ = "This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/05_house-prices-advanced-regression-techniques/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/05_house-prices-advanced-regression-techniques/split_eval.csv'."
+SANTANDER_CUSTOMER_TRANSACTION_PREDICTION_REQ = "This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/06_santander-customer-transaction-prediction/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/06_santander-customer-transaction-prediction/split_eval.csv' ."
+ICR_IDENTITY_AGE_RELATED_CONDITIONS_REQ = "This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions. The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/07_icr-identify-age-related-conditions/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/07_icr-identify-age-related-conditions/split_eval.csv' ."
+SANTANDER_VALUE_PREDICTION_CHALLENGE_REQ = "This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: '{data_dir}/di_dataset/ml_benchmark/08_santander-value-prediction-challenge/split_train.csv', eval data path: '{data_dir}/di_dataset/ml_benchmark/08_santander-value-prediction-challenge/split_eval.csv' ."
 
 # Open-Ended Tasks requirements
-INVOICE_OCR_REQ_01 = "This is an English invoice image. Your goal is to perform OCR on the image, extract the total amount from ocr result and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/01_invoice_ocr.png"
-INVOICE_OCR_REQ_02 = "This is a Chinese invoice image. Your goal is to perform OCR on the image and only output the recognized text word results, nothing else is needed, then extract the total amount and receipt ID starting with 'No' from ocr text words results and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/02_invoice_ocr.jpg"
-INVOICE_OCR_REQ_03 = "This is an invoice image for OCR. Your goal is to perform OCR on the image, extract the total amount and save it into an Excel table format, using PaddleOCR with lang='en' The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/03_invoice_ocr.jpg"
-WEB_SEARCH_AND_CRAWLING_REQ_04 = "Get data from `paperlist` table in <https://papercopic.com/statistics/iclr-statistics/iclr-2024-statistics/,> and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables"
-WEB_SEARCH_AND_CRAWLING_REQ_05 = "获取https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html的cpi数据, 请按照这个计划一步一步执行: 1. 检测目标网页的编码类型和html结构. 2.爬取网页, 将网页正文内容去重，并转换为段落清晰适合阅读的纯文本, 并保存到target.txt. 3.设计多个正则匹配表达式来匹配target.txt中关键语句, 使用try-except语句组合各个正则匹配, 注意网页文本是中文. 4.最后使用中文总结概括关键语句回答用户的请求. **注意: 如果是代码块, 请将代码块的关键变量结果打印出来; 如果是网页文本就打印前200个字符."
-WEB_SEARCH_AND_CRAWLING_REQ_06 = (
-    """爬取电子商务网站https://scrapeme.live/shop/ 中的商品数据并保存为csv文件。**注意: 第一步要先解析网页编码和html结构; csv中保存商品名称、价格、url、图片网址;** """
-)
-WEB_SEARCH_AND_CRAWLING_REQ_07 = "从36kr创投平台https://pitchhub.36kr.com/financing-flash所有初创企业融资的信息, **注意: 这是⼀个中⽂⽹站**; 下⾯是⼀个⼤致流程, 你会根据每⼀步的运⾏结果对当前计划中的任务做出适当调整: 1. 爬取并本地保存html结构; 2. 直接打印第7个*快讯*关键词后2000个字符的html内容, 作为*快讯的html内容示例*; 3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*快讯*的标题、链接、时间; 4. 筛选最近3天的初创企业融资*快讯*, 以list[dict]形式打印前5个。5. 将全部结果存在本地csv中"
-EMAIL_REPLY_REQ_08 = """You are an agent that automatically reads and replies to emails. I will give you your Outlook email account and password. You need to check the content of the latest email and return it to me. If the email address suffix of this email is [@communication.microsoft.com](http://@communication.microsoft.com), please automatically reply with "I've received your email and will reply as soon as possible. Thank you!" Email account: <englishgpt@outlook.com> Email Password: xxxx"""
+OCR_REQ_01 = "This is an English invoice image. Your goal is to perform OCR on the image, extract the total amount from ocr result and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/01_ocr.png"
+OCR_REQ_02 = "This is a Chinese invoice image. Your goal is to perform OCR on the image and only output the recognized text word results, nothing else is needed, then extract the total amount and receipt ID starting with 'No' from ocr text words results and save as table, using PaddleOCR. The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/02_ocr.jpg"
+OCR_REQ_03 = "This is an invoice image for OCR. Your goal is to perform OCR on the image, extract the total amount and save it into an Excel table format, using PaddleOCR with lang='en' The PaddleOCR environment has been fully installed, try to use Paddleocr as much as possible. Image path: '{data_dir}/di_dataset/open_ended_tasks/03_ocr.jpg"
+WEB_SEARCH_AND_CRAWLING_REQ_04 = "Get data from `paperlist` table in https://papercopic.com/statistics/iclr-statistics/iclr-2024-statistics/ , and save it to a csv file. paper title must include `multiagent` or `large language model`. **notice: print key variables**"
+WEB_SEARCH_AND_CRAWLING_REQ_05 = "Obtain the CPI data from https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html, please follow this plan step by step: 1. Detect the encoding type and HTML structure of the target webpage. 2. Crawl the webpage, de-duplicate the body content, convert it to a clear paragraph suitable for reading as plain text, and save it to target.txt. 3. Design multiple regular expressions to match key sentences in target.txt, use try-except statements to combine the various regular expression matches, note that the webpage text is in Chinese. 4. Finally, use a Chinese summary to summarize the key sentences to answer the user's request. **Note: If it is a code block, print out the key variable results of the code block; if it is webpage text, print the first 200 characters.**"
+WEB_SEARCH_AND_CRAWLING_REQ_06 = "Get products data from website https://scrapeme.live/shop/ and save it as a csv file. Notice: Firstly parse the web page encoding and the text HTML structure; The first page product name, price, product URL, and image URL must be saved in the csv;"
+WEB_SEARCH_AND_CRAWLING_REQ_07 = "从36kr创投平台https://pitchhub.36kr.com/financing-flash所有初创企业融资的信息, **注意: 这是⼀个中⽂⽹站**; 下⾯是⼀个⼤致流程, 你会根据每⼀步的运⾏结果对当前计划中的任务做出适当调整: 1. 爬取并本地保存html结构; 2. 直接打印第7个**快讯**关键词后2000个字符的html内容, 作为**快讯的html内容示例**; 3. 反思**快讯的html内容示例**中的规律, 设计正则匹配表达式来获取**快讯**的标题、链接、时间; 4. 筛选最近3天的初创企业融资**快讯**, 以list[dict]形式打印前5个。5. 将全部结果存在本地csv中"
+EMAIL_REPLY_REQ_08 = """You are an agent that automatically reads and replies to emails. I will give you your Outlook email account and password. You need to check the content of the latest email and return it to me. If the email address suffix of this email is @xxx.xxx, please automatically reply with "I've received your email and will reply as soon as possible. Thank you!" Email account: xxx@xxx.xxx Email Password: xxxx"""
 WEB_PAGE_IMITATION_REQ_09 = "This is a URL of webpage: https://medium.com/ . Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a text file. All required dependencies and environments have been fully installed and configured."
 WEB_PAGE_IMITATION_REQ_10 = "This is a URL of webpage: https://pytorch.org/ . Firstly, utilize Selenium and WebDriver for rendering. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
 WEB_PAGE_IMITATION_REQ_11 = "This is a URL of webpage: https://www.kaggle.com/ . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
@@ -27,28 +24,27 @@ WEB_PAGE_IMITATION_REQ_12 = "This is a URL of webpage: https://chat.openai.com/a
 WEB_PAGE_IMITATION_REQ_13 = "This is a URL of webpage: https://deepmind.google/technologies/gemini/#introduction . Firstly, utilize Selenium and WebDriver to render the webpage, ensuring the browser window is maximized for an optimal viewing experience. Secondly, convert image to a webpage including HTML, CSS and JS in one go. Finally, save webpage in a file. NOTE: All required dependencies and environments have been fully installed and configured."
 IMAGE_BACKGROUND_REMOVAL_REQ_14 = "This is an image, you need to use python toolkit rembg remove the background of the image. image path:'{data_dir}/di_dataset/open_ended_tasks/14_image_background_removal.jpg'; save path:'{data_dir}/di_dataset/open_ended_tasks/14_image_background_removal_result.jpg'"
 TEXT2IMG_REQ_15 = """I want to generate an image of a beautiful girl using the stable diffusion text2image tool, sd_url = 'http://your.sd.service.ip:port'"""
-IMAGE2CODE_GENERATION_REQ_16 = "This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/di_dataset/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured."
-IMAGE2CODE_GENERATION_REQ_17 = "This is a image. First, check if the path exists, then convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/di_dataset/open_ended_tasks/17_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured."
-GENERATE_GAMES_USING_EXISTING_REPO_REQ_18 = "Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied"
-GENERATE_GAMES_USING_EXISTING_REPO_REQ_19 = "You are a professional game developer, please use pyxel software to create a simple jumping game. The game needs to include a character that can move left and right on the screen. When the player presses the spacebar, the character should jump. Please ensure that the game is easy to operate, with clear graphics, and complies with the functional limitations of pyxel software. Note: pyxel environment already satisfied"
-GENERATE_GAMES_USING_EXISTING_REPO_REQ_20 = "Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied"
+IMAGE2CODE_GENERATION_REQ_16 = "This is a image. First, convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/di_dataset/open_ended_tasks/16_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured."
+IMAGE2CODE_GENERATION_REQ_17 = "This is a image. First, convert the image to webpage code including HTML, CSS and JS in one go, and finally save webpage code in a file.The image path: '{data_dir}/di_dataset/open_ended_tasks/17_image_2_code_generation.png'. NOTE: All required dependencies and environments have been fully installed and configured."
+GENERATE_GAMES_REQ_18 = "Create a Snake game. Players need to control the movement of the snake to eat food and grow its body, while avoiding the snake's head touching their own body or game boundaries. Games need to have basic game logic, user interface. During the production process, please consider factors such as playability, beautiful interface, and convenient operation of the game. Note: pyxel environment already satisfied"
+GENERATE_GAMES_REQ_19 = "You are a professional game developer, please use pyxel software to create a simple jumping game. The game needs to include a character that can move left and right on the screen. When the player presses the spacebar, the character should jump. Please ensure that the game is easy to operate, with clear graphics, and complies with the functional limitations of pyxel software. Note: pyxel environment already satisfied"
+GENERATE_GAMES_REQ_20 = "Make a mouse click game that click button as many times as possible in 30 seconds using pyxel. Note: pyxel environment already satisfied"
 
 ML_BENCHMARK_REQUIREMENTS = {
     "01_iris": IRIS_REQ,
-    "02_diabetes": DIABETES_REQ,
-    "03_wines_recognition": WINES_RECOGNITION_REQ,
-    "04_breast_cancer_wisconsin": BREAST_CANCER_WISCONSIN_REQ,
-    "05_titanic": TITANIC_REQ,
-    "06_house-prices-advanced-regression-techniques": HOUSE_PRICES_ADVANCED_REGRESSION_TECHNIQUES_REQ,
-    "07_santander-customer-transaction-prediction": SANTANDER_CUSTOMER_TRANSACTION_PREDICTION_REQ,
-    "08_icr-identify-age-related-conditions": ICR_IDENTITY_AGE_RELATED_CONDITIONS_REQ,
-    "09_santander-value-prediction-challenge": SANTANDER_VALUE_PREDICTION_CHALLENGE_REQ,
+    "02_wines_recognition": WINES_RECOGNITION_REQ,
+    "03_breast_cancer": BREAST_CANCER_WISCONSIN_REQ,
+    "04_titanic": TITANIC_REQ,
+    "05_house_prices": HOUSE_PRICES_ADVANCED_REGRESSION_TECHNIQUES_REQ,
+    "06_santander_customer": SANTANDER_CUSTOMER_TRANSACTION_PREDICTION_REQ,
+    "07_icr_identify": ICR_IDENTITY_AGE_RELATED_CONDITIONS_REQ,
+    "08_santander_value": SANTANDER_VALUE_PREDICTION_CHALLENGE_REQ,
 }
 
 OPEN_ENDED_TASKS_REQUIREMENTS = {
-    "01_invoice_ocr": INVOICE_OCR_REQ_01,
-    "02_invoice_ocr": INVOICE_OCR_REQ_02,
-    "03_invoice_ocr": INVOICE_OCR_REQ_03,
+    "01_ocr": OCR_REQ_01,
+    "02_ocr": OCR_REQ_02,
+    "03_ocr": OCR_REQ_03,
     "04_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_04,
     "05_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_05,
     "06_web_search_and_crawling": WEB_SEARCH_AND_CRAWLING_REQ_06,
@@ -63,7 +59,7 @@ OPEN_ENDED_TASKS_REQUIREMENTS = {
     "15_text2img": TEXT2IMG_REQ_15,
     "16_image_2_code_generation": IMAGE2CODE_GENERATION_REQ_16,
     "17_image_2_code_generation": IMAGE2CODE_GENERATION_REQ_17,
-    "18_generate_games_using_existing_repo": GENERATE_GAMES_USING_EXISTING_REPO_REQ_18,
-    "19_generate_games_using_existing_repo": GENERATE_GAMES_USING_EXISTING_REPO_REQ_19,
-    "20_generate_games_using_existing_repo": GENERATE_GAMES_USING_EXISTING_REPO_REQ_20,
+    "18_generate_games": GENERATE_GAMES_REQ_18,
+    "19_generate_games": GENERATE_GAMES_REQ_19,
+    "20_generate_games": GENERATE_GAMES_REQ_20,
 }
diff --git a/examples/di/run_ml_benchmark.py b/examples/di/run_ml_benchmark.py
index ead2638f9..327ab986e 100644
--- a/examples/di/run_ml_benchmark.py
+++ b/examples/di/run_ml_benchmark.py
@@ -5,6 +5,7 @@ import fire
 from examples.di.requirements_prompt import ML_BENCHMARK_REQUIREMENTS
 from metagpt.const import DATA_PATH
 from metagpt.roles.di.data_interpreter import DataInterpreter
+from metagpt.tools.tool_recommend import TypeMatchToolRecommender
 
 
 # Ensure ML-Benchmark dataset has been downloaded before using these example.
@@ -13,7 +14,7 @@ async def main(task_name, data_dir=DATA_PATH, use_reflection=True):
         raise FileNotFoundError(f"ML-Benchmark dataset not found in {data_dir}.")
 
     requirement = ML_BENCHMARK_REQUIREMENTS[task_name].format(data_dir=data_dir)
-    di = DataInterpreter(use_reflection=use_reflection)
+    di = DataInterpreter(use_reflection=use_reflection, tool_recommender=TypeMatchToolRecommender(tools=["<all>"]))
     await di.run(requirement)
 
 
diff --git a/examples/di/run_open_ended_tasks.py b/examples/di/run_open_ended_tasks.py
index 19f0703fc..abe10015e 100644
--- a/examples/di/run_open_ended_tasks.py
+++ b/examples/di/run_open_ended_tasks.py
@@ -5,6 +5,7 @@ import fire
 from examples.di.requirements_prompt import OPEN_ENDED_TASKS_REQUIREMENTS
 from metagpt.const import DATA_PATH
 from metagpt.roles.di.data_interpreter import DataInterpreter
+from metagpt.tools.tool_recommend import TypeMatchToolRecommender
 
 
 # Ensure Open-Ended Tasks dataset has been downloaded before using this example.
@@ -13,7 +14,7 @@ async def main(task_name, data_dir=DATA_PATH, use_reflection=True):
         raise FileNotFoundError(f"Open-ended task dataset not found in {data_dir}.")
 
     requirement = OPEN_ENDED_TASKS_REQUIREMENTS[task_name].format(data_dir=data_dir)
-    di = DataInterpreter(use_reflection=use_reflection)
+    di = DataInterpreter(use_reflection=use_reflection, tool_recommender=TypeMatchToolRecommender(tools=["<all>"]))
     await di.run(requirement)
 
 

From 28d293d4904b22cd4f0cb11d4fa682ea8504eac7 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 25 Mar 2024 15:57:31 +0800
Subject: [PATCH 08/12] docstring are usually english, discard jieba tokenizer

---
 metagpt/tools/tool_recommend.py | 5 ++---
 requirements.txt                | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py
index 69b9a4b5d..d8b6040db 100644
--- a/metagpt/tools/tool_recommend.py
+++ b/metagpt/tools/tool_recommend.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 import json
 from typing import Any
 
-import jieba
 import numpy as np
 from pydantic import BaseModel, field_validator
 from rank_bm25 import BM25Okapi
@@ -182,7 +181,7 @@ class BM25ToolRecommender(ToolRecommender):
         self.bm25 = BM25Okapi(tokenized_corpus)
 
     def _tokenize(self, text):
-        return jieba.lcut(text)  # FIXME: needs more sophisticated tokenization
+        return text.split()  # FIXME: needs more sophisticated tokenization
 
     async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
         query = plan.current_task.instruction if plan else context
@@ -193,7 +192,7 @@ class BM25ToolRecommender(ToolRecommender):
         recalled_tools = [list(self.tools.values())[index] for index in top_indexes]
 
         logger.info(
-            f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[doc_scores[index] for index in top_indexes]}"
+            f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[np.round(doc_scores[index], 4) for index in top_indexes]}"
         )
 
         return recalled_tools
diff --git a/requirements.txt b/requirements.txt
index d0ee8c95c..c97d4b2f2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -71,5 +71,4 @@ Pillow
 imap_tools==1.5.0  # Used by metagpt/tools/libs/email_login.py
 qianfan==0.3.2
 dashscope==1.14.1
-rank-bm25==0.2.2  # for tool recommendation
-jieba==0.42.1  # for tool recommendation
\ No newline at end of file
+rank-bm25==0.2.2  # for tool recommendation
\ No newline at end of file

From 67464c37f11d854da43db99ad82a95bb81018962 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 25 Mar 2024 16:09:31 +0800
Subject: [PATCH 09/12] use ast to parse code texts and register a whole repo
 as tools

---
 metagpt/tools/tool_convert.py  | 80 +++++++++++++++++++++++++++++++++-
 metagpt/tools/tool_registry.py | 76 +++++++++++++-------------------
 2 files changed, 110 insertions(+), 46 deletions(-)

diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
index d366bef41..529f5ec14 100644
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@@ -1,3 +1,4 @@
+import ast
 import inspect
 
 from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces
@@ -5,7 +6,8 @@ from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces
 PARSER = GoogleDocstringParser
 
 
-def convert_code_to_tool_schema(obj, include: list[str] = None):
+def convert_code_to_tool_schema(obj, include: list[str] = None) -> dict:
+    """Converts an object (function or class) to a tool schema by inspecting the object"""
     docstring = inspect.getdoc(obj)
     # assert docstring, "no docstring found for the objects, skip registering"
 
@@ -27,6 +29,23 @@ def convert_code_to_tool_schema(obj, include: list[str] = None):
     return schema
 
 
+def convert_code_to_tool_schema_ast(code: str) -> dict[str, dict]:
+    """Parse `code` with AST and return a {tool_name: tool_schema} dict of its public functions and classes"""
+
+    # Attach a `parent` attribute (the direct parent node) to every AST node, so methods can be told from functions
+    def add_parent_references(node, parent=None):
+        node.parent = parent
+        for child in ast.iter_child_nodes(node):
+            add_parent_references(child, parent=node)
+
+    visitor = CodeVisitor()
+    parsed_code = ast.parse(code)
+    add_parent_references(parsed_code)
+    visitor.visit(parsed_code)
+
+    return visitor.get_tool_schemas()
+
+
 def function_docstring_to_schema(fn_obj, docstring) -> dict:
     """
     Converts a function's docstring into a schema dictionary.
@@ -62,3 +81,62 @@ def get_class_method_docstring(cls, method_name):
             if method.__doc__:
                 return method.__doc__
     return None  # No docstring found in the class hierarchy
+
+
+class CodeVisitor(ast.NodeVisitor):
+    """Visit and convert the AST nodes within a code file to tool schemas"""
+
+    def __init__(self):
+        self.tool_schemas = {}  # {tool_name: tool_schema}
+
+    def visit_ClassDef(self, node):
+        """Record a class schema with its public methods and `__init__`; the class body is not visited further"""
+        class_schemas = {"type": "class", "description": remove_spaces(ast.get_docstring(node)), "methods": {}}
+        for body_node in node.body:
+            if isinstance(body_node, (ast.FunctionDef, ast.AsyncFunctionDef)) and (
+                not body_node.name.startswith("_") or body_node.name == "__init__"
+            ):
+                class_schemas["methods"][body_node.name] = self._get_function_schemas(body_node)
+        self.tool_schemas[node.name] = class_schemas
+
+    def visit_FunctionDef(self, node):
+        """Record a public function; skip private names and nodes whose `parent` attribute is a class"""
+        if isinstance(getattr(node, "parent", None), ast.ClassDef) or node.name.startswith("_"):
+            return
+        self.tool_schemas[node.name] = self._get_function_schemas(node)
+
+    def visit_AsyncFunctionDef(self, node):
+        """Record a public async function, applying the same filter as `visit_FunctionDef`"""
+        if isinstance(getattr(node, "parent", None), ast.ClassDef) or node.name.startswith("_"):
+            return
+        self.tool_schemas[node.name] = self._get_function_schemas(node)
+
+    def _get_function_schemas(self, node):
+        """Build the schema dict (type, description, signature, parameters) of one function node"""
+        docstring = remove_spaces(ast.get_docstring(node))
+        overall_desc, param_desc = PARSER.parse(docstring)
+        return {
+            "type": "async_function" if isinstance(node, ast.AsyncFunctionDef) else "function",
+            "description": overall_desc,
+            "signature": self._get_function_signature(node),
+            "parameters": param_desc,
+        }
+
+    def _get_function_signature(self, node):
+        """Render '(arg: type = default, ...) -> ret' from `node.args.args` and `node.returns` only"""
+        args = []
+        # Defaults align with the trailing args; with no defaults the slice keeps all names but zip yields nothing
+        defaults = dict(zip([arg.arg for arg in node.args.args][-len(node.args.defaults) :], node.args.defaults))
+        for arg in node.args.args:
+            arg_str = arg.arg
+            if arg.annotation:
+                arg_str += f": {ast.unparse(arg.annotation)}"
+            if arg.arg in defaults:
+                arg_str += f" = {ast.unparse(defaults[arg.arg])}"
+            args.append(arg_str)
+
+        return_annotation = f" -> {ast.unparse(node.returns)}" if node.returns else ""
+        return f"({', '.join(args)}){return_annotation}"
+
+    def get_tool_schemas(self):
+        return self.tool_schemas
diff --git a/metagpt/tools/tool_registry.py b/metagpt/tools/tool_registry.py
index e3d270b79..2fc44a2e8 100644
--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@@ -7,17 +7,20 @@
 """
 from __future__ import annotations
 
-import importlib.util
 import inspect
 import os
 from collections import defaultdict
+from pathlib import Path
 
 import yaml
 from pydantic import BaseModel
 
 from metagpt.const import TOOL_SCHEMA_PATH
 from metagpt.logs import logger
-from metagpt.tools.tool_convert import convert_code_to_tool_schema
+from metagpt.tools.tool_convert import (
+    convert_code_to_tool_schema,
+    convert_code_to_tool_schema_ast,
+)
 from metagpt.tools.tool_data_type import Tool, ToolSchema
 
 
@@ -27,21 +30,23 @@ class ToolRegistry(BaseModel):
 
     def register_tool(
         self,
-        tool_name,
-        tool_path,
-        schema_path="",
-        tool_code="",
-        tags=None,
-        tool_source_object=None,
-        include_functions=None,
-        verbose=False,
+        tool_name: str,
+        tool_path: str,
+        schemas: dict = None,
+        schema_path: str = "",
+        tool_code: str = "",
+        tags: list[str] = None,
+        tool_source_object=None,  # can be any classes or functions
+        include_functions: list[str] = None,
+        verbose: bool = False,
     ):
         if self.has_tool(tool_name):
             return
 
         schema_path = schema_path or TOOL_SCHEMA_PATH / f"{tool_name}.yml"
 
-        schemas = make_schema(tool_source_object, include_functions, schema_path)
+        if not schemas:
+            schemas = make_schema(tool_source_object, include_functions, schema_path)
 
         if not schemas:
             return
@@ -117,9 +122,6 @@ def make_schema(tool_source_object, include, path):
         schema = convert_code_to_tool_schema(tool_source_object, include=include)
         with open(path, "w", encoding="utf-8") as f:
             yaml.dump(schema, f, sort_keys=False)
-        # import json
-        # with open(str(path).replace("yml", "json"), "w", encoding="utf-8") as f:
-        #     json.dump(schema, f, ensure_ascii=False, indent=4)
     except Exception as e:
         schema = {}
         logger.error(f"Fail to make schema: {e}")
@@ -144,46 +146,30 @@ def validate_tool_names(tools: list[str]) -> dict[str, Tool]:
     return valid_tools
 
 
-def load_module_from_file(filepath):
-    module_name = os.path.splitext(os.path.basename(filepath))[0]
-    spec = importlib.util.spec_from_file_location(module_name, filepath)
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    return module
-
-
 def register_tools_from_file(file_path) -> dict[str, Tool]:
+    file_name = Path(file_path).name
+    if not file_name.endswith(".py") or file_name == "setup.py" or file_name.startswith("test"):
+        return {}
     registered_tools = {}
-    module = load_module_from_file(file_path)
-    for name, obj in inspect.getmembers(module):
-        if inspect.isclass(obj) or inspect.isfunction(obj):
-            if obj.__module__ == module.__name__:
-                # excluding imported classes and functions, register only those defined in the file
-                if "metagpt" in file_path:
-                    # split to handle ../metagpt/metagpt/tools/... where only metapgt/tools/... is needed
-                    file_path = "metagpt" + file_path.split("metagpt")[-1]
-
-                TOOL_REGISTRY.register_tool(
-                    tool_name=name,
-                    tool_path=file_path,
-                    tool_code="",  # inspect.getsource(obj) will resulted in TypeError, skip it for now
-                    tool_source_object=obj,
-                )
-                registered_tools.update({name: TOOL_REGISTRY.get_tool(name)})
-
+    code = Path(file_path).read_text(encoding="utf-8")
+    tool_schemas = convert_code_to_tool_schema_ast(code)
+    for name, schemas in tool_schemas.items():
+        TOOL_REGISTRY.register_tool(
+            tool_name=name,
+            tool_path=file_path,
+            schemas=schemas,
+        )
+        registered_tools.update({name: TOOL_REGISTRY.get_tool(name)})
     return registered_tools
 
 
 def register_tools_from_path(path) -> dict[str, Tool]:
     tools_registered = {}
-    if os.path.isfile(path) and path.endswith(".py"):
-        # Path is a Python file
+    if os.path.isfile(path):
         tools_registered.update(register_tools_from_file(path))
     elif os.path.isdir(path):
-        # Path is a directory
         for root, _, files in os.walk(path):
             for file in files:
-                if file.endswith(".py"):
-                    file_path = os.path.join(root, file)
-                    tools_registered.update(register_tools_from_file(file_path))
+                file_path = os.path.join(root, file)
+                tools_registered.update(register_tools_from_file(file_path))
     return tools_registered

From a2493d99f12a872e54793ba5c26e5aaf538987d4 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 25 Mar 2024 17:35:12 +0800
Subject: [PATCH 10/12] add tool code for ast parsing

---
 metagpt/tools/tool_convert.py  | 17 +++++++++++------
 metagpt/tools/tool_registry.py |  2 ++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
index 529f5ec14..e6894762a 100644
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@@ -38,7 +38,7 @@ def convert_code_to_tool_schema_ast(code: str) -> list[dict]:
             child.parent = parent
             add_parent_references(child, parent=node)
 
-    visitor = CodeVisitor()
+    visitor = CodeVisitor(code)
     parsed_code = ast.parse(code)
     add_parent_references(parsed_code)
     visitor.visit(parsed_code)
@@ -86,8 +86,9 @@ def get_class_method_docstring(cls, method_name):
 class CodeVisitor(ast.NodeVisitor):
     """Visit and convert the AST nodes within a code file to tool schemas"""
 
-    def __init__(self):
+    def __init__(self, source_code: str):
         self.tool_schemas = {}  # {tool_name: tool_schema}
+        self.source_code = source_code
 
     def visit_ClassDef(self, node):
         class_schemas = {"type": "class", "description": remove_spaces(ast.get_docstring(node)), "methods": {}}
@@ -97,17 +98,21 @@ class CodeVisitor(ast.NodeVisitor):
             ):
                 func_schemas = self._get_function_schemas(body_node)
                 class_schemas["methods"].update({body_node.name: func_schemas})
+        class_schemas["code"] = ast.get_source_segment(self.source_code, node)
         self.tool_schemas[node.name] = class_schemas
 
     def visit_FunctionDef(self, node):
-        if isinstance(node.parent, ast.ClassDef) or node.name.startswith("_"):
-            return
-        self.tool_schemas[node.name] = self._get_function_schemas(node)
+        self._visit_function(node)
 
     def visit_AsyncFunctionDef(self, node):
+        self._visit_function(node)
+
+    def _visit_function(self, node):
         if isinstance(node.parent, ast.ClassDef) or node.name.startswith("_"):
             return
-        self.tool_schemas[node.name] = self._get_function_schemas(node)
+        function_schemas = self._get_function_schemas(node)
+        function_schemas["code"] = ast.get_source_segment(self.source_code, node)
+        self.tool_schemas[node.name] = function_schemas
 
     def _get_function_schemas(self, node):
         docstring = remove_spaces(ast.get_docstring(node))
diff --git a/metagpt/tools/tool_registry.py b/metagpt/tools/tool_registry.py
index 2fc44a2e8..50875e235 100644
--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@@ -154,10 +154,12 @@ def register_tools_from_file(file_path) -> dict[str, Tool]:
     code = Path(file_path).read_text(encoding="utf-8")
     tool_schemas = convert_code_to_tool_schema_ast(code)
     for name, schemas in tool_schemas.items():
+        tool_code = schemas.pop("code", "")
         TOOL_REGISTRY.register_tool(
             tool_name=name,
             tool_path=file_path,
             schemas=schemas,
+            tool_code=tool_code,
         )
         registered_tools.update({name: TOOL_REGISTRY.get_tool(name)})
     return registered_tools

From 8b371eaad3b7cc6505dd51aedb9198836bd6fea7 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Wed, 27 Mar 2024 11:41:04 +0800
Subject: [PATCH 11/12] add test cases for tool convert

---
 metagpt/tools/tool_convert.py            |  2 +-
 tests/metagpt/tools/test_tool_convert.py | 93 +++++++++++++++++++++++-
 2 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
index e6894762a..7d7374fde 100644
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@@ -141,7 +141,7 @@ class CodeVisitor(ast.NodeVisitor):
         if node.returns:
             return_annotation = f" -> {ast.unparse(node.returns)}"
 
-        return f"({' ,'.join(args)}){return_annotation}"
+        return f"({', '.join(args)}){return_annotation}"
 
     def get_tool_schemas(self):
         return self.tool_schemas
diff --git a/tests/metagpt/tools/test_tool_convert.py b/tests/metagpt/tools/test_tool_convert.py
index 061a619ce..4798d32b0 100644
--- a/tests/metagpt/tools/test_tool_convert.py
+++ b/tests/metagpt/tools/test_tool_convert.py
@@ -2,7 +2,10 @@ from typing import Literal, Union
 
 import pandas as pd
 
-from metagpt.tools.tool_convert import convert_code_to_tool_schema
+from metagpt.tools.tool_convert import (
+    convert_code_to_tool_schema,
+    convert_code_to_tool_schema_ast,
+)
 
 
 class DummyClass:
@@ -128,3 +131,91 @@ def test_convert_code_to_tool_schema_function():
 def test_convert_code_to_tool_schema_async_function():
     schema = convert_code_to_tool_schema(dummy_async_fn)
     assert schema.get("type") == "async_function"
+
+
+TEST_CODE_FILE_TEXT = '''
+import pandas as pd  # imported obj should not be parsed
+from some_module1 import some_imported_function, SomeImportedClass  # imported obj should not be parsed
+from ..some_module2 import some_imported_function2  # relative import should not result in an error
+
+class MyClass:
+    """This is a MyClass docstring."""
+    def __init__(self, arg1):
+        """This is the constructor docstring."""
+        self.arg1 = arg1
+
+    def my_method(self, arg2: Union[list[str], str], arg3: pd.DataFrame, arg4: int = 1, arg5: Literal["a","b","c"] = "a") -> Tuple[int, str]:
+        """
+        This is a method docstring.
+        
+        Args:
+            arg2 (Union[list[str], str]): A union of a list of strings and a string.
+            ...
+        
+        Returns:
+            Tuple[int, str]: A tuple of an integer and a string.
+        """
+        return self.arg4 + arg5
+    
+    async def my_async_method(self, some_arg) -> str:
+        return "hi"
+    
+    def _private_method(self):  # private should not be parsed
+        return "private"
+
+def my_function(arg1, arg2) -> dict:
+    """This is a function docstring."""
+    return arg1 + arg2
+
+async def my_async_function(arg1, arg2) -> dict:
+    return arg1 + arg2
+
+def _private_function():  # private should not be parsed
+    return "private"
+'''
+
+
+def test_convert_code_to_tool_schema_ast():
+    expected = {
+        "MyClass": {
+            "type": "class",
+            "description": "This is a MyClass docstring.",
+            "methods": {
+                "__init__": {
+                    "type": "function",
+                    "description": "This is the constructor docstring.",
+                    "signature": "(self, arg1)",
+                    "parameters": "",
+                },
+                "my_method": {
+                    "type": "function",
+                    "description": "This is a method docstring. ",
+                    "signature": "(self, arg2: Union[list[str], str], arg3: pd.DataFrame, arg4: int = 1, arg5: Literal['a', 'b', 'c'] = 'a') -> Tuple[int, str]",
+                    "parameters": "Args: arg2 (Union[list[str], str]): A union of a list of strings and a string. ... Returns: Tuple[int, str]: A tuple of an integer and a string.",
+                },
+                "my_async_method": {
+                    "type": "async_function",
+                    "description": "",
+                    "signature": "(self, some_arg) -> str",
+                    "parameters": "",
+                },
+            },
+            "code": 'class MyClass:\n    """This is a MyClass docstring."""\n    def __init__(self, arg1):\n        """This is the constructor docstring."""\n        self.arg1 = arg1\n\n    def my_method(self, arg2: Union[list[str], str], arg3: pd.DataFrame, arg4: int = 1, arg5: Literal["a","b","c"] = "a") -> Tuple[int, str]:\n        """\n        This is a method docstring.\n        \n        Args:\n            arg2 (Union[list[str], str]): A union of a list of strings and a string.\n            ...\n        \n        Returns:\n            Tuple[int, str]: A tuple of an integer and a string.\n        """\n        return self.arg4 + arg5\n    \n    async def my_async_method(self, some_arg) -> str:\n        return "hi"\n    \n    def _private_method(self):  # private should not be parsed\n        return "private"',
+        },
+        "my_function": {
+            "type": "function",
+            "description": "This is a function docstring.",
+            "signature": "(arg1, arg2) -> dict",
+            "parameters": "",
+            "code": 'def my_function(arg1, arg2) -> dict:\n    """This is a function docstring."""\n    return arg1 + arg2',
+        },
+        "my_async_function": {  # top-level async def, exercises CodeVisitor.visit_AsyncFunctionDef
+            "type": "async_function",
+            "description": "",
+            "signature": "(arg1, arg2) -> dict",
+            "parameters": "",
+            "code": "async def my_async_function(arg1, arg2) -> dict:\n    return arg1 + arg2",
+        },
+    }
+    schemas = convert_code_to_tool_schema_ast(TEST_CODE_FILE_TEXT)
+    assert schemas == expected

From d5c3c5a14747afa3759ccdbf6bdb2a43e59fbb38 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Wed, 27 Mar 2024 20:08:11 +0800
Subject: [PATCH 12/12] rm unnecessary condition

---
 metagpt/tools/tool_convert.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
index 7d7374fde..829269b1b 100644
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@@ -32,15 +32,8 @@ def convert_code_to_tool_schema(obj, include: list[str] = None) -> dict:
 def convert_code_to_tool_schema_ast(code: str) -> list[dict]:
     """Converts a code string to a list of tool schemas by parsing the code with AST"""
 
-    # Modify the AST nodes to include parent references, enabling to attach methods to their class
-    def add_parent_references(node, parent=None):
-        for child in ast.iter_child_nodes(node):
-            child.parent = parent
-            add_parent_references(child, parent=node)
-
     visitor = CodeVisitor(code)
     parsed_code = ast.parse(code)
-    add_parent_references(parsed_code)
     visitor.visit(parsed_code)
 
     return visitor.get_tool_schemas()
@@ -108,7 +101,7 @@ class CodeVisitor(ast.NodeVisitor):
         self._visit_function(node)
 
     def _visit_function(self, node):
-        if isinstance(node.parent, ast.ClassDef) or node.name.startswith("_"):
+        if node.name.startswith("_"):
             return
         function_schemas = self._get_function_schemas(node)
         function_schemas["code"] = ast.get_source_segment(self.source_code, node)