diff --git a/tests/data/rsp_cache.json b/tests/data/rsp_cache.json index b1f083185..e5f03d9cb 100644 --- a/tests/data/rsp_cache.json +++ b/tests/data/rsp_cache.json @@ -417,5 +417,22 @@ "\n## context\nCreate a 2048 game\n\n-----\n\n## format example\n[CONTENT]\n{\n \"issue_type\": \"BUG\"\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- issue_type: # Answer BUG/REQUIREMENT. If it is a bugfix, answer BUG, otherwise answer Requirement\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"issue_type\": \"REQUIREMENT\"\n}\n[/CONTENT]", "\n## context\nCreate a 2048 game\n\n-----\n\n## format example\n[CONTENT]\n{\n \"Product Goals\": [\n \"Create an engaging user experience\",\n \"Improve accessibility, be responsive\",\n \"More beautiful UI\"\n ]\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- Product Goals: typing.List[str] # Provide up to three clear, orthogonal product goals.\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"Product Goals\": [\n \"Create a challenging and addictive gameplay\",\n \"Enhance accessibility and responsiveness for all users\",\n \"Implement visually appealing UI design\"\n ]\n}\n[/CONTENT]", "\n## context\nCreate a 2048 game\n\n-----\n\n## format example\n[CONTENT]\n{\n \"Competitive Analysis\": [\n \"2048 Game A: Simple interface, lacks responsive features\",\n \"play2048.co: Beautiful and responsive UI with my best score shown\",\n \"2048game.com: Responsive UI with my best score shown, but many ads\"\n ]\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- Competitive Analysis: typing.List[str] # Provide 5 to 7 competitive products.\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"Competitive Analysis\": [\n \"2048 Game A: Simple interface, lacks responsive features\",\n \"play2048.co: Beautiful and responsive UI with my best score shown\",\n \"2048game.com: Responsive UI with my best score shown, but many ads\"\n ]\n}\n[/CONTENT]", - "\n## context\nCreate a 2048 game\n\n-----\n\n## format example\n[CONTENT]\n{\n \"Requirement Pool\": [\n [\n \"P0\",\n \"The main code ...\"\n ],\n [\n \"P0\",\n \"The game algorithm ...\"\n ]\n ]\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- Requirement Pool: typing.List[typing.List[str]] # List down the top-5 requirements with their priority (P0, P1, P2).\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"Requirement Pool\": [\n [\n \"P0\",\n \"The main code should handle user input and update the game board accordingly.\"\n ],\n [\n \"P0\",\n \"The game algorithm should handle the merging of tiles and the generation of new tiles with values of 2 or 4.\"\n ],\n [\n \"P1\",\n \"The game should end when the player achieves the 2048 tile or when there are no possible moves left.\"\n ],\n [\n \"P1\",\n \"The game should display the current score and the highest tile achieved by the player.\"\n ],\n [\n \"P2\",\n \"The game should have a smooth and visually appealing user interface.\"\n ]\n ]\n}\n[/CONTENT]" + "\n## context\nCreate a 2048 game\n\n-----\n\n## format example\n[CONTENT]\n{\n \"Requirement Pool\": [\n [\n \"P0\",\n \"The main code ...\"\n ],\n [\n \"P0\",\n \"The game algorithm ...\"\n ]\n ]\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- Requirement Pool: typing.List[typing.List[str]] # List down the top-5 requirements with their priority (P0, P1, P2).\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"Requirement Pool\": [\n [\n \"P0\",\n \"The main code should handle user input and update the game board accordingly.\"\n ],\n [\n \"P0\",\n \"The game algorithm should handle the merging of tiles and the generation of new tiles with values of 2 or 4.\"\n ],\n [\n \"P1\",\n \"The game should end when the player achieves the 2048 tile or when there are no possible moves left.\"\n ],\n [\n \"P1\",\n \"The game should display the current score and the highest tile achieved by the player.\"\n ],\n [\n \"P2\",\n \"The game should have a smooth and visually appealing user interface.\"\n ]\n ]\n}\n[/CONTENT]", + "[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Import the Iris dataset from scikit-learn.\\nTask 2: Perform exploratory data analysis to understand the dataset.\\nTask 3: Preprocess the data if necessary (e.g., scaling, encoding).\\nTask 4: Split the dataset into training and testing sets.\\nTask 5: Choose a suitable model and train it on the dataset.\\nTask 6: Evaluate the model's performance on the test set.\\nTask 7: Report the results of the analysis.\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": { + "task_type": [ + "other", + "eda", + "data_preprocess", + "data_preprocess", + "model_train", + "model_evaluate", + "other" + ] + }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset features.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code_steps\\\":\\\"\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "import pandas as pd\n\n# Convert the loaded Iris dataset to a DataFrame for easier manipulation\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\niris_df['target'] = iris_data.target\n\n# Display basic information about the dataset\niris_df_info = iris_df.info()\n\n# Display statistical summary of the dataset\niris_df_description = iris_df.describe()\n\n# Show the first few rows of the dataset\ndf_head = iris_df.head()\n\n# Output the results\ndisplay(iris_df_info, iris_df_description, df_head)" + }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"import pandas as pd\\\\n\\\\n# Convert the loaded Iris dataset to a DataFrame for easier manipulation\\\\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\\\\niris_df['target'] = iris_data.target\\\\n\\\\n# Display basic information about the dataset\\\\niris_df_info = iris_df.info()\\\\n\\\\n# Display statistical summary of the dataset\\\\niris_df_description = iris_df.describe()\\\\n\\\\n# Show the first few rows of the dataset\\\\ndf_head = iris_df.head()\\\\n\\\\n# Output the results\\\\ndisplay(iris_df_info, iris_df_description, df_head)\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset features.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset features.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code_steps\\\":\\\"\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "import seaborn as sns\nimport matplotlib.pyplot as plt\n\n# Pairplot to visualize the relationships between features\nsns.pairplot(iris_df, hue='target', diag_kind='kde')\n\n# Show the plot\nplt.show()" + } } \ No newline at end of file diff --git a/tests/metagpt/actions/test_ask_review.py b/tests/metagpt/actions/test_ask_review.py new file mode 100644 index 000000000..00001fad6 --- /dev/null +++ b/tests/metagpt/actions/test_ask_review.py @@ -0,0 +1,12 @@ +import pytest + +from metagpt.actions.ask_review import AskReview + + +@pytest.mark.asyncio +async def test_ask_review(mocker): + mock_review_input = "confirm" + mocker.patch("builtins.input", return_value=mock_review_input) + rsp, confirmed = await AskReview().run() + assert rsp == mock_review_input + assert confirmed diff --git a/tests/metagpt/actions/test_write_plan.py b/tests/metagpt/actions/test_write_plan.py index 9abc6c798..f36527711 100644 --- a/tests/metagpt/actions/test_write_plan.py +++ b/tests/metagpt/actions/test_write_plan.py @@ -23,8 +23,11 @@ def test_precheck_update_plan_from_rsp(): @pytest.mark.asyncio -async def test_write_plan(): - rsp = await WritePlan().run(context=[Message("run analysis on sklearn iris dataset", role="user")]) +@pytest.mark.parametrize("use_tools", [(False), (True)]) +async def test_write_plan(use_tools): + rsp = await WritePlan().run( + context=[Message("run analysis on sklearn iris dataset", role="user")], use_tools=use_tools + ) assert "task_id" in rsp assert "instruction" in rsp diff --git a/tests/metagpt/roles/test_code_interpreter.py b/tests/metagpt/roles/test_code_interpreter.py index b78f7a9ef..dd959525e 100644 --- a/tests/metagpt/roles/test_code_interpreter.py +++ b/tests/metagpt/roles/test_code_interpreter.py @@ -5,11 +5,15 @@ from metagpt.roles.code_interpreter import CodeInterpreter @pytest.mark.asyncio -async def test_code_interpreter(): +@pytest.mark.parametrize("auto_run", [(True), (False)]) +async def test_code_interpreter(mocker, auto_run): + mocker.patch("metagpt.actions.execute_code.ExecutePyCode.run", return_value=("a successful run", True)) + mocker.patch("builtins.input", return_value="confirm") + requirement = "Run data analysis on sklearn Iris dataset, include a plot" tools = [] - ci = CodeInterpreter(auto_run=True, use_tools=True, tools=tools) + ci = CodeInterpreter(auto_run=auto_run, use_tools=True, tools=tools) rsp = await ci.run(requirement) logger.info(rsp) assert len(rsp.content) > 0