diff --git a/metagpt/actions/ci/execute_nb_code.py b/metagpt/actions/ci/execute_nb_code.py index 6a8c32b7f..0ff00de8f 100644 --- a/metagpt/actions/ci/execute_nb_code.py +++ b/metagpt/actions/ci/execute_nb_code.py @@ -39,10 +39,9 @@ class ExecuteNbCode(Action): def __init__( self, - nb=None, + nb=nbformat.v4.new_notebook(), timeout=600, ): - nb = nb or nbformat.v4.new_notebook() super().__init__( nb=nb, nb_client=NotebookClient(nb, timeout=timeout), @@ -199,17 +198,10 @@ class ExecuteNbCode(Action): def truncate(result: str, keep_len: int = 2000, is_success: bool = True): """对于超出keep_len个字符的result: 执行失败的代码, 展示result后keep_len个字符; 执行成功的代码, 展示result前keep_len个字符。""" - desc = f"Executed code {'successfully. ' if is_success else 'failed, please reflect the cause of bug and then debug. '}" - is_same_desc = False - if is_success: - desc += f"Truncated to show only first {keep_len} characters\n" + desc = f"Executed code successfully. Truncated to show only first {keep_len} characters\n" else: - desc += f"Truncated to show only last {keep_len} characters\n" - - if result.startswith(desc): - result = result[len(desc) :] - is_same_desc = True + desc = f"Executed code failed, please reflect the cause of bug and then debug. Truncated to show only last {keep_len} characters\n" if result.strip().startswith(" dict: @@ -71,18 +71,15 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): """ candidate_tools = TOOL_REGISTRY.get_tools_by_type(tool_type) if self.selected_tools: - candidate_tools = { - tool_name: candidate_tools[tool_name] - for tool_name in self.selected_tools - if tool_name in candidate_tools - } + candidate_tool_names = set(self.selected_tools) & candidate_tools.keys() + candidate_tools = {tool_name: candidate_tools[tool_name] for tool_name in candidate_tool_names} return candidate_tools async def _recommend_tool( self, task: str, available_tools: dict, - ) -> list: + ) -> dict: """ Recommend tools for the specified task. diff --git a/metagpt/actions/ci/write_plan.py b/metagpt/actions/ci/write_plan.py index e88f64724..dd9363260 100644 --- a/metagpt/actions/ci/write_plan.py +++ b/metagpt/actions/ci/write_plan.py @@ -49,19 +49,19 @@ class WritePlan(Action): tasks (list[dict]): tasks to be assigned task type Returns: - list[dict]: tasks with task type assigned + str: tasks with task type assigned in a json string """ - task_list = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks]) + task_info = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks]) task_type_desc = "\n".join( [f"- **{tool_type.name}**: {tool_type.desc}" for tool_type in TOOL_REGISTRY.get_tool_types().values()] ) # task type are binded with tool type now, should be improved in the future prompt = ASSIGN_TASK_TYPE_PROMPT.format( - task_list=task_list, task_type_desc=task_type_desc + task_info=task_info, task_type_desc=task_type_desc ) # task types are set to be the same as tool types, for now tool_config = create_func_call_config(ASSIGN_TASK_TYPE_CONFIG) rsp = await self.llm.aask_code(prompt, **tool_config) task_type_list = rsp["task_type"] - print(f"assigned task types: {task_type_list}") + logger.info(f"assigned task types: {task_type_list}") for task, task_type in zip(tasks, task_type_list): task["task_type"] = task_type return json.dumps(tasks) diff --git a/metagpt/prompts/ci/write_analysis_code.py b/metagpt/prompts/ci/write_analysis_code.py index 15d8b1443..4eccefcd1 100644 --- a/metagpt/prompts/ci/write_analysis_code.py +++ b/metagpt/prompts/ci/write_analysis_code.py @@ -1,6 +1,6 @@ ASSIGN_TASK_TYPE_PROMPT = """ Please assign a task type to each task in the list below from the given categories: -{task_list} +{task_info} ## All Task Type: {task_type_desc} diff --git a/tests/data/rsp_cache.json b/tests/data/rsp_cache.json index c5f2e9643..d6cbe60e7 100644 --- a/tests/data/rsp_cache.json +++ b/tests/data/rsp_cache.json @@ -388,5 +388,11 @@ "[{\"role\": \"system\", \"content\": \"You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation \"}, {\"role\": \"user\", \"content\": \"\\nHere is an example for you.\\n\\nExample 1:\\n[previous impl]:\\n```python\\ndef add(a: int, b: int) -> int:\\n \\\"\\\"\\\"\\n Given integers a and b, return the total value of a and b.\\n \\\"\\\"\\\"\\n return a - b\\n```\\n\\n[runtime Error]:\\nTested passed:\\n\\nTests failed:\\nassert add(1, 2) == 3 # output: -1\\nassert add(1, 2) == 4 # output: -1\\n\\n[reflection on previous impl]:\\nThe implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.\\n\\n[improved impl]:\\n```python\\ndef add(a: int, b: int) -> int:\\n \\\"\\\"\\\"\\n Given integers a and b, return the total value of a and b.\\n \\\"\\\"\\\"\\n return a + b\\n```\\n\\n[context]\\nSolve the problem in Python:\\ndef sort_array(arr):\\n \\\"\\\"\\\"\\n In this Kata, you have to sort an array of non-negative integers according to\\n number of ones in their binary representation in ascending order.\\n For similar number of ones, sort based on decimal value.\\n\\n It must be implemented like this:\\n >>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]\\n >>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]\\n >>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]\\n \\\"\\\"\\\"\\n\\n\\n[previous impl]\\n\\ndef sort_array(arr):\\n # Helper function to count the number of ones in the binary representation\\n def count_ones(n):\\n return bin(n).count('1')\\n \\n # Sort the array using a custom key function\\n # The key function returns a tuple (number of ones, value) for each element\\n # This ensures that if two elements have the same number of ones, they are sorted by their value\\n sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x))\\n \\n return sorted_arr\\n```\\n\\n[runtime Error]\\n[user: Tested passed:\\n\\nTests failed:\\nassert sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5] # output: [1, 2, 4, 3, 5]\\n]\\n\\nAnalysis the error step by step, provide me improve method and code. Remember to follow [context] requirement. Don't forget write code for steps behind the error step.\\n[reflection on previous impl]:\\nxxx\\n\"}]": { "reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.", "improved_impl": "def sort_array(arr):\n # Helper function to count the number of ones in the binary representation\n def count_ones(n):\n return bin(n).count('1')\n \n # Sort the array using a custom key function\n # The key function returns a tuple (number of ones, value) for each element\n # This ensures that if two elements have the same number of ones, they are sorted by their value\n # We use a stable sorting algorithm to maintain the relative order of elements with the same number of ones\n sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x), reverse=True)\n sorted_arr = sorted(sorted_arr, key=count_ones)\n \n return sorted_arr\n" + }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "from sklearn import datasets\nimport pandas as pd\n\n# Load Iris dataset\niris = datasets.load_iris()\ndf_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)\ndf_iris['target'] = iris.target\n\n# Display basic information about the dataset\nprint(df_iris.info())\n\n# Display statistical summary of the dataset\nprint(df_iris.describe())\n\n# Display the first few rows of the dataset\nprint(df_iris.head())\n\n# Display the distribution of the target variable\ntarget_counts = df_iris['target'].value_counts()\nprint(target_counts)" + }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "from sklearn import datasets\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# Load Iris dataset\niris = datasets.load_iris()\niris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)\niris_df['target'] = iris.target\niris_df['target_name'] = iris_df['target'].apply(lambda x: iris.target_names[x])\n\n# Plotting\nfig, ax = plt.subplots(figsize=(12, 8))\nfor target, target_name in zip(iris.target_names, iris.target_names):\n subset = iris_df[iris_df['target_name'] == target_name]\n ax.scatter(subset[iris.feature_names[0]], subset[iris.feature_names[1]], label=target_name)\n\nax.set_xlabel(iris.feature_names[0])\nax.set_ylabel(iris.feature_names[1])\nax.legend()\nplt.show()" } } \ No newline at end of file diff --git a/tests/metagpt/actions/ci/test_execute_nb_code.py b/tests/metagpt/actions/ci/test_execute_nb_code.py index 6402cb883..72a85dd08 100644 --- a/tests/metagpt/actions/ci/test_execute_nb_code.py +++ b/tests/metagpt/actions/ci/test_execute_nb_code.py @@ -67,11 +67,6 @@ def test_truncate(): output, is_success = truncate("