From 9343a6bd2cf998877ccde4b0b9942474e05526d6 Mon Sep 17 00:00:00 2001 From: yzlin Date: Mon, 5 Feb 2024 15:40:41 +0800 Subject: [PATCH] mv pip success logic, rm redundant prompt --- metagpt/actions/ci/execute_nb_code.py | 8 ++++- metagpt/actions/ci/ml_action.py | 4 +-- metagpt/prompts/ci/ml_action.py | 50 ++++----------------------- metagpt/roles/ci/code_interpreter.py | 5 +-- 4 files changed, 17 insertions(+), 50 deletions(-) diff --git a/metagpt/actions/ci/execute_nb_code.py b/metagpt/actions/ci/execute_nb_code.py index 300ee3807..6a8c32b7f 100644 --- a/metagpt/actions/ci/execute_nb_code.py +++ b/metagpt/actions/ci/execute_nb_code.py @@ -181,7 +181,13 @@ class ExecuteNbCode(Action): # code success outputs = self.parse_outputs(self.nb.cells[-1].outputs) - return truncate(remove_escape_and_color_codes(outputs), is_success=success) + outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success) + + if "!pip" in outputs: + success = False + + return outputs, success + elif language == "markdown": # add markdown content to markdown cell in a notebook. self.add_markdown_cell(code) diff --git a/metagpt/actions/ci/ml_action.py b/metagpt/actions/ci/ml_action.py index 60fe18c1b..e18d0fd20 100644 --- a/metagpt/actions/ci/ml_action.py +++ b/metagpt/actions/ci/ml_action.py @@ -5,7 +5,7 @@ from typing import Tuple from metagpt.actions import Action from metagpt.actions.ci.write_analysis_code import WriteCodeWithTools from metagpt.prompts.ci.ml_action import ( - GENERATE_CODE_PROMPT, + ML_GENERATE_CODE_PROMPT, ML_TOOL_USAGE_PROMPT, PRINT_DATA_COLUMNS, UPDATE_DATA_COLUMNS, @@ -43,7 +43,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools): ) else: - prompt = GENERATE_CODE_PROMPT.format( + prompt = ML_GENERATE_CODE_PROMPT.format( user_requirement=plan.goal, history_code=code_context, current_task=plan.current_task.instruction, diff --git a/metagpt/prompts/ci/ml_action.py b/metagpt/prompts/ci/ml_action.py index 46d419dfb..5d27c7ff0 100644 --- a/metagpt/prompts/ci/ml_action.py +++ b/metagpt/prompts/ci/ml_action.py @@ -27,28 +27,6 @@ print(column_info) - Import `get_column_info` only if it's not already imported. """ -GEN_DATA_DESC_PROMPT = """ -Here is the head 5 rows of the dataset: -{data_head} - -Please provide a brief one-sentence background of the dataset, and concise meaning for each column. Keep descriptions short. - -Output the information in a JSON format, as shown in this example: -```json -{ - "data_desc": "Brief dataset background.", - "column_desc": { - "column_name1": "Abstract meaning of the first column.", - "column_name2": "Abstract meaning of the second column.", - ... - } -} -``` - -# Constraints: -- Don't contain specific values or examples found in the data column. -""" - PRINT_DATA_COLUMNS = { "name": "print_column_info", "description": "Print the latest column information after 'Done Tasks' code if first read or data changed.", @@ -64,7 +42,7 @@ PRINT_DATA_COLUMNS = { }, } -GENERATE_CODE_PROMPT = """ +ML_COMMON_PROMPT = """ # Background As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook. @@ -83,7 +61,9 @@ Latest data info after previous tasks: # Task Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc. Specifically, {tool_type_usage_prompt} +""" +USE_NO_TOOLS_EXAMPLE = """ # Output Example: when current task is "train a lightgbm model on training data", the code can be like: ```python @@ -105,26 +85,7 @@ model.fit(train, y_train) - Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed. """ -ML_TOOL_USAGE_PROMPT = """ -# Background -As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook. - -## Done Tasks -```python -{history_code} -```end - -## Current Task -{current_task} - -# Latest Data Info -Latest data info after previous tasks: -{column_info} - -# Task -Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc. -Specifically, {tool_type_usage_prompt} - +USE_TOOLS_EXAMPLE = """ # Capabilities - You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class. - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc.. @@ -162,3 +123,6 @@ for col in num_cols: - Always prioritize using pre-defined tools for the same functionality. - Always copy the DataFrame before processing it and use the copy to process. """ + +ML_GENERATE_CODE_PROMPT = ML_COMMON_PROMPT + USE_NO_TOOLS_EXAMPLE +ML_TOOL_USAGE_PROMPT = ML_COMMON_PROMPT + USE_TOOLS_EXAMPLE diff --git a/metagpt/roles/ci/code_interpreter.py b/metagpt/roles/ci/code_interpreter.py index 2572d09c5..796abba04 100644 --- a/metagpt/roles/ci/code_interpreter.py +++ b/metagpt/roles/ci/code_interpreter.py @@ -64,9 +64,6 @@ class CodeInterpreter(Role): self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode)) ### process execution result ### - if "!pip" in code["code"]: - success = False - counter += 1 if not success and counter >= max_retry: @@ -76,7 +73,7 @@ class CodeInterpreter(Role): counter = 0 # redo the task again with help of human suggestions py_code = ( - code["code"] if code.get("language") != "markdown" else "" + code["code"] if code.get("language") == "python" else "" ) # use python code as final code; for markdown, return the rendered result instead of the code itself return py_code, result, success