Move the pip success check into ExecuteNbCode and remove redundant prompt text

2026-05-21 14:05:17 +02:00 · 2024-02-05 15:40:41 +08:00 · 2024-02-05 15:40:41 +08:00 · 9343a6bd2c
commit 9343a6bd2c
parent 748aabce70
4 changed files with 17 additions and 50 deletions
--- a/metagpt/actions/ci/execute_nb_code.py
+++ b/metagpt/actions/ci/execute_nb_code.py
@@ -181,7 +181,13 @@ class ExecuteNbCode(Action):

            # code success
            outputs = self.parse_outputs(self.nb.cells[-1].outputs)
-            return truncate(remove_escape_and_color_codes(outputs), is_success=success)
+            outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success)
+
+            if "!pip" in outputs:
+                success = False
+
+            return outputs, success
+
        elif language == "markdown":
            # add markdown content to markdown cell in a notebook.
            self.add_markdown_cell(code)
--- a/metagpt/actions/ci/ml_action.py
+++ b/metagpt/actions/ci/ml_action.py
@@ -5,7 +5,7 @@ from typing import Tuple
 from metagpt.actions import Action
 from metagpt.actions.ci.write_analysis_code import WriteCodeWithTools
 from metagpt.prompts.ci.ml_action import (
-    GENERATE_CODE_PROMPT,
+    ML_GENERATE_CODE_PROMPT,
    ML_TOOL_USAGE_PROMPT,
    PRINT_DATA_COLUMNS,
    UPDATE_DATA_COLUMNS,
@@ -43,7 +43,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
            )

        else:
-            prompt = GENERATE_CODE_PROMPT.format(
+            prompt = ML_GENERATE_CODE_PROMPT.format(
                user_requirement=plan.goal,
                history_code=code_context,
                current_task=plan.current_task.instruction,
--- a/metagpt/prompts/ci/ml_action.py
+++ b/metagpt/prompts/ci/ml_action.py
@@ -27,28 +27,6 @@ print(column_info)
 - Import `get_column_info` only if it's not already imported.
 """

-GEN_DATA_DESC_PROMPT = """
-Here is the head 5 rows of the dataset:
-{data_head}
-
-Please provide a brief one-sentence background of the dataset, and concise meaning for each column. Keep descriptions short.
-
-Output the information in a JSON format, as shown in this example:
-```json
-{
-    "data_desc": "Brief dataset background.",
-    "column_desc": {
-        "column_name1": "Abstract meaning of the first column.",
-        "column_name2": "Abstract meaning of the second column.",
-        ...
-    }
-}
-```
-
-# Constraints:
- Don't contain specific values or examples found in the data column.
-"""
-
 PRINT_DATA_COLUMNS = {
    "name": "print_column_info",
    "description": "Print the latest column information after 'Done Tasks' code if first read or data changed.",
@@ -64,7 +42,7 @@ PRINT_DATA_COLUMNS = {
    },
 }

-GENERATE_CODE_PROMPT = """
+ML_COMMON_PROMPT = """
 # Background
 As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.

@@ -83,7 +61,9 @@ Latest data info after previous tasks:
 # Task
 Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
 Specifically, {tool_type_usage_prompt}
+"""

+USE_NO_TOOLS_EXAMPLE = """
 # Output Example:
 when current task is "train a lightgbm model on training data", the code can be like:
 ```python
@@ -105,26 +85,7 @@ model.fit(train, y_train)
 - Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
 """

-ML_TOOL_USAGE_PROMPT = """
-# Background
-As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
-
-## Done Tasks
-```python
-{history_code}
-```end
-
-## Current Task
-{current_task}
-
-# Latest Data Info
-Latest data info after previous tasks:
-{column_info}
-
-# Task
-Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
-Specifically, {tool_type_usage_prompt}
-
+USE_TOOLS_EXAMPLE = """
 # Capabilities
 - You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
 - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
@@ -162,3 +123,6 @@ for col in num_cols:
 - Always prioritize using pre-defined tools for the same functionality.
 - Always copy the DataFrame before processing it and use the copy to process.
 """
+
+ML_GENERATE_CODE_PROMPT = ML_COMMON_PROMPT + USE_NO_TOOLS_EXAMPLE
+ML_TOOL_USAGE_PROMPT = ML_COMMON_PROMPT + USE_TOOLS_EXAMPLE
--- a/metagpt/roles/ci/code_interpreter.py
+++ b/metagpt/roles/ci/code_interpreter.py
@@ -64,9 +64,6 @@ class CodeInterpreter(Role):
            self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode))

            ### process execution result ###
-            if "!pip" in code["code"]:
-                success = False
-
            counter += 1

            if not success and counter >= max_retry:
@@ -76,7 +73,7 @@ class CodeInterpreter(Role):
                    counter = 0  # redo the task again with help of human suggestions

        py_code = (
-            code["code"] if code.get("language") != "markdown" else ""
+            code["code"] if code.get("language") == "python" else ""
        )  # use python code as final code; for markdown, return the rendered result instead of the code itself

        return py_code, result, success