From 9343a6bd2cf998877ccde4b0b9942474e05526d6 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 5 Feb 2024 15:40:41 +0800
Subject: [PATCH] Move pip success check into ExecuteNbCode, remove redundant prompts

---
 metagpt/actions/ci/execute_nb_code.py |  8 ++++-
 metagpt/actions/ci/ml_action.py       |  4 +--
 metagpt/prompts/ci/ml_action.py       | 50 ++++-----------------------
 metagpt/roles/ci/code_interpreter.py  |  5 +--
 4 files changed, 17 insertions(+), 50 deletions(-)

diff --git a/metagpt/actions/ci/execute_nb_code.py b/metagpt/actions/ci/execute_nb_code.py
index 300ee3807..6a8c32b7f 100644
--- a/metagpt/actions/ci/execute_nb_code.py
+++ b/metagpt/actions/ci/execute_nb_code.py
@@ -181,7 +181,13 @@ class ExecuteNbCode(Action):
 
             # code success
             outputs = self.parse_outputs(self.nb.cells[-1].outputs)
-            return truncate(remove_escape_and_color_codes(outputs), is_success=success)
+            outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success)
+            # Flag cells that invoke `!pip` as unsuccessful; the marker is in the submitted code, not its output.
+            if "!pip" in code:
+                success = False
+
+            return outputs, success
+
         elif language == "markdown":
             # add markdown content to markdown cell in a notebook.
             self.add_markdown_cell(code)
diff --git a/metagpt/actions/ci/ml_action.py b/metagpt/actions/ci/ml_action.py
index 60fe18c1b..e18d0fd20 100644
--- a/metagpt/actions/ci/ml_action.py
+++ b/metagpt/actions/ci/ml_action.py
@@ -5,7 +5,7 @@ from typing import Tuple
 from metagpt.actions import Action
 from metagpt.actions.ci.write_analysis_code import WriteCodeWithTools
 from metagpt.prompts.ci.ml_action import (
-    GENERATE_CODE_PROMPT,
+    ML_GENERATE_CODE_PROMPT,
     ML_TOOL_USAGE_PROMPT,
     PRINT_DATA_COLUMNS,
     UPDATE_DATA_COLUMNS,
@@ -43,7 +43,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
             )
 
         else:
-            prompt = GENERATE_CODE_PROMPT.format(
+            prompt = ML_GENERATE_CODE_PROMPT.format(
                 user_requirement=plan.goal,
                 history_code=code_context,
                 current_task=plan.current_task.instruction,
diff --git a/metagpt/prompts/ci/ml_action.py b/metagpt/prompts/ci/ml_action.py
index 46d419dfb..5d27c7ff0 100644
--- a/metagpt/prompts/ci/ml_action.py
+++ b/metagpt/prompts/ci/ml_action.py
@@ -27,28 +27,6 @@ print(column_info)
 - Import `get_column_info` only if it's not already imported.
 """
 
-GEN_DATA_DESC_PROMPT = """
-Here is the head 5 rows of the dataset:
-{data_head}
-
-Please provide a brief one-sentence background of the dataset, and concise meaning for each column. Keep descriptions short.
-
-Output the information in a JSON format, as shown in this example:
-```json
-{
-    "data_desc": "Brief dataset background.",
-    "column_desc": {
-        "column_name1": "Abstract meaning of the first column.",
-        "column_name2": "Abstract meaning of the second column.",
-        ...
-    }
-}
-```
-
-# Constraints:
-- Don't contain specific values or examples found in the data column.
-"""
-
 PRINT_DATA_COLUMNS = {
     "name": "print_column_info",
     "description": "Print the latest column information after 'Done Tasks' code if first read or data changed.",
@@ -64,7 +42,7 @@ PRINT_DATA_COLUMNS = {
     },
 }
 
-GENERATE_CODE_PROMPT = """
+ML_COMMON_PROMPT = """
 # Background
 As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
 
@@ -83,7 +61,9 @@ Latest data info after previous tasks:
 # Task
 Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
 Specifically, {tool_type_usage_prompt}
+"""
 
+USE_NO_TOOLS_EXAMPLE = """
 # Output Example:
 when current task is "train a lightgbm model on training data", the code can be like:
 ```python
@@ -105,26 +85,7 @@ model.fit(train, y_train)
 - Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
 """
 
-ML_TOOL_USAGE_PROMPT = """
-# Background
-As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
-
-## Done Tasks
-```python
-{history_code}
-```end
-
-## Current Task
-{current_task}
-
-# Latest Data Info
-Latest data info after previous tasks:
-{column_info}
-
-# Task
-Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
-Specifically, {tool_type_usage_prompt}
-
+USE_TOOLS_EXAMPLE = """
 # Capabilities
 - You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
 - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
@@ -162,3 +123,6 @@ for col in num_cols:
 - Always prioritize using pre-defined tools for the same functionality.
 - Always copy the DataFrame before processing it and use the copy to process.
 """
+
+ML_GENERATE_CODE_PROMPT = ML_COMMON_PROMPT + USE_NO_TOOLS_EXAMPLE
+ML_TOOL_USAGE_PROMPT = ML_COMMON_PROMPT + USE_TOOLS_EXAMPLE
diff --git a/metagpt/roles/ci/code_interpreter.py b/metagpt/roles/ci/code_interpreter.py
index 2572d09c5..796abba04 100644
--- a/metagpt/roles/ci/code_interpreter.py
+++ b/metagpt/roles/ci/code_interpreter.py
@@ -64,9 +64,6 @@ class CodeInterpreter(Role):
             self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode))
 
             ### process execution result ###
-            if "!pip" in code["code"]:
-                success = False
-
             counter += 1
 
             if not success and counter >= max_retry:
@@ -76,7 +73,7 @@ class CodeInterpreter(Role):
                     counter = 0  # redo the task again with help of human suggestions
 
         py_code = (
-            code["code"] if code.get("language") != "markdown" else ""
+            code["code"] if code.get("language") == "python" else ""
         )  # use python code as final code; for markdown, return the rendered result instead of the code itself
 
         return py_code, result, success