diff --git a/metagpt/actions/__init__.py b/metagpt/actions/__init__.py index c8c966c3d..3f88fbcf3 100644 --- a/metagpt/actions/__init__.py +++ b/metagpt/actions/__init__.py @@ -22,7 +22,7 @@ from metagpt.actions.write_code_review import WriteCodeReview from metagpt.actions.write_prd import WritePRD from metagpt.actions.write_prd_review import WritePRDReview from metagpt.actions.write_test import WriteTest -from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.execute_nb_code import ExecuteNbCode from metagpt.actions.write_analysis_code import WriteCodeByGenerate from metagpt.actions.write_plan import WritePlan @@ -45,7 +45,7 @@ class ActionType(Enum): COLLECT_LINKS = CollectLinks WEB_BROWSE_AND_SUMMARIZE = WebBrowseAndSummarize CONDUCT_RESEARCH = ConductResearch - EXECUTE_PYCODE = ExecutePyCode + EXECUTE_NB_CODE = ExecuteNbCode WRITE_CODE_BY_GENERATE = WriteCodeByGenerate WRITE_PLAN = WritePlan diff --git a/metagpt/actions/ask_review.py b/metagpt/actions/ask_review.py index a20395104..25b4314fe 100644 --- a/metagpt/actions/ask_review.py +++ b/metagpt/actions/ask_review.py @@ -1,4 +1,4 @@ -from typing import List +from typing import Tuple from metagpt.actions import Action from metagpt.logs import logger @@ -8,22 +8,24 @@ from metagpt.schema import Message, Plan class ReviewConst: TASK_REVIEW_TRIGGER = "task" CODE_REVIEW_TRIGGER = "code" - CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"] - CHANGE_WORD = ["change"] - EXIT_WORD = ["exit"] + CONTINUE_WORDS = ["confirm", "continue", "c", "yes", "y"] + CHANGE_WORDS = ["change"] + EXIT_WORDS = ["exit"] TASK_REVIEW_INSTRUCTION = ( - f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORD[0]} task task_id or current task, ... 
(things to change)' " - f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORD[0]}" + f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORDS[0]} task task_id or current task, ... (things to change)' " + f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORDS[0]}" ) CODE_REVIEW_INSTRUCTION = ( - f"If you want the codes to be rewritten, say '{CHANGE_WORD[0]} ... (your change advice)' " - f"If you want to leave it as is, type: {CONTINUE_WORD[0]} or {CONTINUE_WORD[1]}" + f"If you want the codes to be rewritten, say '{CHANGE_WORDS[0]} ... (your change advice)' " + f"If you want to leave it as is, type: {CONTINUE_WORDS[0]} or {CONTINUE_WORDS[1]}" ) - EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORD[0]}" + EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORDS[0]}" class AskReview(Action): - async def run(self, context: List[Message] = [], plan: Plan = None, trigger: str = "task"): + async def run( + self, context: list[Message] = [], plan: Plan = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER + ) -> Tuple[str, bool]: if plan: logger.info("Current overall plan:") logger.info( @@ -32,7 +34,7 @@ class AskReview(Action): ) ) - logger.info("most recent context:") + logger.info("Most recent context:") latest_action = context[-1].cause_by if context and context[-1].cause_by else "" review_instruction = ( ReviewConst.TASK_REVIEW_INSTRUCTION @@ -48,11 +50,11 @@ class AskReview(Action): rsp = input(prompt) - if rsp.lower() in ReviewConst.EXIT_WORD: + if rsp.lower() in ReviewConst.EXIT_WORDS: exit() # Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm". # One could say "confirm this task, but change the next task to ..." 
- confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in rsp.lower() + confirmed = rsp.lower() in ReviewConst.CONTINUE_WORDS or ReviewConst.CONTINUE_WORDS[0] in rsp.lower() return rsp, confirmed diff --git a/metagpt/actions/debug_code.py b/metagpt/actions/debug_code.py index 121c126c4..d63fa3396 100644 --- a/metagpt/actions/debug_code.py +++ b/metagpt/actions/debug_code.py @@ -3,7 +3,7 @@ from typing import List from metagpt.actions.write_analysis_code import BaseWriteAnalysisCode from metagpt.logs import logger from metagpt.schema import Message -from metagpt.utils.common import create_func_config +from metagpt.utils.common import create_func_call_config DEBUG_REFLECTION_EXAMPLE = ''' Example 1: @@ -100,7 +100,7 @@ class DebugCode(BaseWriteAnalysisCode): info.append(Message(role="system", content=system_prompt)) info.append(Message(role="user", content=reflection_prompt)) - resp = await self.llm.aask_code(messages=info, **create_func_config(CODE_REFLECTION)) + resp = await self.llm.aask_code(messages=info, **create_func_call_config(CODE_REFLECTION)) logger.info(f"reflection is {resp}") return resp diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_nb_code.py similarity index 94% rename from metagpt/actions/execute_code.py rename to metagpt/actions/execute_nb_code.py index 6a4a9abb8..7dfbecb5c 100644 --- a/metagpt/actions/execute_code.py +++ b/metagpt/actions/execute_nb_code.py @@ -7,7 +7,6 @@ import asyncio import re import traceback -from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Dict, List, Tuple, Union @@ -28,30 +27,8 @@ from metagpt.logs import logger from metagpt.schema import Message -class ExecuteCode(ABC): - @abstractmethod - async def build(self): - """build code executor""" - ... - - @abstractmethod - async def run(self, code: str): - """run code""" - ... - - @abstractmethod - async def terminate(self): - """terminate executor""" - ... 
- - @abstractmethod - async def reset(self): - """reset executor""" - ... - - -class ExecutePyCode(ExecuteCode, Action): - """execute code, return result to llm, and display it.""" +class ExecuteNbCode(Action): + """execute notebook code block, return result to llm, and display it.""" nb: Any nb_client: Any diff --git a/metagpt/actions/ml_action.py b/metagpt/actions/ml_action.py index a61233e5a..d419026fa 100644 --- a/metagpt/actions/ml_action.py +++ b/metagpt/actions/ml_action.py @@ -11,7 +11,7 @@ from metagpt.prompts.ml_action import ( ) from metagpt.prompts.write_analysis_code import CODE_GENERATOR_WITH_TOOLS from metagpt.schema import Message, Plan -from metagpt.utils.common import CodeParser, create_func_config, remove_comments +from metagpt.utils.common import CodeParser, create_func_call_config, remove_comments class WriteCodeWithToolsML(WriteCodeWithTools): @@ -52,7 +52,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools): tool_type_usage_prompt=tool_type_usage_prompt, code_steps=code_steps, ) - tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) + tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) # Extra output to be used for potential debugging @@ -97,6 +97,6 @@ class UpdateDataColumns(Action): code_context = [remove_comments(task.code) for task in finished_tasks] code_context = "\n\n".join(code_context) prompt = UPDATE_DATA_COLUMNS.format(history_code=code_context) - tool_config = create_func_config(PRINT_DATA_COLUMNS) + tool_config = create_func_call_config(PRINT_DATA_COLUMNS) rsp = await self.llm.aask_code(prompt, **tool_config) return rsp diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 5cea9fe51..bf00e8ed1 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -2,9 +2,9 @@ """ @Date : 2023/11/20 13:19:39 @Author : orange-crow -@File : write_code_v2.py +@File : write_analysis_code.py 
""" -from typing import Dict, List, Tuple, Union +from typing import Dict, Tuple, Union from metagpt.actions import Action from metagpt.logs import logger @@ -17,14 +17,14 @@ from metagpt.prompts.write_analysis_code import ( from metagpt.schema import Message, Plan from metagpt.tools import TOOL_REGISTRY from metagpt.tools.tool_registry import validate_tool_names -from metagpt.utils.common import create_func_config +from metagpt.utils.common import create_func_call_config class BaseWriteAnalysisCode(Action): DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. 
Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!""" - def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None): + def process_msg(self, prompt: Union[str, list[Dict], Message, list[Message]], system_msg: str = None): default_system_msg = system_msg or self.DEFAULT_SYSTEM_MSG # 全部转成list if not isinstance(prompt, list): @@ -53,16 +53,17 @@ class BaseWriteAnalysisCode(Action): } return messages - async def run(self, context: List[Message], plan: Plan = None) -> dict: + async def run(self, context: list[Message], plan: Plan = None) -> dict: """Run of a code writing action, used in data analysis or modeling Args: - context (List[Message]): Action output history, source action denoted by Message.cause_by + context (list[Message]): Action output history, source action denoted by Message.cause_by plan (Plan, optional): Overall plan. Defaults to None. 
Returns: dict: code result in the format of {"code": "print('hello world')", "language": "python"} """ + raise NotImplementedError class WriteCodeByGenerate(BaseWriteAnalysisCode): @@ -70,7 +71,7 @@ async def run( self, - context: [List[Message]], + context: list[Message], plan: Plan = None, system_msg: str = None, **kwargs, @@ -128,7 +129,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): code_steps=code_steps, available_tools=available_tools, ) - tool_config = create_func_config(SELECT_FUNCTION_TOOLS) + tool_config = create_func_call_config(SELECT_FUNCTION_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) recommend_tools = rsp["recommend_tools"] logger.info(f"Recommended tools: \n{recommend_tools}") @@ -169,7 +170,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): async def run( self, - context: List[Message], + context: list[Message], plan: Plan, **kwargs, ) -> str: @@ -184,7 +185,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): # prepare prompt & LLM call prompt = self.process_msg(context) - tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) + tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) return rsp diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py index 335a09841..77b52b78e 100644 --- a/metagpt/actions/write_plan.py +++ b/metagpt/actions/write_plan.py @@ -16,7 +16,7 @@ from metagpt.prompts.write_analysis_code import ( ) from metagpt.schema import Message, Plan, Task from metagpt.tools import TOOL_REGISTRY -from metagpt.utils.common import CodeParser, create_func_config +from metagpt.utils.common import CodeParser, create_func_call_config class WritePlan(Action): @@ -56,7 +56,7 @@ class WritePlan(Action): prompt = ASSIGN_TASK_TYPE_PROMPT.format( task_list=task_list, task_type_desc=task_type_desc ) # task types are set to be the same as tool types, for now - tool_config = 
create_func_config(ASSIGN_TASK_TYPE_CONFIG) + tool_config = create_func_call_config(ASSIGN_TASK_TYPE_CONFIG) rsp = await self.llm.aask_code(prompt, **tool_config) task_type_list = rsp["task_type"] print(f"assigned task types: {task_type_list}") diff --git a/metagpt/plan/planner.py b/metagpt/plan/planner.py index 0d8870fd3..6e866ec22 100644 --- a/metagpt/plan/planner.py +++ b/metagpt/plan/planner.py @@ -87,7 +87,11 @@ class Planner(BaseModel): await self.update_plan() async def ask_review( - self, task_result: TaskResult = None, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER + self, + task_result: TaskResult = None, + auto_run: bool = None, + trigger: str = ReviewConst.TASK_REVIEW_TRIGGER, + review_context_len: int = 5, ): """ Ask to review the task result, reviewer needs to provide confirmation or request change. @@ -97,7 +101,9 @@ class Planner(BaseModel): auto_run = auto_run or self.auto_run if not auto_run: context = self.get_useful_memories() - review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger) + review, confirmed = await AskReview().run( + context=context[-review_context_len:], plan=self.plan, trigger=trigger + ) if not confirmed: self.working_memory.add(Message(content=review, role="user", cause_by=AskReview)) return review, confirmed @@ -110,7 +116,7 @@ class Planner(BaseModel): self.working_memory.clear() confirmed_and_more = ( - ReviewConst.CONTINUE_WORD[0] in review.lower() and review.lower() not in ReviewConst.CONTINUE_WORD[0] + ReviewConst.CONTINUE_WORDS[0] in review.lower() and review.lower() not in ReviewConst.CONTINUE_WORDS[0] ) # "confirm, ... 
(more content, such as changing downstream tasks)" if confirmed_and_more: self.working_memory.add(Message(content=review, role="user", cause_by=AskReview)) diff --git a/metagpt/roles/code_interpreter.py b/metagpt/roles/code_interpreter.py index b4f9622d3..1ae4feec7 100644 --- a/metagpt/roles/code_interpreter.py +++ b/metagpt/roles/code_interpreter.py @@ -1,7 +1,7 @@ from pydantic import Field from metagpt.actions.ask_review import ReviewConst -from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.execute_nb_code import ExecuteNbCode from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools from metagpt.logs import logger from metagpt.roles import Role @@ -11,7 +11,7 @@ from metagpt.schema import Message, Task, TaskResult class CodeInterpreter(Role): auto_run: bool = True use_tools: bool = False - execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True) + execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True) tools: list[str] = [] def __init__( @@ -59,7 +59,7 @@ class CodeInterpreter(Role): result, success = await self.execute_code.run(**code) print(result) - self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode)) + self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode)) ### process execution result ### if "!pip" in code["code"]: @@ -70,7 +70,7 @@ class CodeInterpreter(Role): if not success and counter >= max_retry: logger.info("coding failed!") review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER) - if ReviewConst.CHANGE_WORD[0] in review: + if ReviewConst.CHANGE_WORDS[0] in review: counter = 0 # redo the task again with help of human suggestions py_code = ( diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index e7abee560..19c34f62d 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -1,5 +1,5 @@ from 
metagpt.actions.debug_code import DebugCode -from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.execute_nb_code import ExecuteNbCode from metagpt.actions.ml_action import UpdateDataColumns, WriteCodeWithToolsML from metagpt.logs import logger from metagpt.roles.code_interpreter import CodeInterpreter @@ -19,7 +19,7 @@ class MLEngineer(CodeInterpreter): return await super()._write_code() # In a trial and errors settings, check whether this is our first attempt to tackle the task. If there is no code execution before, then it is. - is_first_trial = any_to_str(ExecutePyCode) not in [msg.cause_by for msg in self.working_memory.get()] + is_first_trial = any_to_str(ExecuteNbCode) not in [msg.cause_by for msg in self.working_memory.get()] if is_first_trial: # For the first trial, write task code from scratch diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 7d3d47680..55f4ce378 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -352,7 +352,7 @@ def parse_recipient(text): return "" -def create_func_config(func_schema: dict) -> dict: +def create_func_call_config(func_schema: dict) -> dict: """Create new function call config""" tools = [{"type": "function", "function": func_schema}] tool_choice = {"type": "function", "function": {"name": func_schema["name"]}} @@ -362,7 +362,7 @@ def create_func_config(func_schema: dict) -> dict: } -def remove_comments(code_str): +def remove_comments(code_str: str) -> str: """Remove comments from code.""" pattern = r"(\".*?\"|\'.*?\')|(\#.*?$)" diff --git a/metagpt/utils/recovery_util.py b/metagpt/utils/recovery_util.py index 3405b9587..d0b197e69 100644 --- a/metagpt/utils/recovery_util.py +++ b/metagpt/utils/recovery_util.py @@ -10,12 +10,13 @@ import nbformat from metagpt.const import DATA_PATH from metagpt.roles.role import Role +from metagpt.utils.common import read_json_file from metagpt.utils.save_code import save_code_file def load_history(save_dir: str = ""): """ - Load 
history from the specified save directory. + Load plan and code execution history from the specified save directory. Args: save_dir (str): The directory from which to load the history. @@ -26,14 +27,14 @@ def load_history(save_dir: str = ""): plan_path = Path(save_dir) / "plan.json" nb_path = Path(save_dir) / "history_nb" / "code.ipynb" - plan = json.load(open(plan_path, "r", encoding="utf-8")) + plan = read_json_file(plan_path) nb = nbformat.read(open(nb_path, "r", encoding="utf-8"), as_version=nbformat.NO_CONVERT) return plan, nb def save_history(role: Role, save_dir: str = ""): """ - Save history to the specified directory. + Save plan and code execution history to the specified directory. Args: role (Role): The role containing the plan and execute_code attributes. diff --git a/tests/metagpt/actions/test_execute_code.py b/tests/metagpt/actions/test_execute_code.py deleted file mode 100644 index 21627e6f9..000000000 --- a/tests/metagpt/actions/test_execute_code.py +++ /dev/null @@ -1,121 +0,0 @@ -import pytest - -from metagpt.actions.execute_code import ExecutePyCode, truncate - - -@pytest.mark.asyncio -async def test_code_running(): - pi = ExecutePyCode() - output = await pi.run("print('hello world!')") - assert output[1] is True - output = await pi.run({"code": "print('hello world!')", "language": "python"}) - assert output[1] is True - - -@pytest.mark.asyncio -async def test_split_code_running(): - pi = ExecutePyCode() - output = await pi.run("x=1\ny=2") - output = await pi.run("z=x+y") - output = await pi.run("assert z==3") - assert output[1] is True - - -@pytest.mark.asyncio -async def test_execute_error(): - pi = ExecutePyCode() - output = await pi.run("z=1/0") - assert output[1] is False - - -@pytest.mark.asyncio -async def test_plotting_code(): - pi = ExecutePyCode() - code = """ - import numpy as np - import matplotlib.pyplot as plt - - # 生成随机数据 - random_data = np.random.randn(1000) # 生成1000个符合标准正态分布的随机数 - - # 绘制直方图 - plt.hist(random_data, bins=30, 
density=True, alpha=0.7, color='blue', edgecolor='black') - - # 添加标题和标签 - plt.title('Histogram of Random Data') - plt.xlabel('Value') - plt.ylabel('Frequency') - - # 显示图形 - plt.show() - plt.close() - """ - output = await pi.run(code) - assert output[1] is True - - -def test_truncate(): - # 代码执行成功 - output, is_success = truncate("hello world", 5, True) - assert "Truncated to show only first 5 characters\nhello" in output - assert is_success - # 代码执行失败 - output, is_success = truncate("hello world", 5, False) - assert "Truncated to show only last 5 characters\nworld" in output - assert not is_success - # 异步 - output, is_success = truncate("