diff --git a/config/config2.yaml.example b/config/config2.yaml.example index 8f4a33fc1..2217f1b2c 100644 --- a/config/config2.yaml.example +++ b/config/config2.yaml.example @@ -1,5 +1,5 @@ llm: - api_type: "openai" + api_type: "openai" # or azure / ollama etc. base_url: "YOUR_BASE_URL" api_key: "YOUR_API_KEY" model: "gpt-4-turbo-preview" # or gpt-3.5-turbo-1106 / gpt-4-1106-preview diff --git a/examples/llm_vision.py b/examples/llm_vision.py new file mode 100644 index 000000000..276decd59 --- /dev/null +++ b/examples/llm_vision.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : example to run the ability of LLM vision + +import asyncio +from pathlib import Path + +from metagpt.llm import LLM +from metagpt.utils.common import encode_image + + +async def main(): + llm = LLM() + + # check if the configured llm supports llm-vision capability. If not, it will throw an error + invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png") + img_base64 = encode_image(invoice_path) + res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64]) + assert "true" in res.lower() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 2ebeadb66..ce8d8a967 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -133,7 +133,7 @@ class CollectLinks(Action): if len(remove) == 0: break - model_name = config.get_openai_llm().model + model_name = config.llm.model prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096) logger.debug(prompt) queries = await self._aask(prompt, [system_text]) diff --git a/metagpt/provider/general_api_requestor.py b/metagpt/provider/general_api_requestor.py index 500cd1426..18f4dd909 100644 --- a/metagpt/provider/general_api_requestor.py +++ b/metagpt/provider/general_api_requestor.py @@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor): 
self, result: requests.Response, stream: bool ) -> Tuple[Union[bytes, Iterator[Generator]], bytes]: """Returns the response(s) and a bool indicating whether it is a stream.""" - if stream and "text/event-stream" in result.headers.get("Content-Type", ""): + content_type = result.headers.get("Content-Type", "") + if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type): return ( self._interpret_response_line(line, result.status_code, result.headers, stream=True) for line in parse_stream(result.iter_lines()) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 1e5770d74..2ae14f437 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -233,14 +233,16 @@ class OpenAILLM(BaseLLM): usage.prompt_tokens = count_message_tokens(messages, self.model) usage.completion_tokens = count_string_tokens(rsp, self.model) except Exception as e: - logger.error(f"usage calculation failed: {e}") + logger.warning(f"usage calculation failed: {e}") return usage def _get_max_tokens(self, messages: list[dict]): if not self.auto_max_tokens: return self.config.max_token - return get_max_completion_tokens(messages, self.model, self.config.max_tokens) + # FIXME + # https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3 + return min(get_max_completion_tokens(messages, self.model, self.config.max_token), 4096) @handle_exception async def amoderation(self, content: Union[str, list[str]]): diff --git a/metagpt/provider/zhipuai_api.py b/metagpt/provider/zhipuai_api.py index b7c160a41..4cbee4038 100644 --- a/metagpt/provider/zhipuai_api.py +++ b/metagpt/provider/zhipuai_api.py @@ -3,9 +3,8 @@ # @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk from enum import Enum +from typing import Optional -import openai -import zhipuai from requests import ConnectionError from tenacity import ( after_log, @@ -14,6 +13,7 @@ from tenacity import ( stop_after_attempt, 
wait_random_exponential, ) +from zhipuai.types.chat.chat_completion import Completion from metagpt.configs.llm_config import LLMConfig, LLMType from metagpt.logs import log_llm_stream, logger @@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM from metagpt.provider.llm_provider_registry import register_provider from metagpt.provider.openai_api import log_and_reraise from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI +from metagpt.utils.cost_manager import CostManager class ZhiPuEvent(Enum): @@ -38,27 +39,22 @@ class ZhiPuAILLM(BaseLLM): """ def __init__(self, config: LLMConfig): - self.__init_zhipuai(config) - self.llm = ZhiPuModelAPI - self.model = "chatglm_turbo" # so far only one model, just use it - self.use_system_prompt: bool = False # zhipuai has no system prompt when use api self.config = config + self.__init_zhipuai() + self.cost_manager: Optional[CostManager] = None - def __init_zhipuai(self, config: LLMConfig): - assert config.api_key - zhipuai.api_key = config.api_key - # due to use openai sdk, set the api_key but it will't be used. - # openai.api_key = zhipuai.api_key # due to use openai sdk, set the api_key but it will't be used. 
- if config.proxy: - # FIXME: openai v1.x sdk has no proxy support - openai.proxy = config.proxy + def __init_zhipuai(self): + assert self.config.api_key + self.api_key = self.config.api_key + self.model = self.config.model # so far, it supports glm-3-turbo、glm-4 + self.llm = ZhiPuModelAPI(api_key=self.api_key) def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict: kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3} return kwargs def completion(self, messages: list[dict], timeout=3) -> dict: - resp = self.llm.chat.completions.create(**self._const_kwargs(messages)) + resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages)) usage = resp.usage.model_dump() self._update_costs(usage) return resp.model_dump() diff --git a/metagpt/roles/ci/code_interpreter.py b/metagpt/roles/ci/code_interpreter.py index 796abba04..404c93b81 100644 --- a/metagpt/roles/ci/code_interpreter.py +++ b/metagpt/roles/ci/code_interpreter.py @@ -72,11 +72,7 @@ class CodeInterpreter(Role): if ReviewConst.CHANGE_WORDS[0] in review: counter = 0 # redo the task again with help of human suggestions - py_code = ( - code["code"] if code.get("language") == "python" else "" - ) # use python code as final code; for markdown, return the rendered result instead of the code itself - - return py_code, result, success + return code["code"], result, success async def _write_code(self): todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools) diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py index e1c0f415b..4e6b65b2c 100644 --- a/metagpt/utils/cost_manager.py +++ b/metagpt/utils/cost_manager.py @@ -42,6 +42,10 @@ class CostManager(BaseModel): """ self.total_prompt_tokens += prompt_tokens self.total_completion_tokens += completion_tokens + if model not in TOKEN_COSTS: + logger.warning(f"Model {model} not found in TOKEN_COSTS.") + return + cost = ( 
prompt_tokens * self.token_costs[model]["prompt"] + completion_tokens * self.token_costs[model]["completion"] diff --git a/metagpt/utils/text.py b/metagpt/utils/text.py index dd9678438..fb8b94232 100644 --- a/metagpt/utils/text.py +++ b/metagpt/utils/text.py @@ -25,7 +25,7 @@ def reduce_message_length( """ max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved for msg in msgs: - if count_string_tokens(msg, model_name) < max_token: + if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX: return msg raise RuntimeError("fail to reduce message length") @@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str continue ret = ["".join(j) for j in _split_by_count(sentences, count)] return ret - return _split_by_count(paragraph, count) + return list(_split_by_count(paragraph, count)) def decode_unicode_escape(text: str) -> str: diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index b69ec73d3..2ec0edc99 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -32,8 +32,8 @@ TOKEN_COSTS = { "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, - "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens - "glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens + "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens + "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens "gemini-pro": {"prompt": 0.00025, "completion": 0.0005}, } @@ -111,7 +111,8 @@ TOKEN_MAX = { "gpt-4-vision-preview": 128000, 
"gpt-4-1106-vision-preview": 128000, "text-embedding-ada-002": 8192, - "chatglm_turbo": 32768, + "glm-3-turbo": 128000, + "glm-4": 128000, "gemini-pro": 32768, } diff --git a/requirements.txt b/requirements.txt index c893bd713..b5d8d7d51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -63,7 +63,7 @@ gitignore-parser==0.1.9 websockets~=12.0 networkx~=3.2.1 google-generativeai==0.3.2 -# playwright==1.40.0 # playwright extras require +playwright>=1.26 # used at metagpt/tools/libs/web_scraping.py anytree ipywidgets==8.1.1 Pillow diff --git a/setup.py b/setup.py index b16d978cf..be3956ea4 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ requirements = (here / "requirements.txt").read_text(encoding="utf-8").splitline extras_require = { - "playwright": ["playwright>=1.26", "beautifulsoup4"], "selenium": ["selenium>4", "webdriver_manager", "beautifulsoup4"], "search-google": ["google-api-python-client==2.94.0"], "search-ddg": ["duckduckgo-search~=4.1.1"], diff --git a/tests/data/rsp_cache.json b/tests/data/rsp_cache.json index 40d7d3953..75fc9ceb2 100644 --- a/tests/data/rsp_cache.json +++ b/tests/data/rsp_cache.json @@ -389,5 +389,11 @@ "reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. 
To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.", "improved_impl": "def sort_array(arr):\n # Helper function to count the number of ones in the binary representation\n def count_ones(n):\n return bin(n).count('1')\n \n # Sort the array using a custom key function\n # The key function returns a tuple (number of ones, value) for each element\n # This ensures that if two elements have the same number of ones, they are sorted by their value\n # We use a stable sorting algorithm to maintain the relative order of elements with the same number of ones\n sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x), reverse=True)\n sorted_arr = sorted(sorted_arr, key=count_ones)\n \n return sorted_arr\n" }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. 
Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. 
And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "import pandas as pd\n\n# Convert the Iris dataset to a DataFrame for easier manipulation\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\niris_df['target'] = iris_data.target\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\n\n# Display basic information about the dataset\niris_df.info()\n\n# Display statistical summary of the dataset\niris_df.describe()\n\n# Display the first few rows of the dataset\ndisplay(iris_df.head())" + }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. 
Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"import pandas as pd\\\\n\\\\n# Convert the Iris dataset to a DataFrame for easier manipulation\\\\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\\\\niris_df['target'] = iris_data.target\\\\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\\\\n\\\\n# Display basic information about the dataset\\\\niris_df.info()\\\\n\\\\n# Display statistical summary of the dataset\\\\niris_df.describe()\\\\n\\\\n# Display the first few rows of the dataset\\\\ndisplay(iris_df.head())\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current 
Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Set the style of seaborn\nsns.set_style('whitegrid')\n\n# Pairplot to visualize the relationships between features\nsns.pairplot(iris_df, hue='target_names', markers='+')\nplt.show()" + }, "\n## context\n\n\n-----\n\n## format example\n[CONTENT]\n{\n \"invoice\": \"False\"\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- invoice: # if it's a invoice file, return True else False\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"invoice\": \"True\"\n}\n[/CONTENT]" } \ No newline at end of file diff --git a/tests/metagpt/actions/test_rebuild_class_view.py b/tests/metagpt/actions/test_rebuild_class_view.py 
index 403109cc0..2188d6b85 100644 --- a/tests/metagpt/actions/test_rebuild_class_view.py +++ b/tests/metagpt/actions/test_rebuild_class_view.py @@ -14,6 +14,7 @@ from metagpt.actions.rebuild_class_view import RebuildClassView from metagpt.llm import LLM +@pytest.mark.skip @pytest.mark.asyncio async def test_rebuild(context): action = RebuildClassView( diff --git a/tests/metagpt/actions/test_summarize_code.py b/tests/metagpt/actions/test_summarize_code.py index a404047c1..3cfe7ca81 100644 --- a/tests/metagpt/actions/test_summarize_code.py +++ b/tests/metagpt/actions/test_summarize_code.py @@ -176,6 +176,7 @@ class Snake: """ +@pytest.mark.skip @pytest.mark.asyncio async def test_summarize_code(context): git_dir = Path(__file__).parent / f"unittest/{uuid.uuid4().hex}" diff --git a/tests/metagpt/provider/test_zhipuai_api.py b/tests/metagpt/provider/test_zhipuai_api.py index 8ec9ab4f9..c51010122 100644 --- a/tests/metagpt/provider/test_zhipuai_api.py +++ b/tests/metagpt/provider/test_zhipuai_api.py @@ -19,7 +19,7 @@ resp_cont = resp_cont_tmpl.format(name=name) default_resp = get_part_chat_completion(name) -async def mock_zhipuai_acreate_stream(**kwargs): +async def mock_zhipuai_acreate_stream(self, **kwargs): class MockResponse(object): async def _aread(self): class Iterator(object): @@ -39,7 +39,7 @@ async def mock_zhipuai_acreate_stream(**kwargs): return MockResponse() -async def mock_zhipuai_acreate(**kwargs) -> dict: +async def mock_zhipuai_acreate(self, **kwargs) -> dict: return default_resp diff --git a/tests/metagpt/roles/ci/test_code_interpreter.py b/tests/metagpt/roles/ci/test_code_interpreter.py index f23292965..9d2f2429b 100644 --- a/tests/metagpt/roles/ci/test_code_interpreter.py +++ b/tests/metagpt/roles/ci/test_code_interpreter.py @@ -17,3 +17,7 @@ async def test_code_interpreter(mocker, auto_run): rsp = await ci.run(requirement) logger.info(rsp) assert len(rsp.content) > 0 + + finished_tasks = ci.planner.plan.get_finished_tasks() + assert 
len(finished_tasks) > 0 + assert len(finished_tasks[0].code) > 0 # check one task to see if code is recorded diff --git a/tests/metagpt/utils/test_text.py b/tests/metagpt/utils/test_text.py index 7003c7767..c9a9753be 100644 --- a/tests/metagpt/utils/test_text.py +++ b/tests/metagpt/utils/test_text.py @@ -42,6 +42,7 @@ def test_reduce_message_length(msgs, model_name, system_text, reserved, expected (" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1), (" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2), (" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1), + (" ".join("Hello World" for _ in range(8000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1000, 8), ], ) def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):