diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py
index 1d132eb8a..219a303c8 100644
--- a/examples/llm_hello_world.py
+++ b/examples/llm_hello_world.py
@@ -6,11 +6,9 @@
 @File    : llm_hello_world.py
 """
 import asyncio
-from pathlib import Path
 
 from metagpt.llm import LLM
 from metagpt.logs import logger
-from metagpt.utils.common import encode_image
 
 
 async def main():
@@ -29,12 +27,6 @@ async def main():
     if hasattr(llm, "completion"):
         logger.info(llm.completion(hello_msg))
 
-    # check if the configured llm supports llm-vision capacity. If not, it will throw a error
-    invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
-    img_base64 = encode_image(invoice_path)
-    res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
-    assert "true" in res.lower()
-
 
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/examples/llm_vision.py b/examples/llm_vision.py
new file mode 100644
index 000000000..276decd59
--- /dev/null
+++ b/examples/llm_vision.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : example demonstrating the vision (image input) capability of the configured LLM
+
+import asyncio
+from pathlib import Path
+
+from metagpt.llm import LLM
+from metagpt.utils.common import encode_image
+
+
+async def main():
+    """Ask the configured LLM whether a sample invoice image is an invoice and assert the reply contains "true"."""
+    llm = LLM()
+    # check if the configured llm supports llm-vision capacity. If not, it is expected to throw an error
+    invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
+    img_base64 = encode_image(invoice_path)
+    res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
+    assert "true" in res.lower()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/metagpt/provider/general_api_requestor.py b/metagpt/provider/general_api_requestor.py
index 500cd1426..18f4dd909 100644
--- a/metagpt/provider/general_api_requestor.py
+++ b/metagpt/provider/general_api_requestor.py
@@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor):
         self, result: requests.Response, stream: bool
     ) -> Tuple[Union[bytes, Iterator[Generator]], bytes]:
         """Returns the response(s) and a bool indicating whether it is a stream."""
-        if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
+        content_type = result.headers.get("Content-Type", "")
+        if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type):
             return (
                 self._interpret_response_line(line, result.status_code, result.headers, stream=True)
                 for line in parse_stream(result.iter_lines())
diff --git a/metagpt/provider/zhipuai_api.py b/metagpt/provider/zhipuai_api.py
index 9108a1fba..9e8e5fb53 100644
--- a/metagpt/provider/zhipuai_api.py
+++ b/metagpt/provider/zhipuai_api.py
@@ -3,9 +3,8 @@
 # @Desc   : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk
 
 from enum import Enum
+from typing import Optional
 
-import openai
-import zhipuai
 from requests import ConnectionError
 from tenacity import (
     after_log,
@@ -14,6 +13,7 @@ from tenacity import (
     stop_after_attempt,
     wait_random_exponential,
 )
+from zhipuai.types.chat.chat_completion import Completion
 
 from metagpt.configs.llm_config import LLMConfig, LLMType
 from metagpt.logs import log_llm_stream, logger
@@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.provider.openai_api import log_and_reraise
 from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI
+from metagpt.utils.cost_manager import CostManager
 
 
 class ZhiPuEvent(Enum):
@@ -38,20 +39,15 @@ class ZhiPuAILLM(BaseLLM):
     """
 
     def __init__(self, config: LLMConfig):
-        self.__init_zhipuai(config)
-        self.llm = ZhiPuModelAPI
-        self.model = "chatglm_turbo"  # so far only one model, just use it
-        self.use_system_prompt: bool = False  # zhipuai has no system prompt when use api
         self.config = config
+        self.__init_zhipuai()
+        self.cost_manager: Optional[CostManager] = None
 
-    def __init_zhipuai(self, config: LLMConfig):
-        assert config.api_key
-        zhipuai.api_key = config.api_key
-        # due to use openai sdk, set the api_key but it will't be used.
-        # openai.api_key = zhipuai.api_key  # due to use openai sdk, set the api_key but it will't be used.
-        if config.proxy:
-            # FIXME: openai v1.x sdk has no proxy support
-            openai.proxy = config.proxy
+    def __init_zhipuai(self):  # reads self.config, so self.config must be assigned before this is called
+        assert self.config.api_key  # NOTE(review): assert is stripped under `python -O`; consider raising instead
+        self.api_key = self.config.api_key
+        self.model = self.config.model  # so far, it supports glm-3-turbo and glm-4
+        self.llm = ZhiPuModelAPI(api_key=self.api_key)  # client instance; completion() calls self.llm.chat.completions
 
     def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
         kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3}
@@ -63,12 +59,12 @@ class ZhiPuAILLM(BaseLLM):
             try:
                 prompt_tokens = int(usage.get("prompt_tokens", 0))
                 completion_tokens = int(usage.get("completion_tokens", 0))
-                self.config.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model)
+                self.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model)
             except Exception as e:
                 logger.error(f"zhipuai updats costs failed! exp: {e}")
 
     def completion(self, messages: list[dict], timeout=3) -> dict:
-        resp = self.llm.chat.completions.create(**self._const_kwargs(messages))
+        resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages))
         usage = resp.usage.model_dump()
         self._update_costs(usage)
         return resp.model_dump()
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index a0fb3b70d..65f5fe76f 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -32,8 +32,8 @@ TOKEN_COSTS = {
     "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03},  # TODO add extra image price calculator
     "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
     "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
-    "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
-    "glm-4": {"prompt": 0.0, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
+    "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
+    "glm-4": {"prompt": 0.014, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
     "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
 }
 
@@ -58,7 +58,8 @@ TOKEN_MAX = {
     "gpt-4-vision-preview": 128000,
     "gpt-4-1106-vision-preview": 128000,
     "text-embedding-ada-002": 8192,
-    "chatglm_turbo": 32768,
+    "glm-3-turbo": 128000,
+    "glm-4": 128000,
     "gemini-pro": 32768,
 }
 
diff --git a/tests/metagpt/provider/test_zhipuai_api.py b/tests/metagpt/provider/test_zhipuai_api.py
index 798209710..ad2ececa2 100644
--- a/tests/metagpt/provider/test_zhipuai_api.py
+++ b/tests/metagpt/provider/test_zhipuai_api.py
@@ -17,7 +17,7 @@ default_resp = {
 }
 
 
-async def mock_zhipuai_acreate_stream(**kwargs):
+async def mock_zhipuai_acreate_stream(self, **kwargs):
     class MockResponse(object):
         async def _aread(self):
             class Iterator(object):
@@ -37,7 +37,7 @@ async def mock_zhipuai_acreate_stream(**kwargs):
     return MockResponse()
 
 
-async def mock_zhipuai_acreate(**kwargs) -> dict:
+async def mock_zhipuai_acreate(self, **kwargs) -> dict:
     return default_resp