diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py
index fb923d3e4..06913d176 100644
--- a/metagpt/configs/llm_config.py
+++ b/metagpt/configs/llm_config.py
@@ -42,6 +42,7 @@ class LLMConfig(YamlModel):
     api_version: Optional[str] = None

     model: Optional[str] = None  # also stands for DEPLOYMENT_NAME
+    pricing_plan: Optional[str] = None  # Cost Settlement Plan Parameters.

     # For Spark(Xunfei), maybe remove later
     app_id: Optional[str] = None
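The new `pricing_plan` field decouples the billing key from the model/deployment name. A minimal sketch of the intended use, with placeholder values (none of the names below come from this patch):

```python
from metagpt.configs.llm_config import LLMConfig

# Hypothetical Azure-style setup: the deployment name matches no pricing
# table entry, so the billing key is pinned explicitly via pricing_plan.
config = LLMConfig(
    api_key="sk-...",
    model="my-gpt4-deployment",  # also stands for DEPLOYMENT_NAME
    pricing_plan="gpt-4-turbo",  # key used for token counting and cost lookup
)
assert config.pricing_plan == "gpt-4-turbo"
```

Providers that resolve `pricing_plan or model` behave exactly as before when the field is left unset.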
diff --git a/metagpt/provider/azure_openai_api.py b/metagpt/provider/azure_openai_api.py
index c215acf45..33331ae94 100644
--- a/metagpt/provider/azure_openai_api.py
+++ b/metagpt/provider/azure_openai_api.py
@@ -8,14 +8,10 @@
 """
 from openai import AsyncAzureOpenAI
 from openai._base_client import AsyncHttpxClientWrapper
-from openai.types import CompletionUsage

 from metagpt.configs.llm_config import LLMType
-from metagpt.logs import logger
 from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.provider.openai_api import OpenAILLM
-from metagpt.utils import TOKEN_COSTS, count_message_tokens, count_string_tokens
-from metagpt.utils.exceptions import handle_exception


 @register_provider(LLMType.AZURE)
@@ -29,6 +25,7 @@ class AzureOpenAILLM(OpenAILLM):
         # https://learn.microsoft.com/zh-cn/azure/ai-services/openai/how-to/migration?tabs=python-new%2Cdalle-fix
         self.aclient = AsyncAzureOpenAI(**kwargs)
         self.model = self.config.model  # Used in _calc_usage & _cons_kwargs
+        self.pricing_plan = self.config.pricing_plan

     def _make_client_kwargs(self) -> dict:
         kwargs = dict(
@@ -43,38 +40,3 @@ class AzureOpenAILLM(OpenAILLM):
             kwargs["http_client"] = AsyncHttpxClientWrapper(**proxy_params)

         return kwargs
-
-    def _calc_usage(self, messages: list[dict], rsp: str) -> CompletionUsage:
-        usage = CompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
-        if not self.config.calc_usage:
-            return usage
-
-        model_name = "gpt-35-turbo" if "gpt-3" in self.model.lower() else "gpt-4-turbo-preview"
-        try:
-            usage.prompt_tokens = count_message_tokens(messages, model_name)
-            usage.completion_tokens = count_string_tokens(rsp, model_name)
-        except Exception as e:
-            logger.error(f"usage calculation failed: {e}")
-
-        return usage
-
-    @handle_exception
-    def _update_costs(self, usage: CompletionUsage):
-        if self.config.calc_usage and usage and self.cost_manager:
-            model_name = self._get_azure_model()
-            # More about pricing: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
-            self.cost_manager.update_cost(usage.prompt_tokens, usage.completion_tokens, model_name)
-
-    def _get_azure_model(self) -> str:
-        models = [i.lower() for i in TOKEN_COSTS.keys() if "azure" in i]
-        mappings = {i: set(i.split("-")) for i in models}
-        words = self.model.lower().split("-")
-        weights = []
-        for k, v in mappings.items():
-            count = 0
-            for i in words:
-                if i in v:
-                    count += 1
-            weights.append((k, count))
-        sorted_list = sorted(weights, key=lambda x: x[1], reverse=True)
-        return sorted_list[0][0]
diff --git a/metagpt/provider/fireworks_api.py b/metagpt/provider/fireworks_api.py
index d56453a85..9b0c331f3 100644
--- a/metagpt/provider/fireworks_api.py
+++ b/metagpt/provider/fireworks_api.py
@@ -81,14 +81,6 @@ class FireworksLLM(OpenAILLM):
         kwargs = dict(api_key=self.config.api_key, base_url=self.config.base_url)
         return kwargs

-    def _update_costs(self, usage: CompletionUsage):
-        if self.config.calc_usage and usage:
-            try:
-                # use FireworksCostManager not context.cost_manager
-                self.cost_manager.update_cost(usage.prompt_tokens, usage.completion_tokens, self.model)
-            except Exception as e:
-                logger.error(f"updating costs failed!, exp: {e}")
-
     def get_costs(self) -> Costs:
         return self.cost_manager.get_costs()

diff --git a/metagpt/provider/google_gemini_api.py b/metagpt/provider/google_gemini_api.py
index 6df814b55..5584114d7 100644
--- a/metagpt/provider/google_gemini_api.py
+++ b/metagpt/provider/google_gemini_api.py
@@ -53,6 +53,7 @@ class GeminiLLM(BaseLLM):
         self.__init_gemini(config)
         self.config = config
         self.model = "gemini-pro"  # so far only one model
+        self.pricing_plan = self.config.pricing_plan or self.model
         self.llm = GeminiGenerativeModel(model_name=self.model)

     def __init_gemini(self, config: LLMConfig):
@@ -70,16 +71,6 @@ class GeminiLLM(BaseLLM):
         kwargs = {"contents": messages, "generation_config": GenerationConfig(temperature=0.3), "stream": stream}
         return kwargs

-    def _update_costs(self, usage: dict):
-        """update each request's token cost"""
-        if self.config.calc_usage:
-            try:
-                prompt_tokens = int(usage.get("prompt_tokens", 0))
-                completion_tokens = int(usage.get("completion_tokens", 0))
-                self.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model)
-            except Exception as e:
-                logger.error(f"google gemini updats costs failed! exp: {e}")
-
     def get_choice_text(self, resp: GenerateContentResponse) -> str:
         return resp.text

diff --git a/metagpt/provider/metagpt_api.py b/metagpt/provider/metagpt_api.py
index 7495079da..8c7daeac6 100644
--- a/metagpt/provider/metagpt_api.py
+++ b/metagpt/provider/metagpt_api.py
@@ -16,12 +16,9 @@ from metagpt.utils.exceptions import handle_exception
 @register_provider(LLMType.METAGPT)
 class MetaGPTLLM(OpenAILLM):
     def _calc_usage(self, messages: list[dict], rsp: str) -> CompletionUsage:
-        usage = CompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
-
         # The current billing is based on usage frequency. If there is a future billing logic based on the
         # number of tokens, please refine the logic here accordingly.
-
-        return usage
+        return CompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)

     @handle_exception
     def _update_costs(self, usage: CompletionUsage):
diff --git a/metagpt/provider/ollama_api.py b/metagpt/provider/ollama_api.py
index c9103b018..89e171cf5 100644
--- a/metagpt/provider/ollama_api.py
+++ b/metagpt/provider/ollama_api.py
@@ -36,26 +36,17 @@ class OllamaLLM(BaseLLM):
         self.suffix_url = "/chat"
         self.http_method = "post"
         self.use_system_prompt = False
-        self._cost_manager = TokenCostManager()
+        self.cost_manager = TokenCostManager()

     def __init_ollama(self, config: LLMConfig):
         assert config.base_url, "ollama base url is required!"
         self.model = config.model
+        self.pricing_plan = self.model

     def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
         kwargs = {"model": self.model, "messages": messages, "options": {"temperature": 0.3}, "stream": stream}
         return kwargs

-    def _update_costs(self, usage: dict):
-        """update each request's token cost"""
-        if self.config.calc_usage:
-            try:
-                prompt_tokens = int(usage.get("prompt_tokens", 0))
-                completion_tokens = int(usage.get("completion_tokens", 0))
-                self._cost_manager.update_cost(prompt_tokens, completion_tokens, self.model)
-            except Exception as e:
-                logger.error(f"ollama updats costs failed! exp: {e}")
-
     def get_choice_text(self, resp: dict) -> str:
         """get the resp content from llm response"""
         assist_msg = resp.get("message", {})
diff --git a/metagpt/provider/open_llm_api.py b/metagpt/provider/open_llm_api.py
index a29b263a4..bc2f40fff 100644
--- a/metagpt/provider/open_llm_api.py
+++ b/metagpt/provider/open_llm_api.py
@@ -16,7 +16,7 @@ from metagpt.utils.token_counter import count_message_tokens, count_string_token
 class OpenLLM(OpenAILLM):
     def __init__(self, config: LLMConfig):
         super().__init__(config)
-        self._cost_manager = TokenCostManager()
+        self.cost_manager = TokenCostManager()

     def _make_client_kwargs(self) -> dict:
         kwargs = dict(api_key="sk-xxx", base_url=self.config.base_url)
@@ -35,13 +35,5 @@ class OpenLLM(OpenAILLM):

         return usage

-    def _update_costs(self, usage: CompletionUsage):
-        if self.config.calc_usage and usage:
-            try:
-                # use OpenLLMCostManager not CONFIG.cost_manager
-                self._cost_manager.update_cost(usage.prompt_tokens, usage.completion_tokens, self.model)
-            except Exception as e:
-                logger.error(f"updating costs failed!, exp: {e}")
-
     def get_costs(self) -> Costs:
-        return self._cost_manager.get_costs()
+        return self.cost_manager.get_costs()
diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py
index d6944eae6..f33a4136b 100644
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@@ -6,9 +6,10 @@
 @Modified By: mashenquan, 2023/11/21. Fix bug: ReadTimeout.
 @Modified By: mashenquan, 2023/12/1. Fix bug: Unclosed connection caused by openai 0.x.
 """
+from __future__ import annotations

 import json
-from typing import AsyncIterator, Optional, Union
+from typing import AsyncIterator, Dict, Optional, Union

 from openai import APIConnectionError, AsyncOpenAI, AsyncStream
 from openai._base_client import AsyncHttpxClientWrapper
@@ -61,6 +62,7 @@ class OpenAILLM(BaseLLM):

     def _init_model(self):
         self.model = self.config.model  # Used in _calc_usage & _cons_kwargs
+        self.pricing_plan = self.config.pricing_plan or self.model

     def _init_client(self):
         """https://github.com/openai/openai-python#async-usage"""
@@ -209,17 +211,23 @@ class OpenAILLM(BaseLLM):
             return usage

         try:
-            usage.prompt_tokens = count_message_tokens(messages, self.model)
-            usage.completion_tokens = count_string_tokens(rsp, self.model)
+            usage.prompt_tokens = count_message_tokens(messages, self.pricing_plan)
+            usage.completion_tokens = count_string_tokens(rsp, self.pricing_plan)
         except Exception as e:
             logger.error(f"usage calculation failed: {e}")

         return usage

     @handle_exception
-    def _update_costs(self, usage: CompletionUsage):
+    def _update_costs(self, usage: CompletionUsage | Dict):
         if self.config.calc_usage and usage and self.cost_manager:
-            self.cost_manager.update_cost(usage.prompt_tokens, usage.completion_tokens, self.model)
+            if isinstance(usage, Dict):
+                prompt_tokens = int(usage.get("prompt_tokens", 0))
+                completion_tokens = int(usage.get("completion_tokens", 0))
+            else:
+                prompt_tokens = usage.prompt_tokens
+                completion_tokens = usage.completion_tokens
+            self.cost_manager.update_cost(prompt_tokens, completion_tokens, self.pricing_plan)

     def get_costs(self) -> Costs:
         if not self.cost_manager:
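`_update_costs` on the base OpenAI provider now normalizes both usage shapes, which is what lets the per-provider overrides above be deleted. A quick sketch of the two call forms, assuming a hypothetical initialized `OpenAILLM` instance `llm` with `calc_usage` enabled and a cost manager attached:

```python
from openai.types import CompletionUsage

# Object form, as returned by the OpenAI SDK:
llm._update_costs(CompletionUsage(prompt_tokens=100, completion_tokens=20, total_tokens=120))

# Dict form, as produced by providers that surface raw JSON usage
# (the Gemini, Ollama, and zhipuai paths removed in this patch):
llm._update_costs({"prompt_tokens": 100, "completion_tokens": 20})

# Both paths feed plain int token counts plus llm.pricing_plan
# into cost_manager.update_cost.
```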
exp: {e}") - def get_choice_text(self, resp: dict) -> str: """get the resp content from llm response""" assist_msg = resp.get("message", {}) diff --git a/metagpt/provider/open_llm_api.py b/metagpt/provider/open_llm_api.py index a29b263a4..bc2f40fff 100644 --- a/metagpt/provider/open_llm_api.py +++ b/metagpt/provider/open_llm_api.py @@ -16,7 +16,7 @@ from metagpt.utils.token_counter import count_message_tokens, count_string_token class OpenLLM(OpenAILLM): def __init__(self, config: LLMConfig): super().__init__(config) - self._cost_manager = TokenCostManager() + self.cost_manager = TokenCostManager() def _make_client_kwargs(self) -> dict: kwargs = dict(api_key="sk-xxx", base_url=self.config.base_url) @@ -35,13 +35,5 @@ class OpenLLM(OpenAILLM): return usage - def _update_costs(self, usage: CompletionUsage): - if self.config.calc_usage and usage: - try: - # use OpenLLMCostManager not CONFIG.cost_manager - self._cost_manager.update_cost(usage.prompt_tokens, usage.completion_tokens, self.model) - except Exception as e: - logger.error(f"updating costs failed!, exp: {e}") - def get_costs(self) -> Costs: - return self._cost_manager.get_costs() + return self.cost_manager.get_costs() diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index d6944eae6..f33a4136b 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -6,9 +6,10 @@ @Modified By: mashenquan, 2023/11/21. Fix bug: ReadTimeout. @Modified By: mashenquan, 2023/12/1. Fix bug: Unclosed connection caused by openai 0.x. """ +from __future__ import annotations import json -from typing import AsyncIterator, Optional, Union +from typing import AsyncIterator, Dict, Optional, Union from openai import APIConnectionError, AsyncOpenAI, AsyncStream from openai._base_client import AsyncHttpxClientWrapper @@ -61,6 +62,7 @@ class OpenAILLM(BaseLLM): def _init_model(self): self.model = self.config.model # Used in _calc_usage & _cons_kwargs + self.pricing_plan = self.config.pricing_plan or self.model def _init_client(self): """https://github.com/openai/openai-python#async-usage""" @@ -209,17 +211,23 @@ class OpenAILLM(BaseLLM): return usage try: - usage.prompt_tokens = count_message_tokens(messages, self.model) - usage.completion_tokens = count_string_tokens(rsp, self.model) + usage.prompt_tokens = count_message_tokens(messages, self.pricing_plan) + usage.completion_tokens = count_string_tokens(rsp, self.pricing_plan) except Exception as e: logger.error(f"usage calculation failed: {e}") return usage @handle_exception - def _update_costs(self, usage: CompletionUsage): + def _update_costs(self, usage: CompletionUsage | Dict): if self.config.calc_usage and usage and self.cost_manager: - self.cost_manager.update_cost(usage.prompt_tokens, usage.completion_tokens, self.model) + if isinstance(usage, Dict): + prompt_tokens = int(usage.get("prompt_tokens", 0)) + completion_tokens = int(usage.get("completion_tokens", 0)) + else: + prompt_tokens = usage.prompt_tokens + completion_tokens = usage.completion_tokens + self.cost_manager.update_cost(prompt_tokens, completion_tokens, self.pricing_plan) def get_costs(self) -> Costs: if not self.cost_manager: diff --git a/metagpt/provider/zhipuai_api.py b/metagpt/provider/zhipuai_api.py index 9108a1fba..3bc130090 100644 --- a/metagpt/provider/zhipuai_api.py +++ b/metagpt/provider/zhipuai_api.py @@ -3,6 +3,7 @@ # @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk from enum import Enum +from typing import Optional import openai import zhipuai @@ -21,6 +22,7 @@ from 
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index c6bf2f0d7..a7beea579 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -34,14 +34,10 @@ TOKEN_COSTS = {
     "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005¥/k-tokens
     "glm-4": {"prompt": 0.0, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1¥/k-tokens
     "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
-    # Azure
-    "azure-gpt-3.5-turbo-4k": {"prompt": 0.0015, "completion": 0.002},
-    "azure-gpt-3.5-turbo-16k": {"prompt": 0.003, "completion": 0.004},
-    "azure-gpt-3.5-turbo-1106": {"prompt": 0.001, "completion": 0.002},
-    "azure-gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
-    "azure-gpt-4-turbo-vision": {"prompt": 0.01, "completion": 0.03},
-    "azure-gpt-4-8k": {"prompt": 0.03, "completion": 0.06},
-    "azure-gpt-4-32k": {"prompt": 0.06, "completion": 0.12},
+    "gpt-3.5-turbo-4k": {"prompt": 0.0015, "completion": 0.002},
+    "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
+    "gpt-4-turbo-vision": {"prompt": 0.01, "completion": 0.03},
+    "gpt-4-8k": {"prompt": 0.03, "completion": 0.06},
 }

 TOKEN_MAX = {
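With the `azure-*` rows removed, Azure deployments settle against the same pricing keys as every other provider, replacing the fuzzy `_get_azure_model` matching deleted above with the uniform `pricing_plan or model` resolution. A rough sketch of that lookup; `resolve_pricing_plan` is an illustrative helper, not part of the patch:

```python
from typing import Optional

from metagpt.utils.token_counter import TOKEN_COSTS


def resolve_pricing_plan(configured: Optional[str], model: Optional[str]) -> Optional[str]:
    """Mirror of the pattern providers now inline: explicit plan wins, model name is the fallback."""
    plan = configured or model
    # A falsy plan is skipped by CostManager.update_cost's new guard; a plan
    # missing from TOKEN_COSTS would surface as a KeyError inside update_cost.
    return plan if plan in TOKEN_COSTS else None


print(resolve_pricing_plan(None, "gpt-4-turbo"))  # -> "gpt-4-turbo"
print(resolve_pricing_plan("gpt-4-turbo", "my-gpt4-deployment"))  # -> "gpt-4-turbo"
```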