diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
index 02e6ee3d0..7b884d149 100644
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@@ -19,14 +19,11 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install -e .[test]
-        npm install -g @mermaid-js/mermaid-cli
-        playwright install --with-deps chromium
+        sh tests/scripts/run_install_deps.sh
     - name: Test with pytest
       run: |
         echo "${{ secrets.METAGPT_KEY_YAML }}" | base64 -d > config/key.yaml
-        pytest tests/ --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov=./metagpt/ --cov-report=xml:cov.xml --cov-report=html:htmlcov
+        pytest tests/ --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov=./metagpt/ --cov-report=xml:cov.xml --cov-report=html:htmlcov --durations=20
         coverage report -m
     - name: Upload pytest test results
       uses: actions/upload-artifact@v3
diff --git a/docs/scripts/coverage.sh b/docs/scripts/coverage.sh
index be55b3b65..648d9b412 100755
--- a/docs/scripts/coverage.sh
+++ b/docs/scripts/coverage.sh
@@ -1 +1 @@
-coverage run --source ./metagpt -m pytest && coverage report -m && coverage html && open htmlcov/index.html
+coverage run --source ./metagpt -m pytest --durations=0 && coverage report -m && coverage html && open htmlcov/index.html
diff --git a/metagpt/actions/project_management.py b/metagpt/actions/project_management.py
index b33f3426d..e40c2034b 100644
--- a/metagpt/actions/project_management.py
+++ b/metagpt/actions/project_management.py
@@ -89,9 +89,6 @@ class WriteTasks(Action):
 
     async def _run_new_tasks(self, context, schema=CONFIG.prompt_schema):
         node = await PM_NODE.fill(context, self.llm, schema)
-        # prompt_template, format_example = get_template(templates, format)
-        # prompt = prompt_template.format(context=context, format_example=format_example)
-        # rsp = await self._aask_v1(prompt, "task", OUTPUT_MAPPING, format=format)
         return node
 
     async def _merge(self, system_design_doc, task_doc, schema=CONFIG.prompt_schema) -> Document:
diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index 4d00adbc7..52dd96b1a 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -43,7 +43,7 @@ class BaseLLM(ABC):
         if system_msgs:
             message = self._system_msgs(system_msgs)
         else:
-            message = [self._default_system_msg()]
+            message = [self._default_system_msg()] if self.use_system_prompt else []
         if format_msgs:
             message.extend(format_msgs)
         message.append(self._user_msg(msg))
diff --git a/metagpt/provider/fireworks_api.py b/metagpt/provider/fireworks_api.py
index 638b0703d..f0af68818 100644
--- a/metagpt/provider/fireworks_api.py
+++ b/metagpt/provider/fireworks_api.py
@@ -64,8 +64,9 @@ class FireworksCostManager(CostManager):
         token_costs = self.model_grade_token_costs(model)
         cost = (prompt_tokens * token_costs["prompt"] + completion_tokens * token_costs["completion"]) / 1000000
         self.total_cost += cost
+        max_budget = CONFIG.max_budget if CONFIG.max_budget else CONFIG.cost_manager.max_budget
         logger.info(
-            f"Total running cost: ${self.total_cost:.4f} | Max budget: ${CONFIG.max_budget:.3f} | "
+            f"Total running cost: ${self.total_cost:.4f} | Max budget: ${max_budget:.3f} | "
             f"Current cost: ${cost:.4f}, prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
         )
         CONFIG.total_cost = self.total_cost
diff --git a/metagpt/provider/ollama_api.py b/metagpt/provider/ollama_api.py
index 95b944bf3..8ee04de7d 100644
--- a/metagpt/provider/ollama_api.py
+++ b/metagpt/provider/ollama_api.py
@@ -30,9 +30,9 @@ class OllamaCostManager(CostManager):
         """
         self.total_prompt_tokens += prompt_tokens
         self.total_completion_tokens += completion_tokens
-
+        max_budget = CONFIG.max_budget if CONFIG.max_budget else CONFIG.cost_manager.max_budget
         logger.info(
-            f"Max budget: ${CONFIG.max_budget:.3f} | "
+            f"Max budget: ${max_budget:.3f} | "
             f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
         )
         CONFIG.total_cost = self.total_cost
diff --git a/metagpt/provider/open_llm_api.py b/metagpt/provider/open_llm_api.py
index 7f5870702..b0c484f5a 100644
--- a/metagpt/provider/open_llm_api.py
+++ b/metagpt/provider/open_llm_api.py
@@ -26,9 +26,9 @@ class OpenLLMCostManager(CostManager):
         """
         self.total_prompt_tokens += prompt_tokens
         self.total_completion_tokens += completion_tokens
-
+        max_budget = CONFIG.max_budget if CONFIG.max_budget else CONFIG.cost_manager.max_budget
         logger.info(
-            f"Max budget: ${CONFIG.max_budget:.3f} | reference "
+            f"Max budget: ${max_budget:.3f} | reference "
             f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
         )
 
diff --git a/metagpt/provider/zhipuai/zhipu_model_api.py b/metagpt/provider/zhipuai/zhipu_model_api.py
index 72be0f333..16d4102d4 100644
--- a/metagpt/provider/zhipuai/zhipu_model_api.py
+++ b/metagpt/provider/zhipuai/zhipu_model_api.py
@@ -2,6 +2,8 @@
 # -*- coding: utf-8 -*-
 # @Desc   : zhipu model api to support sync & async for invoke & sse_invoke
 
+import json
+
 import zhipuai
 from zhipuai.model_api.api import InvokeType, ModelAPI
 from zhipuai.utils.http_client import headers as zhipuai_default_headers
@@ -51,7 +53,6 @@ class ZhiPuModelAPI(ModelAPI):
             params=kwargs,
             request_timeout=zhipuai.api_timeout_seconds,
         )
-
         return result
 
     @classmethod
@@ -61,6 +62,8 @@ class ZhiPuModelAPI(ModelAPI):
         resp = await cls.arequest(
             invoke_type=InvokeType.SYNC, stream=False, method="post", headers=headers, kwargs=kwargs
         )
+        resp = resp.decode("utf-8")
+        resp = json.loads(resp)
         return resp
 
     @classmethod
diff --git a/metagpt/provider/zhipuai_api.py b/metagpt/provider/zhipuai_api.py
index addbe58af..865b7fce1 100644
--- a/metagpt/provider/zhipuai_api.py
+++ b/metagpt/provider/zhipuai_api.py
@@ -38,12 +38,11 @@ class ZhiPuAILLM(BaseLLM):
     From now, there is only one model named `chatglm_turbo`
     """
 
-    use_system_prompt: bool = False  # zhipuai has no system prompt when use api
-
     def __init__(self):
         self.__init_zhipuai(CONFIG)
         self.llm = ZhiPuModelAPI
         self.model = "chatglm_turbo"  # so far only one model, just use it
+        self.use_system_prompt: bool = False  # zhipuai has no system prompt when use api
 
     def __init_zhipuai(self, config: CONFIG):
         assert config.zhipuai_api_key
@@ -101,7 +100,6 @@ class ZhiPuAILLM(BaseLLM):
             elif event.event == ZhiPuEvent.ERROR.value or event.event == ZhiPuEvent.INTERRUPTED.value:
                 content = event.data
                 logger.error(f"event error: {content}", end="")
-                collected_content.append([content])
             elif event.event == ZhiPuEvent.FINISH.value:
                 """
                 event.meta
diff --git a/metagpt/schema.py b/metagpt/schema.py
index 91158ffeb..e36bef395 100644
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@@ -55,7 +55,16 @@ from metagpt.utils.serialize import (
 
 
 class SerializationMixin(BaseModel):
-    """SereDeserMixin for subclass' ser&deser"""
+    """
+    Mixin for serializing / deserializing polymorphic subclasses.
+    - First of all, note that pydantic is not designed for polymorphism.
+    - If Engineer is a subclass of Role, it would be serialized as Role. To serialize it as Engineer, we need
+        to add the `class name` to Engineer, so Engineer must inherit from SerializationMixin.
+
+    More details:
+    - https://docs.pydantic.dev/latest/concepts/serialization/
+    - https://github.com/pydantic/pydantic/discussions/7008 discusses avoiding `__get_pydantic_core_schema__`
+    """
 
     __is_polymorphic_base = False
     __subclasses_map__ = {}
diff --git a/metagpt/strategy/tot.py b/metagpt/strategy/tot.py
index a32cfdf40..4f33698bf 100644
--- a/metagpt/strategy/tot.py
+++ b/metagpt/strategy/tot.py
@@ -5,7 +5,7 @@
 import asyncio
 from typing import Any, List
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from metagpt.llm import LLM
 from metagpt.logs import logger
@@ -29,6 +29,8 @@ Output a list of jsons following the format:
 
 
 class ThoughtSolverBase(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
     thought_tree: str = ""
     llm: BaseLLM = Field(default_factory=LLM, exclude=True)
     config: ThoughtSolverConfig = Field(default_factory=ThoughtSolverConfig)
diff --git a/metagpt/utils/get_template.py b/metagpt/utils/get_template.py
deleted file mode 100644
index 7e05e5d5e..000000000
--- a/metagpt/utils/get_template.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-@Time    : 2023/9/19 20:39
-@Author  : femto Zheng
-@File    : get_template.py
-"""
-from metagpt.config import CONFIG
-
-
-def get_template(templates, schema=CONFIG.prompt_schema):
-    selected_templates = templates.get(schema)
-    if selected_templates is None:
-        raise ValueError(f"Can't find {schema} in passed in templates")
-
-    # Extract the selected templates
-    prompt_template = selected_templates["PROMPT_TEMPLATE"]
-    format_example = selected_templates["FORMAT_EXAMPLE"]
-
-    return prompt_template, format_example
diff --git a/requirements.txt b/requirements.txt
index 832b4c1c8..9caea13f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,7 +29,7 @@ PyYAML==6.0.1
 setuptools==65.6.3
 tenacity==8.2.2
 tiktoken==0.5.2
-tqdm==4.64.0
+tqdm==4.65.0
 #unstructured[local-inference]
 # selenium>4
 # webdriver_manager<3.9
@@ -58,3 +58,4 @@ websockets~=12.0
 networkx~=3.2.1
 google-generativeai==0.3.1
 playwright==1.40.0
+anytree
diff --git a/setup.py b/setup.py
index c3d04ddba..29c44d3c1 100644
--- a/setup.py
+++ b/setup.py
@@ -38,10 +38,11 @@ extras_require["test"] = [
     "pytest-cov",
     "pytest-mock",
     "pytest-html",
+    "pytest-xdist",
     "connexion[uvicorn]~=3.0.5",
     "azure-cognitiveservices-speech~=1.31.0",
     "aioboto3~=11.3.0",
-    "chromadb==0.3.23",
+    "chromadb==0.4.14",
     "gradio==3.0.0",
     "grpcio-status==1.48.2",
 ]
diff --git a/tests/conftest.py b/tests/conftest.py
index 54a042e90..d88b31ce5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -27,6 +27,10 @@ class Context:
 
     @property
     def llm_api(self):
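+        # 1. Initialize the llm with cached results
+        # 2. If the query is cached, return the cached result directly
+        # 3. If the query is not cached, call llm_api and return the result
+        # 4. If the query is cached, update the cached result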
         return self._llm_api
 
 
diff --git a/tests/metagpt/provider/test_openai.py b/tests/metagpt/provider/test_openai.py
index ddc290731..6166a82de 100644
--- a/tests/metagpt/provider/test_openai.py
+++ b/tests/metagpt/provider/test_openai.py
@@ -8,8 +8,6 @@ from metagpt.schema import UserMessage
 
 CONFIG.openai_proxy = None
 
-print("openai_api_key ", CONFIG.openai_api_key)
-
 
 @pytest.mark.asyncio
 async def test_aask_code():
diff --git a/tests/metagpt/strategy/__init__.py b/tests/metagpt/strategy/__init__.py
new file mode 100644
index 000000000..e95a9b4ed
--- /dev/null
+++ b/tests/metagpt/strategy/__init__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/12/30 00:33
+@Author  : alexanderwu
+@File    : __init__.py
+"""
diff --git a/metagpt/strategy/examples/__init__.py b/tests/metagpt/strategy/examples/__init__.py
similarity index 100%
rename from metagpt/strategy/examples/__init__.py
rename to tests/metagpt/strategy/examples/__init__.py
diff --git a/metagpt/strategy/examples/creative_writing.py b/tests/metagpt/strategy/examples/creative_writing.py
similarity index 87%
rename from metagpt/strategy/examples/creative_writing.py
rename to tests/metagpt/strategy/examples/creative_writing.py
index 94efd9264..59a3c94d7 100644
--- a/metagpt/strategy/examples/creative_writing.py
+++ b/tests/metagpt/strategy/examples/creative_writing.py
@@ -3,8 +3,8 @@
 # @Author  : stellahong (stellahong@fuzhi.ai)
 # @Desc    :
 import re
+from typing import Dict
 
-from metagpt.strategy.prompt_templates.creative_writing import cot_prompt, vote_prompt
 from metagpt.strategy.tot import TreeofThought
 from metagpt.strategy.tot_schema import (
     BaseEvaluator,
@@ -12,6 +12,10 @@ from metagpt.strategy.tot_schema import (
     Strategy,
     ThoughtSolverConfig,
 )
+from tests.metagpt.strategy.prompt_templates.creative_writing import (
+    cot_prompt,
+    vote_prompt,
+)
 
 
 class TextGenParser(BaseParser):
@@ -31,8 +35,8 @@ class TextGenParser(BaseParser):
 
 
 class TextGenEvaluator(BaseEvaluator):
-    value_map = {"impossible": 0.001, "likely": 1, "sure": 20}  # TODO: ad hoc
-    status_map = {val: key for key, val in value_map.items()}
+    value_map: Dict[str, float] = {"impossible": 0.001, "likely": 1, "sure": 20}  # TODO: ad hoc
+    status_map: Dict = {val: key for key, val in value_map.items()}
 
     def __call__(self, evaluation: str, **kwargs) -> float:
         try:
@@ -59,7 +63,7 @@ class TextGenEvaluator(BaseEvaluator):
         return status
 
 
-if __name__ == "__main__":
+def test_creative_writing():
     import asyncio
 
     initial_prompt = """It isn't difficult to do a handstand if you just stand on your hands. It caught him off guard that space smelled of seared steak. When she didn’t like a guy who was trying to pick her up, she started using sign language. Each person who knows you has a different perception of who you are."""
diff --git a/metagpt/strategy/examples/game24.py b/tests/metagpt/strategy/examples/game24.py
similarity index 85%
rename from metagpt/strategy/examples/game24.py
rename to tests/metagpt/strategy/examples/game24.py
index 32e4ede02..c26c8da88 100644
--- a/metagpt/strategy/examples/game24.py
+++ b/tests/metagpt/strategy/examples/game24.py
@@ -3,8 +3,8 @@
 # @Author  : stellahong (stellahong@fuzhi.ai)
 # @Desc    :
 import re
+from typing import Dict
 
-from metagpt.strategy.prompt_templates.game24 import propose_prompt, value_prompt
 from metagpt.strategy.tot import TreeofThought
 from metagpt.strategy.tot_schema import (
     BaseEvaluator,
@@ -12,6 +12,7 @@ from metagpt.strategy.tot_schema import (
     Strategy,
     ThoughtSolverConfig,
 )
+from tests.metagpt.strategy.prompt_templates.game24 import propose_prompt, value_prompt
 
 
 class Game24Parser(BaseParser):
@@ -31,8 +32,8 @@ class Game24Parser(BaseParser):
 
 
 class Game24Evaluator(BaseEvaluator):
-    value_map = {"impossible": 0.001, "likely": 1, "sure": 20}  # TODO: ad hoc
-    status_map = {val: key for key, val in value_map.items()}
+    value_map: Dict[str, float] = {"impossible": 0.001, "likely": 1, "sure": 20}  # TODO: ad hoc
+    status_map: Dict = {val: key for key, val in value_map.items()}
 
     def __call__(self, evaluation: str, **kwargs) -> float:
         try:
@@ -51,7 +52,7 @@ class Game24Evaluator(BaseEvaluator):
         return status
 
 
-if __name__ == "__main__":
+def test_game24():
     import asyncio
 
     initial_prompt = """4 5 6 10"""
diff --git a/metagpt/strategy/prompt_templates/__init__.py b/tests/metagpt/strategy/prompt_templates/__init__.py
similarity index 100%
rename from metagpt/strategy/prompt_templates/__init__.py
rename to tests/metagpt/strategy/prompt_templates/__init__.py
diff --git a/metagpt/strategy/prompt_templates/creative_writing.py b/tests/metagpt/strategy/prompt_templates/creative_writing.py
similarity index 100%
rename from metagpt/strategy/prompt_templates/creative_writing.py
rename to tests/metagpt/strategy/prompt_templates/creative_writing.py
diff --git a/metagpt/strategy/prompt_templates/game24.py b/tests/metagpt/strategy/prompt_templates/game24.py
similarity index 100%
rename from metagpt/strategy/prompt_templates/game24.py
rename to tests/metagpt/strategy/prompt_templates/game24.py
diff --git a/tests/scripts/run_install_deps.sh b/tests/scripts/run_install_deps.sh
new file mode 100644
index 000000000..2758e24da
--- /dev/null
+++ b/tests/scripts/run_install_deps.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Install everything the test suite needs: pip itself, the package in
+# editable mode with its "test" extra, the mermaid CLI and Playwright's Chromium.
+
+# Abort on the first failing command so a broken install fails the caller
+# (e.g. the CI step) instead of being masked by a later command's exit status.
+set -e
+
+python -m pip install --upgrade pip
+# Quote the extra so the shell never treats ".[test]" as a glob pattern.
+pip install -e ".[test]"
+npm install -g @mermaid-js/mermaid-cli
+playwright install --with-deps chromium