diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml
new file mode 100644
index 000000000..7f4fee53e
--- /dev/null
+++ b/.github/workflows/build-package.yaml
@@ -0,0 +1,37 @@
+name: Build and upload python package
+
+# Builds the distribution and uploads it with twine when a GitHub release is created.
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.9'
+        cache: 'pip'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+        pip install -e .
+        pip install setuptools wheel twine
+    - name: Set package version
+      run: |
+        # The release tag is the source of truth for the version. GITHUB_REF_NAME is
+        # the bare tag name (no refs/tags/ prefix), so only an optional "v" is stripped.
+        VERSION="${GITHUB_REF_NAME#v}"
+        sed -i "s/version=.*/version=\"${VERSION}\",/" setup.py
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+      run: |
+        python setup.py bdist_wheel sdist
+        twine upload dist/*
diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
index df4a71d69..fd56c42fb 100644
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@@ -1,13 +1,10 @@
-name: Python application test
+name: Unit Tests
 
 on:
   workflow_dispatch:
   pull_request_target:
   push: 
-    branches: 
-      - 'main'
-      - 'dev'
-      - '*-release'
+    branches:
       - '*-debugger'
 
 jobs:
@@ -79,3 +76,4 @@ jobs:
       uses: codecov/codecov-action@v3
       env:
         CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+      if: ${{ always() }}
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
index d3f7ea408..9bc62f849 100644
--- a/docs/ROADMAP.md
+++ b/docs/ROADMAP.md
@@ -9,24 +9,22 @@ ### Short-term Objective
 
 1. Become the multi-agent framework with the highest ROI.
 2. Support fully automatic implementation of medium-sized projects (around 2000 lines of code).
-3. Implement most identified tasks, reaching version 0.5.
+3. Implement most identified tasks, reaching version 1.0.
 
 ### Tasks
 
-To reach version v0.5, approximately 70% of the following tasks need to be completed.
-
 1. Usability
    1. ~~Release v0.01 pip package to try to solve issues like npm installation (though not necessarily successfully)~~ (v0.3.0)
-   2. Support for overall save and recovery of software companies
+   2. ~~Support for overall save and recovery of software companies~~ (v0.6.0)
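-   3. ~~Support human confirmation and modification during the process~~ (v0.3.0) New: Support human confirmation and modification with fewer constrainsts and a more user-friendly interface
+   3. ~~Support human confirmation and modification during the process~~ (v0.3.0) New: Support human confirmation and modification with fewer constraints and a more user-friendly interface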
    4. Support process caching: Consider carefully whether to add server caching mechanism
    5. ~~Resolve occasional failure to follow instruction under current prompts, causing code parsing errors, through stricter system prompts~~ (v0.4.0, with function call)
    6. Write documentation, describing the current features and usage at all levels (ongoing, continuously adding contents to [documentation site](https://docs.deepwisdom.ai/main/en/guide/get_started/introduction.html))
    7. ~~Support Docker~~
 2. Features
-   1. Support a more standard and stable parser (need to analyze the format that the current LLM is better at)
-   2. ~~Establish a separate output queue, differentiated from the message queue~~
-   3. Attempt to atomize all role work, but this may significantly increase token overhead
+   1. ~~Support a more standard and stable parser (need to analyze the format that the current LLM is better at)~~ (v0.5.0)
+   2. ~~Establish a separate output queue, differentiated from the message queue~~ (v0.5.0)
+   3. ~~Attempt to atomize all role work, but this may significantly increase token overhead~~ (v0.5.0)
    4. Complete the design and implementation of module breakdown
    5. Support various modes of memory: clearly distinguish between long-term and short-term memory
    6. Perfect the test role, and carry out necessary interactions with humans
@@ -43,10 +41,10 @@ ### Tasks
 4. Actions
    1. ~~Implementation: Search~~ (v0.2.1)
    2. Implementation: Knowledge search, supporting 10+ data formats
-   3. Implementation: Data EDA (expected v0.6.0)
-   4. Implementation: Review
-   5. ~~Implementation~~: Add Document (v0.5.0)
-   6. ~~Implementation~~: Delete Document (v0.5.0)
+   3. Implementation: Data EDA (expected v0.7.0)
+   4. Implementation: Review & Revise (expected v0.7.0)
+   5. ~~Implementation: Add Document~~ (v0.5.0)
+   6. ~~Implementation: Delete Document~~ (v0.5.0)
    7. Implementation: Self-training
    8. ~~Implementation: DebugError~~ (v0.2.1)
    9. Implementation: Generate reliable unit tests based on YAPI
@@ -64,23 +62,23 @@ ### Tasks
    3. ~~Support Playwright apis~~
 7. Roles
    1. Perfect the action pool/skill pool for each role
-   2. Red Book blogger
-   3. E-commerce seller
-   4. Data analyst (expected v0.6.0)
-   5. News observer
-   6. ~~Institutional researcher~~ (v0.2.1)
+   2. E-commerce seller
+   3. Data analyst (expected v0.7.0)
+   4. News observer
+   5. ~~Institutional researcher~~ (v0.2.1)
 8. Evaluation
    1. Support an evaluation on a game dataset (experimentation done with game agents)
    2. Reproduce papers, implement full skill acquisition for a single game role, achieving SOTA results (experimentation done with game agents)
-   3. Support an evaluation on a math dataset (expected v0.6.0)
+   3. Support an evaluation on a math dataset (expected v0.7.0)
    4. Reproduce papers, achieving SOTA results for current mathematical problem solving process
 9. LLM
    1. Support Claude underlying API
    2. ~~Support Azure asynchronous API~~
    3. Support streaming version of all APIs
    4. ~~Make gpt-3.5-turbo available (HARD)~~
+   5. Support 
 10. Other
-    1. Clean up existing unused code
+    1. ~~Clean up existing unused code~~
     2. Unify all code styles and establish contribution standards
-    3. Multi-language support
-    4. Multi-programming-language support
+    3. ~~Multi-language support~~
+    4. ~~Multi-programming-language support~~
diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index b9847850e..0cd440ea1 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -59,7 +59,9 @@ class BaseLLM(ABC):
         if system_msgs:
             message = self._system_msgs(system_msgs)
         else:
-            message = [self._default_system_msg()] if self.use_system_prompt else []
+            message = [self._default_system_msg()]
+        if not self.use_system_prompt:
+            message = []
         if format_msgs:
             message.extend(format_msgs)
         message.append(self._user_msg(msg))
diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py
index a20fe89e5..10b60d30e 100644
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@@ -145,6 +145,9 @@ class Role(SerializationMixin, ContextMixin, BaseModel):
         self.pydantic_rebuild_model()
         super().__init__(**data)
 
+        if self.is_human:
+            self.llm = HumanProvider()
+
         self.llm.system_prompt = self._get_prefix()
         self._watch(data.get("watch") or [UserRequirement])
 
diff --git a/tests/metagpt/roles/test_role.py b/tests/metagpt/roles/test_role.py
index b3b54455e..bef71f9a5 100644
--- a/tests/metagpt/roles/test_role.py
+++ b/tests/metagpt/roles/test_role.py
@@ -3,6 +3,7 @@
 # @Desc   : unittest of Role
 import pytest
 
+from metagpt.llm import HumanProvider
 from metagpt.roles.role import Role
 
 
@@ -12,5 +13,10 @@ def test_role_desc():
     assert role.desc == "Best Seller"
 
 
+def test_role_human():
+    """A Role built with is_human=True must expose a HumanProvider as its llm."""
+    assert isinstance(Role(is_human=True).llm, HumanProvider)
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-s"])
diff --git a/tests/metagpt/utils/test_redis.py b/tests/metagpt/utils/test_redis.py
index 8e9cf710a..5d6eb1042 100644
--- a/tests/metagpt/utils/test_redis.py
+++ b/tests/metagpt/utils/test_redis.py
@@ -5,25 +5,27 @@
 @Author  : mashenquan
 @File    : test_redis.py
 """
+from unittest.mock import AsyncMock
 
-import mock
 import pytest
 
 from metagpt.config2 import Config
 from metagpt.utils.redis import Redis
 
 
 async def async_mock_from_url(*args, **kwargs):
-    mock_client = mock.AsyncMock()
+    mock_client = AsyncMock()
     mock_client.set.return_value = None
     mock_client.get.side_effect = [b"test", b""]
     return mock_client
 
 
 @pytest.mark.asyncio
-@mock.patch("aioredis.from_url", return_value=async_mock_from_url())
-async def test_redis(i):
+async def test_redis(mocker):
     redis = Config.default().redis
+    # `mocker` is the pytest-mock fixture, requested by parameter name; its patches
+    # are undone at teardown. aioredis.from_url now returns the stub-client coroutine.
+    mocker.patch("aioredis.from_url", return_value=async_mock_from_url())
 
     conn = Redis(redis)
     await conn.set("test", "test", timeout_sec=0)
diff --git a/tests/mock/mock_llm.py b/tests/mock/mock_llm.py
index e1b440ca9..bef380c83 100644
--- a/tests/mock/mock_llm.py
+++ b/tests/mock/mock_llm.py
@@ -42,7 +42,9 @@ class MockLLM(OpenAILLM):
         if system_msgs:
             message = self._system_msgs(system_msgs)
         else:
-            message = [self._default_system_msg()] if self.use_system_prompt else []
+            message = [self._default_system_msg()]
+        if not self.use_system_prompt:
+            message = []
         if format_msgs:
             message.extend(format_msgs)
         message.append(self._user_msg(msg))