feat: merge geekan:main

2026-07-20 16:51:07 +02:00 · 2024-03-05 10:59:34 +08:00 · 2024-03-05 10:59:34 +08:00 · e22a28215d
commit e22a28215d
parent 3b1644b7ff 0e63b92883
102 changed files with 1766 additions and 756 deletions
--- a/.github/workflows/fulltest.yaml
+++ b/.github/workflows/fulltest.yaml
@ -54,7 +54,6 @@ jobs:
        export ALLOW_OPENAI_API_CALL=0
        echo "${{ secrets.METAGPT_KEY_YAML }}" | base64 -d > config/key.yaml
        mkdir -p ~/.metagpt && echo "${{ secrets.METAGPT_CONFIG2_YAML }}" | base64 -d > ~/.metagpt/config2.yaml
-        echo "${{ secrets.SPARK_YAML }}" | base64 -d > ~/.metagpt/spark.yaml
        pytest tests/ --doctest-modules --cov=./metagpt/ --cov-report=xml:cov.xml --cov-report=html:htmlcov --durations=20 | tee unittest.txt
    - name: Show coverage report
      run: |
--- a/.github/workflows/unittest.yaml
+++ b/.github/workflows/unittest.yaml
@ -31,7 +31,7 @@ jobs:
    - name: Test with pytest
      run: |
        export ALLOW_OPENAI_API_CALL=0
-        mkdir -p ~/.metagpt && cp tests/config2.yaml ~/.metagpt/config2.yaml && cp tests/spark.yaml ~/.metagpt/spark.yaml
+        mkdir -p ~/.metagpt && cp tests/config2.yaml ~/.metagpt/config2.yaml
        pytest tests/ --doctest-modules --cov=./metagpt/ --cov-report=xml:cov.xml --cov-report=html:htmlcov --durations=20 | tee unittest.txt
    - name: Show coverage report
      run: |
--- a/README.md
+++ b/README.md
@ -26,6 +26,8 @@ # MetaGPT: The Multi-Agent Framework
 </p>

 ## News
+🚀 Feb. 08, 2024: [v0.7.0](https://github.com/geekan/MetaGPT/releases/tag/v0.7.0) released, supporting assigning different LLMs to different Roles. We also introduced [Interpreter](https://github.com/geekan/MetaGPT/blob/main/examples/mi/README.md), a powerful agent capable of solving a wide range of real-world problems.
+
 🚀 Jan. 16, 2024: Our paper [MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework
 ](https://arxiv.org/abs/2308.00352) accepted for oral presentation **(top 1.2%)** at ICLR 2024, **ranking #1** in the LLM-based Agent category.

@ -60,7 +62,27 @@ ### Pip installation

 ```bash
 pip install metagpt
-metagpt --init-config  # create ~/.metagpt/config2.yaml, modify it to your own config
+# https://docs.deepwisdom.ai/main/en/guide/get_started/configuration.html
+metagpt --init-config  # it will create ~/.metagpt/config2.yaml, just modify it to your needs
+```
+
+### Configuration
+
+You can configure `~/.metagpt/config2.yaml` according to the [example](https://github.com/geekan/MetaGPT/blob/main/config/config2.example.yaml) and [doc](https://docs.deepwisdom.ai/main/en/guide/get_started/configuration.html):
+
+```yaml
+llm:
+  api_type: "openai"  # or azure / ollama / open_llm etc. Check LLMType for more options
+  model: "gpt-4-turbo-preview"  # or gpt-3.5-turbo-1106 / gpt-4-1106-preview
+  base_url: "https://api.openai.com/v1"  # or forward url / other llm url
+  api_key: "YOUR_API_KEY"
+```
+
+### Usage
+
+After installation, you can use MetaGPT as a CLI:
+
+```bash
 metagpt "Create a 2048 game"  # this will create a repo in ./workspace
 ```

@ -73,27 +95,46 @@ ### Pip installation
 ```

 detail installation please refer to [cli_install](https://docs.deepwisdom.ai/main/en/guide/get_started/installation.html#install-stable-version)
+ or [docker_install](https://docs.deepwisdom.ai/main/en/guide/get_started/installation.html#install-with-docker)

 ### Docker installation
-> Note: In the Windows, you need to replace "/opt/metagpt" with a directory that Docker has permission to create, such as "D:\Users\x\metagpt"
+<details><summary><strong>⏬ Step 1: Download metagpt image and prepare config2.yaml </strong><i>:: click to expand ::</i></summary>
+<div>

 ```bash
-# Step 1: Download metagpt official image and prepare config2.yaml
 docker pull metagpt/metagpt:latest
 mkdir -p /opt/metagpt/{config,workspace}
 docker run --rm metagpt/metagpt:latest cat /app/metagpt/config/config2.yaml > /opt/metagpt/config/config2.yaml
 vim /opt/metagpt/config/config2.yaml # Change the config
+```

-# Step 2: Run metagpt demo with container
-docker run --rm \
+</div>
+</details>
+
+<details><summary><strong>⏬ Step 2: Run metagpt container </strong><i>:: click to expand ::</i></summary>
+<div>
+
+```bash
+docker run --name metagpt -d \
    --privileged \
    -v /opt/metagpt/config/config2.yaml:/app/metagpt/config/config2.yaml \
    -v /opt/metagpt/workspace:/app/metagpt/workspace \
-    metagpt/metagpt:latest \
-    metagpt "Create a 2048 game"
+    metagpt/metagpt:latest
 ```

-detail installation please refer to [docker_install](https://docs.deepwisdom.ai/main/en/guide/get_started/installation.html#install-with-docker)
+</div>
+</details>
+
+<details><summary><strong>⏬ Step 3: Use metagpt </strong><i>:: click to expand ::</i></summary>
+<div>
+
+```bash
+docker exec -it metagpt /bin/bash
+$ metagpt "Create a 2048 game"  # this will create a repo in ./workspace
+```
+
+</div>
+</details>

 ### QuickStart & Demo Video
 - Try it on [MetaGPT Huggingface Space](https://huggingface.co/spaces/deepwisdom/MetaGPT)
--- a/config/config2.example.yaml
+++ b/config/config2.example.yaml
@ -1,17 +1,19 @@
 llm:
-  api_type: "openai"
+  api_type: "openai"  # or azure / ollama / open_llm etc. Check LLMType for more options
  base_url: "YOUR_BASE_URL"
  api_key: "YOUR_API_KEY"
  model: "gpt-4-turbo-preview"  # or gpt-3.5-turbo-1106 / gpt-4-1106-preview
+  repair_llm_output: true  # when the output is not a valid json, try to repair it
+  proxy: "YOUR_PROXY"  # for LLM API requests
+  pricing_plan: "" # Optional. If invalid, it will be automatically filled in with the value of the `model`.
  # Azure-exclusive pricing plan mappings：
  # - gpt-3.5-turbo 4k: "gpt-3.5-turbo-1106"
  # - gpt-4-turbo: "gpt-4-turbo-preview"
  # - gpt-4-turbo-vision: "gpt-4-vision-preview"
  # - gpt-4 8k: "gpt-4"
  # See for more: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
-  # pricing_plan: "gpt-4-turbo-preview"

-proxy: "YOUR_PROXY"
+proxy: "YOUR_PROXY"  # for tools like requests, playwright, selenium, etc.

 search:
  api_type: "google"
@ -48,5 +50,3 @@ iflytek_api_key: "YOUR_API_KEY"
 iflytek_api_secret: "YOUR_API_SECRET"

 metagpt_tti_url: "YOUR_MODEL_URL"
-
-repair_llm_output: true
--- a/docs/ROADMAP.md
+++ b/docs/ROADMAP.md
@ -35,14 +35,14 @@ ### Tasks
 3. Strategies
   1. Support ReAct strategy (experimentation done with game agents)
   2. Support CoT strategy (experimentation done with game agents)
-   3. Support ToT strategy
+   3. ~~Support ToT strategy~~ (v0.6.0)
   4. Support Reflection strategy (experimentation done with game agents)
-   5. Support planning
+   5. ~~Support planning~~ (v0.7.0)
 4. Actions
   1. ~~Implementation: Search~~ (v0.2.1)
   2. Implementation: Knowledge search, supporting 10+ data formats
-   3. Implementation: Data EDA (expected v0.7.0)
-   4. Implementation: Review & Revise (expected v0.7.0)
+   3. ~~Implementation: Data EDA~~ (v0.7.0)
+   4. ~~Implementation: Review & Revise~~ (v0.7.0)
   5. ~~Implementation: Add Document~~ (v0.5.0)
   6. ~~Implementation: Delete Document~~ (v0.5.0)
   7. Implementation: Self-training
@ -50,7 +50,7 @@ ### Tasks
   9. Implementation: Generate reliable unit tests based on YAPI
   10. Implementation: Self-evaluation
   11. Implementation: AI Invocation
-   12. Implementation: Learning and using third-party standard libraries
+   12. ~~Implementation: Learning and using third-party standard libraries~~ (v0.7.0)
   13. Implementation: Data collection
   14. Implementation: AI training
   15. ~~Implementation: Run code~~ (v0.2.1)
@ -63,14 +63,14 @@ ### Tasks
 7. Roles
   1. Perfect the action pool/skill pool for each role
   2. E-commerce seller
-   3. Data analyst (expected v0.7.0)
+   3. ~~Data analyst~~ (v0.7.0)
   4. News observer
   5. ~~Institutional researcher~~ (v0.2.1)
 8. Evaluation
   1. Support an evaluation on a game dataset (experimentation done with game agents)
   2. Reproduce papers, implement full skill acquisition for a single game role, achieving SOTA results (experimentation done with game agents)
-   3. Support an evaluation on a math dataset (expected v0.7.0)
-   4. Reproduce papers, achieving SOTA results for current mathematical problem solving process
+   3. Support an evaluation on a math dataset (expected v0.8.0)
+   4. Reproduce papers, achieving SOTA results for current mathematical problem solving process (expected v0.8.0)
 9. LLM
   1. Support Claude underlying API
   2. ~~Support Azure asynchronous API~~
--- a/docs/scripts/coverage.sh
+++ b/docs/scripts/coverage.sh
@ -1 +1 @@
-coverage run --source ./metagpt -m pytest --durations=0 --timeout=100 && coverage report -m && coverage html && open htmlcov/index.html
+coverage run --source ./metagpt -m pytest -n 8 --durations=0 --timeout=100 && coverage report -m && coverage html && open htmlcov/index.html
--- a/examples/debate_simple.py
+++ b/examples/debate_simple.py
@ -8,14 +8,17 @@
 import asyncio

 from metagpt.actions import Action
+from metagpt.config2 import Config
 from metagpt.environment import Environment
 from metagpt.roles import Role
 from metagpt.team import Team

-action1 = Action(name="AlexSay", instruction="Express your opinion with emotion and don't repeat it")
-action1.llm.model = "gpt-4-1106-preview"
-action2 = Action(name="BobSay", instruction="Express your opinion with emotion and don't repeat it")
-action2.llm.model = "gpt-3.5-turbo-1106"
+gpt35 = Config.default()
+gpt35.llm.model = "gpt-3.5-turbo-1106"
+gpt4 = Config.default()
+gpt4.llm.model = "gpt-4-1106-preview"
+action1 = Action(config=gpt4, name="AlexSay", instruction="Express your opinion with emotion and don't repeat it")
+action2 = Action(config=gpt35, name="BobSay", instruction="Express your opinion with emotion and don't repeat it")
 alex = Role(name="Alex", profile="Democratic candidate", goal="Win the election", actions=[action1], watch=[action2])
 bob = Role(name="Bob", profile="Republican candidate", goal="Win the election", actions=[action2], watch=[action1])
 env = Environment(desc="US election live broadcast")
--- a/examples/llm_hello_world.py
+++ b/examples/llm_hello_world.py
@ -6,16 +6,25 @@
@File    : llm_hello_world.py
 """
 import asyncio
-from pathlib import Path

 from metagpt.llm import LLM
 from metagpt.logs import logger
-from metagpt.utils.common import encode_image


 async def main():
    llm = LLM()
-    logger.info(await llm.aask("hello world"))
+    # llm type check
+    question = "what's your name"
+    logger.info(f"{question}: ")
+    logger.info(await llm.aask(question))
+    logger.info("\n\n")
+
+    logger.info(
+        await llm.aask(
+            "who are you", system_msgs=["act as a robot, just answer 'I'am robot' if the question is 'who are you'"]
+        )
+    )
+
    logger.info(await llm.aask_batch(["hi", "write python hello world."]))

    hello_msg = [{"role": "user", "content": "count from 1 to 10. split by newline."}]
@ -29,12 +38,6 @@ async def main():
    if hasattr(llm, "completion"):
        logger.info(llm.completion(hello_msg))

-    # check if the configured llm supports llm-vision capacity. If not, it will throw a error
-    invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
-    img_base64 = encode_image(invoice_path)
-    res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
-    assert "true" in res.lower()
-

 if __name__ == "__main__":
    asyncio.run(main())
--- a/examples/llm_vision.py
+++ b/examples/llm_vision.py
@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : example to run the ability of LLM vision
+
+import asyncio
+from pathlib import Path
+
+from metagpt.llm import LLM
+from metagpt.utils.common import encode_image
+
+
+async def main():
+    llm = LLM()
+
+    # check if the configured llm supports llm-vision capability. If not, it will throw an error
+    invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
+    img_base64 = encode_image(invoice_path)
+    res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
+    assert "true" in res.lower()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/mi/README.md
+++ b/examples/mi/README.md
@ -0,0 +1,18 @@
+# MetaGPT Interpreter (MI)
+
+## What is Interpreter
+Interpreter is an agent that solves problems through code. It understands user requirements, makes plans, writes code for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios; please check out the examples below.
+
+## Example List
+- Data visualization
+- Machine learning modeling
+- Image background removal
+- Solve math problems
+- Receipt OCR
+- Tool usage: web page imitation
+- Tool usage: web crawling
+- Tool usage: text2image
+- Tool usage: email summarization and response
+- More on the way!
+
+Please see [here](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/mi_intro.html) for a detailed explanation.
--- a/examples/mi/crawl_webpage.py
+++ b/examples/mi/crawl_webpage.py
@ -5,15 +5,15 @@
@File    :   crawl_webpage.py
 """

-from metagpt.roles.ci.code_interpreter import CodeInterpreter
+from metagpt.roles.mi.interpreter import Interpreter


 async def main():
    prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
-    ci = CodeInterpreter(goal=prompt, use_tools=True)
+    mi = Interpreter(use_tools=True)

-    await ci.run(prompt)
+    await mi.run(prompt)


 if __name__ == "__main__":
--- a/examples/mi/data_visualization.py
+++ b/examples/mi/data_visualization.py
@ -0,0 +1,14 @@
+import asyncio
+
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+async def main(requirement: str = ""):
+    mi = Interpreter(use_tools=False)
+    await mi.run(requirement)
+
+
+if __name__ == "__main__":
+    requirement = "Run data analysis on sklearn Iris dataset, include a plot"
+
+    asyncio.run(main(requirement))
--- a/examples/mi/email_summary.py
+++ b/examples/mi/email_summary.py
@ -0,0 +1,33 @@
+# -*- encoding: utf-8 -*-
+"""
+@Date    :   2024/02/07 
+@Author  :   Tuo Zhou
+@File    :   email_summary.py
+"""
+import os
+
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+async def main():
+    email_account = "your_email_account"
+    # your password will stay only on your device and not go to LLM api
+    os.environ["email_password"] = "your_email_password"
+
+    ### Prompt for automatic email reply, uncomment to try this too ###
+    # prompt = f"""I will give you your Outlook email account ({email_account}) and password (email_password item in the environment variable). You need to find the latest email in my inbox with the sender's suffix @gmail.com and reply "Thank you! I have received your email~"""""
+
+    ### Prompt for automatic email summary ###
+    prompt = f"""I will give you your Outlook email account ({email_account}) and password (email_password item in the environment variable).
+            Firstly, Please help me fetch the latest 5 senders and full letter contents.
+            Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format."""
+
+    mi = Interpreter(use_tools=True)
+
+    await mi.run(prompt)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/examples/mi/imitate_webpage.py
+++ b/examples/mi/imitate_webpage.py
@ -5,7 +5,7 @@
@Author  : mannaandpoem
@File    : imitate_webpage.py
 """
-from metagpt.roles.ci.code_interpreter import CodeInterpreter
+from metagpt.roles.mi.interpreter import Interpreter


 async def main():
@ -15,9 +15,9 @@ Firstly, utilize Selenium and WebDriver for rendering.
 Secondly, convert image to a webpage including HTML, CSS and JS in one go. 
 Finally, save webpage in a text file. 
 Note: All required dependencies and environments have been fully installed and configured."""
-    ci = CodeInterpreter(goal=prompt, use_tools=True)
+    mi = Interpreter(use_tools=True)

-    await ci.run(prompt)
+    await mi.run(prompt)


 if __name__ == "__main__":
--- a/examples/mi/machine_learning.py
+++ b/examples/mi/machine_learning.py
@ -0,0 +1,13 @@
+import fire
+
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+async def main(auto_run: bool = True):
+    requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."
+    mi = Interpreter(auto_run=auto_run)
+    await mi.run(requirement)
+
+
+if __name__ == "__main__":
+    fire.Fire(main)
--- a/examples/mi/ml_engineer_with_tools.py
+++ b/examples/mi/ml_engineer_with_tools.py
@ -0,0 +1,16 @@
+import asyncio
+
+from metagpt.roles.mi.ml_engineer import MLEngineer
+
+
+async def main(requirement: str):
+    role = MLEngineer(auto_run=True, use_tools=True)
+    await role.run(requirement)
+
+
+if __name__ == "__main__":
+    data_path = "your_path_to_icr/icr-identify-age-related-conditions"
+    train_path = f"{data_path}/your_train_data.csv"
+    eval_path = f"{data_path}/your_eval_data.csv"
+    requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {train_path}, eval data path:{eval_path}."
+    asyncio.run(main(requirement))
--- a/examples/mi/ocr_receipt.py
+++ b/examples/mi/ocr_receipt.py
@ -0,0 +1,21 @@
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+async def main():
+    # Notice: pip install metagpt[ocr] before using this example
+    image_path = "image.jpg"
+    language = "English"
+    requirement = f"""This is a {language} receipt image.
+    Your goal is to perform OCR on images using PaddleOCR, output text content from the OCR results and discard 
+    coordinates and confidence levels, then recognize the total amount from ocr text content, and finally save as table. 
+    Image path: {image_path}.
+    NOTE: The environments for Paddle and PaddleOCR are all ready and has been fully installed."""
+    mi = Interpreter()
+
+    await mi.run(requirement)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/examples/mi/rm_image_background.py
+++ b/examples/mi/rm_image_background.py
@ -0,0 +1,15 @@
+import asyncio
+
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+async def main(requirement: str = ""):
+    mi = Interpreter(use_tools=False)
+    await mi.run(requirement)
+
+
+if __name__ == "__main__":
+    image_path = "/your/path/to/the/image.jpeg"
+    save_path = "/your/intended/save/path/for/image_rm_bg.png"
+    requirement = f"This is a image, you need to use python toolkit rembg to remove the background of the image and save the result. image path:{image_path}; save path:{save_path}."
+    asyncio.run(main(requirement))
--- a/examples/mi/sd_tool_usage.py
+++ b/examples/mi/sd_tool_usage.py
@ -4,12 +4,12 @@
 # @Desc    :
 import asyncio

-from metagpt.roles.ci.code_interpreter import CodeInterpreter
+from metagpt.roles.mi.interpreter import Interpreter


 async def main(requirement: str = ""):
-    code_interpreter = CodeInterpreter(use_tools=True, goal=requirement)
-    await code_interpreter.run(requirement)
+    mi = Interpreter(use_tools=True, goal=requirement)
+    await mi.run(requirement)


 if __name__ == "__main__":
--- a/examples/mi/solve_math_problems.py
+++ b/examples/mi/solve_math_problems.py
@ -0,0 +1,13 @@
+import asyncio
+
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+async def main(requirement: str = ""):
+    mi = Interpreter(use_tools=False)
+    await mi.run(requirement)
+
+
+if __name__ == "__main__":
+    requirement = "Solve this math problem: The greatest common divisor of positive integers m and n is 6. The least common multiple of m and n is 126. What is the least possible value of m + n?"
+    asyncio.run(main(requirement))
--- a/examples/write_novel.py
+++ b/examples/write_novel.py
@ -14,6 +14,22 @@ from metagpt.actions.action_node import ActionNode
 from metagpt.llm import LLM


+class Chapter(BaseModel):
+    name: str = Field(default="Chapter 1", description="The name of the chapter.")
+    content: str = Field(default="...", description="The content of the chapter. No more than 1000 words.")
+
+
+class Chapters(BaseModel):
+    chapters: List[Chapter] = Field(
+        default=[
+            {"name": "Chapter 1", "content": "..."},
+            {"name": "Chapter 2", "content": "..."},
+            {"name": "Chapter 3", "content": "..."},
+        ],
+        description="The chapters of the novel.",
+    )
+
+
 class Novel(BaseModel):
    name: str = Field(default="The Lord of the Rings", description="The name of the novel.")
    user_group: str = Field(default="...", description="The user group of the novel.")
@ -28,22 +44,17 @@ class Novel(BaseModel):
    ending: str = Field(default="...", description="The ending of the novel.")


-class Chapter(BaseModel):
-    name: str = Field(default="Chapter 1", description="The name of the chapter.")
-    content: str = Field(default="...", description="The content of the chapter. No more than 1000 words.")
-
-
 async def generate_novel():
    instruction = (
-        "Write a novel named 'Harry Potter in The Lord of the Rings'. "
+        "Write a novel named 'Reborn in Skyrim'. "
        "Fill the empty nodes with your own ideas. Be creative! Use your own words!"
        "I will tip you $100,000 if you write a good novel."
    )
    novel_node = await ActionNode.from_pydantic(Novel).fill(context=instruction, llm=LLM())
-    chap_node = await ActionNode.from_pydantic(Chapter).fill(
+    chap_node = await ActionNode.from_pydantic(Chapters).fill(
        context=f"### instruction\n{instruction}\n### novel\n{novel_node.content}", llm=LLM()
    )
-    print(chap_node.content)
+    print(chap_node.instruct_content)


 asyncio.run(generate_novel())
--- a/metagpt/actions/init.py
+++ b/metagpt/actions/init.py
@ -22,9 +22,9 @@ from metagpt.actions.write_code_review import WriteCodeReview
 from metagpt.actions.write_prd import WritePRD
 from metagpt.actions.write_prd_review import WritePRDReview
 from metagpt.actions.write_test import WriteTest
-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
-from metagpt.actions.ci.write_analysis_code import WriteCodeWithoutTools, WriteCodeWithTools
-from metagpt.actions.ci.write_plan import WritePlan
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
+from metagpt.actions.mi.write_analysis_code import WriteCodeWithoutTools, WriteCodeWithTools
+from metagpt.actions.mi.write_plan import WritePlan


 class ActionType(Enum):
--- a/metagpt/actions/mi/init.py
+++ b/metagpt/actions/mi/init.py
--- a/metagpt/actions/mi/ask_review.py
+++ b/metagpt/actions/mi/ask_review.py
--- a/metagpt/actions/mi/debug_code.py
+++ b/metagpt/actions/mi/debug_code.py
@ -1,6 +1,6 @@
 from __future__ import annotations

-from metagpt.actions.ci.write_analysis_code import BaseWriteAnalysisCode
+from metagpt.actions.mi.write_analysis_code import BaseWriteAnalysisCode
 from metagpt.logs import logger
 from metagpt.schema import Message
 from metagpt.utils.common import create_func_call_config
--- a/metagpt/actions/mi/execute_nb_code.py
+++ b/metagpt/actions/mi/execute_nb_code.py
@ -99,7 +99,7 @@ class ExecuteNbCode(Action):
        for i, output in enumerate(outputs):
            if output["output_type"] == "stream" and not any(
                tag in output["text"]
-                for tag in ["| INFO     | metagpt", "| ERROR    | metagpt", "| WARNING  | metagpt"]
+                for tag in ["| INFO     | metagpt", "| ERROR    | metagpt", "| WARNING  | metagpt", "DEBUG"]
            ):
                parsed_output += output["text"]
            elif output["output_type"] == "display_data":
@ -182,7 +182,7 @@ class ExecuteNbCode(Action):
            outputs = self.parse_outputs(self.nb.cells[-1].outputs)
            outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success)

-            if "!pip" in outputs:
+            if "!pip" in code:
                success = False

            return outputs, success
--- a/metagpt/actions/mi/ml_action.py
+++ b/metagpt/actions/mi/ml_action.py
@ -3,14 +3,14 @@ from __future__ import annotations
 from typing import Tuple

 from metagpt.actions import Action
-from metagpt.actions.ci.write_analysis_code import WriteCodeWithTools
-from metagpt.prompts.ci.ml_action import (
+from metagpt.actions.mi.write_analysis_code import WriteCodeWithTools
+from metagpt.prompts.mi.ml_action import (
    ML_GENERATE_CODE_PROMPT,
    ML_TOOL_USAGE_PROMPT,
    PRINT_DATA_COLUMNS,
    UPDATE_DATA_COLUMNS,
 )
-from metagpt.prompts.ci.write_analysis_code import CODE_GENERATOR_WITH_TOOLS
+from metagpt.prompts.mi.write_analysis_code import CODE_GENERATOR_WITH_TOOLS
 from metagpt.schema import Message, Plan
 from metagpt.utils.common import create_func_call_config, remove_comments

--- a/metagpt/actions/mi/write_analysis_code.py
+++ b/metagpt/actions/mi/write_analysis_code.py
@ -10,7 +10,7 @@ from typing import Tuple

 from metagpt.actions import Action
 from metagpt.logs import logger
-from metagpt.prompts.ci.write_analysis_code import (
+from metagpt.prompts.mi.write_analysis_code import (
    CODE_GENERATOR_WITH_TOOLS,
    SELECT_FUNCTION_TOOLS,
    TOOL_RECOMMENDATION_PROMPT,
--- a/metagpt/actions/mi/write_plan.py
+++ b/metagpt/actions/mi/write_plan.py
@ -12,7 +12,7 @@ from typing import Tuple

 from metagpt.actions import Action
 from metagpt.logs import logger
-from metagpt.prompts.ci.write_analysis_code import (
+from metagpt.prompts.mi.write_analysis_code import (
    ASSIGN_TASK_TYPE_CONFIG,
    ASSIGN_TASK_TYPE_PROMPT,
 )
--- a/metagpt/actions/research.py
+++ b/metagpt/actions/research.py
@ -133,7 +133,7 @@ class CollectLinks(Action):
                if len(remove) == 0:
                    break

-        model_name = config.get_openai_llm().model
+        model_name = config.llm.model
        prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096)
        logger.debug(prompt)
        queries = await self._aask(prompt, [system_text])
--- a/metagpt/configs/llm_config.py
+++ b/metagpt/configs/llm_config.py
@ -24,6 +24,10 @@ class LLMType(Enum):
    METAGPT = "metagpt"
    AZURE = "azure"
    OLLAMA = "ollama"
+    QIANFAN = "qianfan"  # Baidu BCE
+    DASHSCOPE = "dashscope"  # Aliyun LingJi DashScope
+    MOONSHOT = "moonshot"
+    MISTRAL = "mistral"

    def __missing__(self, key):
        return self.OPENAI
@ -36,7 +40,7 @@ class LLMConfig(YamlModel):
    Optional Fields in pydantic: https://docs.pydantic.dev/latest/migration/#required-optional-and-nullable-fields
    """

-    api_key: str
+    api_key: str = "sk-"
    api_type: LLMType = LLMType.OPENAI
    base_url: str = "https://api.openai.com/v1"
    api_version: Optional[str] = None
@ -44,6 +48,11 @@ class LLMConfig(YamlModel):
    model: Optional[str] = None  # also stands for DEPLOYMENT_NAME
    pricing_plan: Optional[str] = None  # Cost Settlement Plan Parameters.

+    # For cloud service providers like Baidu / Alibaba
+    access_key: Optional[str] = None
+    secret_key: Optional[str] = None
+    endpoint: Optional[str] = None  # for self-deployed model on the cloud
+
    # For Spark(Xunfei), maybe remove later
    app_id: Optional[str] = None
    api_secret: Optional[str] = None
--- a/metagpt/context.py
+++ b/metagpt/context.py
@ -12,10 +12,14 @@ from typing import Any, Optional
 from pydantic import BaseModel, ConfigDict

 from metagpt.config2 import Config
-from metagpt.configs.llm_config import LLMConfig
+from metagpt.configs.llm_config import LLMConfig, LLMType
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.llm_provider_registry import create_llm_instance
-from metagpt.utils.cost_manager import CostManager
+from metagpt.utils.cost_manager import (
+    CostManager,
+    FireworksCostManager,
+    TokenCostManager,
+)
 from metagpt.utils.git_repository import GitRepository
 from metagpt.utils.project_repo import ProjectRepo

@ -80,12 +84,21 @@ class Context(BaseModel):
    #     self._llm = None
    #     return self._llm

+    def _select_costmanager(self, llm_config: LLMConfig) -> CostManager:
+        """Return a CostManager instance"""
+        if llm_config.api_type == LLMType.FIREWORKS:
+            return FireworksCostManager()
+        elif llm_config.api_type == LLMType.OPEN_LLM:
+            return TokenCostManager()
+        else:
+            return self.cost_manager
+
    def llm(self) -> BaseLLM:
        """Return a LLM instance, fixme: support cache"""
        # if self._llm is None:
        self._llm = create_llm_instance(self.config.llm)
        if self._llm.cost_manager is None:
-            self._llm.cost_manager = self.cost_manager
+            self._llm.cost_manager = self._select_costmanager(self.config.llm)
        return self._llm

    def llm_with_cost_manager_from_llm_config(self, llm_config: LLMConfig) -> BaseLLM:
@ -93,5 +106,5 @@ class Context(BaseModel):
        # if self._llm is None:
        llm = create_llm_instance(llm_config)
        if llm.cost_manager is None:
-            llm.cost_manager = self.cost_manager
+            llm.cost_manager = self._select_costmanager(llm_config)
        return llm
--- a/metagpt/memory/memory_storage.py
+++ b/metagpt/memory/memory_storage.py
@ -7,7 +7,6 @@
 from pathlib import Path
 from typing import Optional

-from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores.faiss import FAISS
 from langchain_core.embeddings import Embeddings

@ -15,6 +14,7 @@ from metagpt.const import DATA_PATH, MEM_TTL
 from metagpt.document_store.faiss_store import FaissStore
 from metagpt.logs import logger
 from metagpt.schema import Message
+from metagpt.utils.embedding import get_embedding
 from metagpt.utils.serialize import deserialize_message, serialize_message


@ -30,7 +30,7 @@ class MemoryStorage(FaissStore):
        self.threshold: float = 0.1  # experience value. TODO The threshold to filter similar memories
        self._initialized: bool = False

-        self.embedding = embedding or OpenAIEmbeddings()
+        self.embedding = embedding or get_embedding()
        self.store: FAISS = None  # Faiss engine

    @property
--- a/metagpt/prompts/mi/init.py
+++ b/metagpt/prompts/mi/init.py
--- a/metagpt/prompts/mi/ml_action.py
+++ b/metagpt/prompts/mi/ml_action.py
--- a/metagpt/prompts/mi/write_analysis_code.py
+++ b/metagpt/prompts/mi/write_analysis_code.py
--- a/metagpt/prompts/tool_types.py
+++ b/metagpt/prompts/tool_types.py
@ -1,3 +1,10 @@
+# Prompt for using tools of "eda" type
+EDA_PROMPT = """
+The current task is about exploratory data analysis, please note the following:
+- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.
+- Remember to `import numpy as np` before using Numpy functions.
+"""
+
 # Prompt for using tools of "data_preprocess" type
 DATA_PREPROCESS_PROMPT = """
 The current task is about data preprocessing, please note the following:
--- a/metagpt/provider/init.py
+++ b/metagpt/provider/init.py
@ -6,21 +6,19 @@
@File    : __init__.py
 """

-from metagpt.provider.fireworks_api import FireworksLLM
 from metagpt.provider.google_gemini_api import GeminiLLM
 from metagpt.provider.ollama_api import OllamaLLM
-from metagpt.provider.open_llm_api import OpenLLM
 from metagpt.provider.openai_api import OpenAILLM
 from metagpt.provider.zhipuai_api import ZhiPuAILLM
 from metagpt.provider.azure_openai_api import AzureOpenAILLM
 from metagpt.provider.metagpt_api import MetaGPTLLM
 from metagpt.provider.human_provider import HumanProvider
 from metagpt.provider.spark_api import SparkLLM
+from metagpt.provider.qianfan_api import QianFanLLM
+from metagpt.provider.dashscope_api import DashScopeLLM

 __all__ = [
-    "FireworksLLM",
    "GeminiLLM",
-    "OpenLLM",
    "OpenAILLM",
    "ZhiPuAILLM",
    "AzureOpenAILLM",
@ -28,4 +26,6 @@ __all__ = [
    "OllamaLLM",
    "HumanProvider",
    "SparkLLM",
+    "QianFanLLM",
+    "DashScopeLLM",
 ]
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@ -14,11 +14,12 @@ from typing import Dict, Optional, Union

 from openai import AsyncOpenAI
 from openai.types import CompletionUsage
+from pydantic import BaseModel

 from metagpt.configs.llm_config import LLMConfig
 from metagpt.logs import logger
 from metagpt.schema import Message
-from metagpt.utils.cost_manager import CostManager
+from metagpt.utils.cost_manager import CostManager, Costs
 from metagpt.utils.exceptions import handle_exception


@ -72,6 +73,28 @@ class BaseLLM(ABC):
    def _default_system_msg(self):
        return self._system_msg(self.system_prompt)

+    def _update_costs(self, usage: Union[dict, BaseModel], model: str = None, local_calc_usage: bool = True):
+        """update each request's token cost
+        Args:
+            model (str): model name or in some scenarios called endpoint
+            local_calc_usage (bool): some models don't calculate usage, it will overwrite LLMConfig.calc_usage
+        """
+        calc_usage = self.config.calc_usage and local_calc_usage
+        model = model or self.model
+        usage = usage.model_dump() if isinstance(usage, BaseModel) else usage
+        if calc_usage and self.cost_manager:
+            try:
+                prompt_tokens = int(usage.get("prompt_tokens", 0))
+                completion_tokens = int(usage.get("completion_tokens", 0))
+                self.cost_manager.update_cost(prompt_tokens, completion_tokens, model)
+            except Exception as e:
+                logger.error(f"{self.__class__.__name__} updates costs failed! exp: {e}")
+
+    def get_costs(self) -> Costs:
+        if not self.cost_manager:
+            return Costs(0, 0, 0, 0)
+        return self.cost_manager.get_costs()
+
    async def aask(
        self,
        msg: str,
@ -172,7 +195,7 @@ class BaseLLM(ABC):
        :return dict: return the first function arguments of choice, for example,
            {'language': 'python', 'code': "print('Hello, World!')"}
        """
-        return json.loads(self.get_choice_function(rsp)["arguments"])
+        return json.loads(self.get_choice_function(rsp)["arguments"], strict=False)

    @handle_exception
    def _update_costs(self, usage: CompletionUsage | Dict):
--- a/metagpt/provider/dashscope_api.py
+++ b/metagpt/provider/dashscope_api.py
@ -0,0 +1,248 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   :
+
+import json
+from http import HTTPStatus
+from typing import Any, AsyncGenerator, Dict, List, Union
+
+import dashscope
+from dashscope.aigc.generation import Generation
+from dashscope.api_entities.aiohttp_request import AioHttpRequest
+from dashscope.api_entities.api_request_data import ApiRequestData
+from dashscope.api_entities.api_request_factory import _get_protocol_params
+from dashscope.api_entities.dashscope_response import (
+    GenerationOutput,
+    GenerationResponse,
+    Message,
+)
+from dashscope.client.base_api import BaseAioApi
+from dashscope.common.constants import SERVICE_API_PATH, ApiProtocol
+from dashscope.common.error import (
+    InputDataRequired,
+    InputRequired,
+    ModelRequired,
+    UnsupportedApiProtocol,
+)
+from tenacity import (
+    after_log,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+from metagpt.logs import log_llm_stream, logger
+from metagpt.provider.base_llm import BaseLLM, LLMConfig
+from metagpt.provider.llm_provider_registry import LLMType, register_provider
+from metagpt.provider.openai_api import log_and_reraise
+from metagpt.utils.cost_manager import CostManager
+from metagpt.utils.token_counter import DASHSCOPE_TOKEN_COSTS
+
+
+def build_api_arequest(
+    model: str, input: object, task_group: str, task: str, function: str, api_key: str, is_service=True, **kwargs
+):
+    (
+        api_protocol,
+        ws_stream_mode,
+        is_binary_input,
+        http_method,
+        stream,
+        async_request,
+        query,
+        headers,
+        request_timeout,
+        form,
+        resources,
+    ) = _get_protocol_params(kwargs)
+    task_id = kwargs.pop("task_id", None)
+    if api_protocol in [ApiProtocol.HTTP, ApiProtocol.HTTPS]:
+        if not dashscope.base_http_api_url.endswith("/"):
+            http_url = dashscope.base_http_api_url + "/"
+        else:
+            http_url = dashscope.base_http_api_url
+
+        if is_service:
+            http_url = http_url + SERVICE_API_PATH + "/"
+
+        if task_group:
+            http_url += "%s/" % task_group
+        if task:
+            http_url += "%s/" % task
+        if function:
+            http_url += function
+        request = AioHttpRequest(
+            url=http_url,
+            api_key=api_key,
+            http_method=http_method,
+            stream=stream,
+            async_request=async_request,
+            query=query,
+            timeout=request_timeout,
+            task_id=task_id,
+        )
+    else:
+        raise UnsupportedApiProtocol("Unsupported protocol: %s, support [http, https, websocket]" % api_protocol)
+
+    if headers is not None:
+        request.add_headers(headers=headers)
+
+    if input is None and form is None:
+        raise InputDataRequired("There is no input data and form data")
+
+    request_data = ApiRequestData(
+        model,
+        task_group=task_group,
+        task=task,
+        function=function,
+        input=input,
+        form=form,
+        is_binary_input=is_binary_input,
+        api_protocol=api_protocol,
+    )
+    request_data.add_resources(resources)
+    request_data.add_parameters(**kwargs)
+    request.data = request_data
+    return request
+
+
+class AGeneration(Generation, BaseAioApi):
+    @classmethod
+    async def acall(
+        cls,
+        model: str,
+        prompt: Any = None,
+        history: list = None,
+        api_key: str = None,
+        messages: List[Message] = None,
+        plugins: Union[str, Dict[str, Any]] = None,
+        **kwargs,
+    ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]:
+        if (prompt is None or not prompt) and (messages is None or not messages):
+            raise InputRequired("prompt or messages is required!")
+        if model is None or not model:
+            raise ModelRequired("Model is required!")
+        task_group, function = "aigc", "generation"  # fixed value
+        if plugins is not None:
+            headers = kwargs.pop("headers", {})
+            if isinstance(plugins, str):
+                headers["X-DashScope-Plugin"] = plugins
+            else:
+                headers["X-DashScope-Plugin"] = json.dumps(plugins)
+            kwargs["headers"] = headers
+        input, parameters = cls._build_input_parameters(model, prompt, history, messages, **kwargs)
+
+        api_key, model = BaseAioApi._validate_params(api_key, model)
+        request = build_api_arequest(
+            model=model,
+            input=input,
+            task_group=task_group,
+            task=Generation.task,
+            function=function,
+            api_key=api_key,
+            **kwargs,
+        )
+        response = await request.aio_call()
+        is_stream = kwargs.get("stream", False)
+        if is_stream:
+
+            async def aresp_iterator(response):
+                async for resp in response:
+                    yield GenerationResponse.from_api_response(resp)
+
+            return aresp_iterator(response)
+        else:
+            return GenerationResponse.from_api_response(response)
+
+
+@register_provider(LLMType.DASHSCOPE)
+class DashScopeLLM(BaseLLM):
+    def __init__(self, llm_config: LLMConfig):
+        self.config = llm_config
+        self.use_system_prompt = False  # only some models support system_prompt
+        self.__init_dashscope()
+        self.cost_manager = CostManager(token_costs=self.token_costs)
+
+    def __init_dashscope(self):
+        self.model = self.config.model
+        self.api_key = self.config.api_key
+        self.token_costs = DASHSCOPE_TOKEN_COSTS
+        self.aclient: AGeneration = AGeneration
+
+        # check support system_message models
+        support_system_models = [
+            "qwen-",  # all support
+            "llama2-",  # all support
+            "baichuan2-7b-chat-v1",
+            "chatglm3-6b",
+        ]
+        for support_model in support_system_models:
+            if support_model in self.model:
+                self.use_system_prompt = True
+
+    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
+        kwargs = {
+            "api_key": self.api_key,
+            "model": self.model,
+            "messages": messages,
+            "stream": stream,
+            "result_format": "message",
+        }
+        if self.config.temperature > 0:
+            # Each model has its own default temperature, so only send one when it's specified (> 0).
+            kwargs["temperature"] = self.config.temperature
+        if stream:
+            kwargs["incremental_output"] = True
+        return kwargs
+
+    def _check_response(self, resp: GenerationResponse):
+        if resp.status_code != HTTPStatus.OK:
+            raise RuntimeError(f"code: {resp.code}, request_id: {resp.request_id}, message: {resp.message}")
+
+    def get_choice_text(self, output: GenerationOutput) -> str:
+        return output.get("choices", [{}])[0].get("message", {}).get("content", "")
+
+    def completion(self, messages: list[dict]) -> GenerationOutput:
+        resp: GenerationResponse = self.aclient.call(**self._const_kwargs(messages, stream=False))
+        self._check_response(resp)
+
+        self._update_costs(dict(resp.usage))
+        return resp.output
+
+    async def _achat_completion(self, messages: list[dict]) -> GenerationOutput:
+        resp: GenerationResponse = await self.aclient.acall(**self._const_kwargs(messages, stream=False))
+        self._check_response(resp)
+        self._update_costs(dict(resp.usage))
+        return resp.output
+
+    async def acompletion(self, messages: list[dict], timeout=3) -> GenerationOutput:
+        return await self._achat_completion(messages)
+
+    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+        resp = await self.aclient.acall(**self._const_kwargs(messages, stream=True))
+        collected_content = []
+        usage = {}
+        async for chunk in resp:
+            self._check_response(chunk)
+            content = chunk.output.choices[0]["message"]["content"]
+            usage = dict(chunk.usage)  # each chunk has usage
+            log_llm_stream(content)
+            collected_content.append(content)
+        log_llm_stream("\n")
+        self._update_costs(usage)
+        full_content = "".join(collected_content)
+        return full_content
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_random_exponential(min=1, max=60),
+        after=after_log(logger, logger.level("WARNING").name),
+        retry=retry_if_exception_type(ConnectionError),
+        retry_error_callback=log_and_reraise,
+    )
+    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
+        if stream:
+            return await self._achat_completion_stream(messages)
+        resp = await self._achat_completion(messages)
+        return self.get_choice_text(resp)
--- a/metagpt/provider/fireworks_api.py
+++ b/metagpt/provider/fireworks_api.py
@ -1,123 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Desc   : fireworks.ai's api
-
-import re
-
-from openai import APIConnectionError, AsyncStream
-from openai.types import CompletionUsage
-from openai.types.chat import ChatCompletionChunk
-from tenacity import (
-    after_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
-from metagpt.configs.llm_config import LLMConfig, LLMType
-from metagpt.logs import logger
-from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.provider.openai_api import OpenAILLM, log_and_reraise
-from metagpt.utils.cost_manager import CostManager, Costs
-
-MODEL_GRADE_TOKEN_COSTS = {
-    "-1": {"prompt": 0.0, "completion": 0.0},  # abnormal condition
-    "16": {"prompt": 0.2, "completion": 0.8},  # 16 means model size <= 16B; 0.2 means $0.2/1M tokens
-    "80": {"prompt": 0.7, "completion": 2.8},  # 80 means 16B < model size <= 80B
-    "mixtral-8x7b": {"prompt": 0.4, "completion": 1.6},
-}
-
-
-class FireworksCostManager(CostManager):
-    def model_grade_token_costs(self, model: str) -> dict[str, float]:
-        def _get_model_size(model: str) -> float:
-            size = re.findall(".*-([0-9.]+)b", model)
-            size = float(size[0]) if len(size) > 0 else -1
-            return size
-
-        if "mixtral-8x7b" in model:
-            token_costs = MODEL_GRADE_TOKEN_COSTS["mixtral-8x7b"]
-        else:
-            model_size = _get_model_size(model)
-            if 0 < model_size <= 16:
-                token_costs = MODEL_GRADE_TOKEN_COSTS["16"]
-            elif 16 < model_size <= 80:
-                token_costs = MODEL_GRADE_TOKEN_COSTS["80"]
-            else:
-                token_costs = MODEL_GRADE_TOKEN_COSTS["-1"]
-        return token_costs
-
-    def update_cost(self, prompt_tokens: int, completion_tokens: int, model: str):
-        """
-        Refs to `https://app.fireworks.ai/pricing` **Developer pricing**
-        Update the total cost, prompt tokens, and completion tokens.
-
-        Args:
-        prompt_tokens (int): The number of tokens used in the prompt.
-        completion_tokens (int): The number of tokens used in the completion.
-        model (str): The model used for the API call.
-        """
-        self.total_prompt_tokens += prompt_tokens
-        self.total_completion_tokens += completion_tokens
-
-        token_costs = self.model_grade_token_costs(model)
-        cost = (prompt_tokens * token_costs["prompt"] + completion_tokens * token_costs["completion"]) / 1000000
-        self.total_cost += cost
-        logger.info(
-            f"Total running cost: ${self.total_cost:.4f}"
-            f"Current cost: ${cost:.4f}, prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
-        )
-
-
-@register_provider(LLMType.FIREWORKS)
-class FireworksLLM(OpenAILLM):
-    def __init__(self, config: LLMConfig):
-        super().__init__(config=config)
-        self.auto_max_tokens = False
-        self.cost_manager = FireworksCostManager()
-
-    def _make_client_kwargs(self) -> dict:
-        kwargs = dict(api_key=self.config.api_key, base_url=self.config.base_url)
-        return kwargs
-
-    def get_costs(self) -> Costs:
-        return self.cost_manager.get_costs()
-
-    async def _achat_completion_stream(self, messages: list[dict], timeout=3) -> str:
-        response: AsyncStream[ChatCompletionChunk] = await self.aclient.chat.completions.create(
-            **self._cons_kwargs(messages), stream=True
-        )
-
-        collected_content = []
-        usage = CompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
-        # iterate through the stream of events
-        async for chunk in response:
-            if chunk.choices:
-                choice = chunk.choices[0]
-                choice_delta = choice.delta
-                finish_reason = choice.finish_reason if hasattr(choice, "finish_reason") else None
-                if choice_delta.content:
-                    collected_content.append(choice_delta.content)
-                    print(choice_delta.content, end="")
-                if finish_reason:
-                    # fireworks api return usage when finish_reason is not None
-                    usage = CompletionUsage(**chunk.usage)
-
-        full_content = "".join(collected_content)
-        self._update_costs(usage)
-        return full_content
-
-    @retry(
-        wait=wait_random_exponential(min=1, max=60),
-        stop=stop_after_attempt(6),
-        after=after_log(logger, logger.level("WARNING").name),
-        retry=retry_if_exception_type(APIConnectionError),
-        retry_error_callback=log_and_reraise,
-    )
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
-        """when streaming, print each token in place."""
-        if stream:
-            return await self._achat_completion_stream(messages)
-        rsp = await self._achat_completion(messages)
-        return self.get_choice_text(rsp)
--- a/metagpt/provider/general_api_requestor.py
+++ b/metagpt/provider/general_api_requestor.py
@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor):
        self, result: requests.Response, stream: bool
    ) -> Tuple[Union[bytes, Iterator[Generator]], bytes]:
        """Returns the response(s) and a bool indicating whether it is a stream."""
-        if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
+        content_type = result.headers.get("Content-Type", "")
+        if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type):
            return (
                self._interpret_response_line(line, result.status_code, result.headers, stream=True)
                for line in parse_stream(result.iter_lines())
--- a/metagpt/provider/llm_provider_registry.py
+++ b/metagpt/provider/llm_provider_registry.py
@ -21,11 +21,15 @@ class LLMProviderRegistry:
        return self.providers[enum]


-def register_provider(key):
+def register_provider(keys):
    """register provider to registry"""

    def decorator(cls):
-        LLM_REGISTRY.register(key, cls)
+        if isinstance(keys, list):
+            for key in keys:
+                LLM_REGISTRY.register(key, cls)
+        else:
+            LLM_REGISTRY.register(keys, cls)
        return cls

    return decorator
--- a/metagpt/provider/open_llm_api.py
+++ b/metagpt/provider/open_llm_api.py
@ -1,39 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Desc   : self-host open llm model with openai-compatible interface
-
-from openai.types import CompletionUsage
-
-from metagpt.configs.llm_config import LLMConfig, LLMType
-from metagpt.logs import logger
-from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.provider.openai_api import OpenAILLM
-from metagpt.utils.cost_manager import Costs, TokenCostManager
-from metagpt.utils.token_counter import count_message_tokens, count_string_tokens
-
-
-@register_provider(LLMType.OPEN_LLM)
-class OpenLLM(OpenAILLM):
-    def __init__(self, config: LLMConfig):
-        super().__init__(config)
-        self.cost_manager = TokenCostManager()
-
-    def _make_client_kwargs(self) -> dict:
-        kwargs = dict(api_key="sk-xxx", base_url=self.config.base_url)
-        return kwargs
-
-    def _calc_usage(self, messages: list[dict], rsp: str) -> CompletionUsage:
-        usage = CompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
-        if not self.config.calc_usage:
-            return usage
-
-        try:
-            usage.prompt_tokens = count_message_tokens(messages, "open-llm-model")
-            usage.completion_tokens = count_string_tokens(rsp, "open-llm-model")
-        except Exception as e:
-            logger.error(f"usage calculation failed!: {e}")
-
-        return usage
-
-    def get_costs(self) -> Costs:
-        return self.cost_manager.get_costs()
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -9,7 +9,8 @@
 from __future__ import annotations

 import json
-from typing import AsyncIterator, Optional, Union
+import re
+from typing import Optional, Union

 from openai import APIConnectionError, AsyncOpenAI, AsyncStream
 from openai._base_client import AsyncHttpxClientWrapper
@ -30,7 +31,7 @@ from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA
 from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.schema import Message
 from metagpt.utils.common import CodeParser, decode_image
-from metagpt.utils.cost_manager import CostManager, Costs
+from metagpt.utils.cost_manager import CostManager, TokenCostManager
 from metagpt.utils.exceptions import handle_exception
 from metagpt.utils.token_counter import (
    count_message_tokens,
@ -50,23 +51,20 @@ See FAQ 5.8
    raise retry_state.outcome.exception()


-@register_provider(LLMType.OPENAI)
+@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL])
 class OpenAILLM(BaseLLM):
    """Check https://platform.openai.com/examples for examples"""

    def __init__(self, config: LLMConfig):
        self.config = config
-        self._init_model()
        self._init_client()
        self.auto_max_tokens = False
        self.cost_manager: Optional[CostManager] = None

-    def _init_model(self):
-        self.model = self.config.model  # Used in _calc_usage & _cons_kwargs
-        self.pricing_plan = self.config.pricing_plan or self.model
-
    def _init_client(self):
        """https://github.com/openai/openai-python#async-usage"""
+        self.model = self.config.model  # Used in _calc_usage & _cons_kwargs
+        self.pricing_plan = self.config.pricing_plan or self.model
        kwargs = self._make_client_kwargs()
        self.aclient = AsyncOpenAI(**kwargs)

@ -88,22 +86,41 @@ class OpenAILLM(BaseLLM):

        return params

-    async def _achat_completion_stream(self, messages: list[dict], timeout=3) -> AsyncIterator[str]:
+    async def _achat_completion_stream(self, messages: list[dict], timeout=3) -> str:
        response: AsyncStream[ChatCompletionChunk] = await self.aclient.chat.completions.create(
            **self._cons_kwargs(messages, timeout=timeout), stream=True
        )
-
+        usage = None
+        collected_messages = []
        async for chunk in response:
            chunk_message = chunk.choices[0].delta.content or "" if chunk.choices else ""  # extract the message
-            yield chunk_message
+            finish_reason = getattr(chunk.choices[0], "finish_reason", None) if chunk.choices else None
+            log_llm_stream(chunk_message)
+            collected_messages.append(chunk_message)
+            if finish_reason:
+                if getattr(chunk, "usage", None):
+                    # Some services have usage as an attribute of the chunk, such as Fireworks
+                    usage = CompletionUsage(**dict(chunk.usage))  # dict() accepts a mapping or a pydantic model
+                elif getattr(chunk.choices[0], "usage", None):
+                    # The usage of some services is an attribute of chunk.choices[0], such as Moonshot
+                    usage = CompletionUsage(**dict(chunk.choices[0].usage))
+
+        log_llm_stream("\n")
+        full_reply_content = "".join(collected_messages)
+        if not usage:
+            # Some services do not provide the usage attribute, such as OpenAI or OpenLLM
+            usage = self._calc_usage(messages, full_reply_content)
+
+        self._update_costs(usage)
+        return full_reply_content

    def _cons_kwargs(self, messages: list[dict], timeout=3, **extra_kwargs) -> dict:
        kwargs = {
            "messages": messages,
            "max_tokens": self._get_max_tokens(messages),
-            "n": 1,
+            # "n": 1,  # Some services do not provide this parameter, such as mistral
            # "stop": None,  # default it's None and gpt4-v can't have this one
-            "temperature": 0.3,
+            "temperature": self.config.temperature,
            "model": self.model,
            "timeout": max(self.config.timeout, timeout),
        }
@ -130,18 +147,7 @@ class OpenAILLM(BaseLLM):
    async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
        """when streaming, print each token in place."""
        if stream:
-            resp = self._achat_completion_stream(messages, timeout=timeout)
-
-            collected_messages = []
-            async for i in resp:
-                log_llm_stream(i)
-                collected_messages.append(i)
-            log_llm_stream("\n")
-
-            full_reply_content = "".join(collected_messages)
-            usage = self._calc_usage(messages, full_reply_content)
-            self._update_costs(usage)
-            return full_reply_content
+            return await self._achat_completion_stream(messages, timeout=timeout)

        rsp = await self._achat_completion(messages, timeout=timeout)
        return self.get_choice_text(rsp)
@ -196,6 +202,30 @@ class OpenAILLM(BaseLLM):
        rsp = await self._achat_completion_function(messages, **kwargs)
        return self.get_choice_function_arguments(rsp)

+    def _parse_arguments(self, arguments: str) -> dict:
+        """Best-effort parse of function-call arguments that are not valid JSON into {"language", "code"}."""
+        if "language" not in arguments and "code" not in arguments:
+            logger.warning(f"Not found `code`, `language`, We assume it is pure code:\n {arguments}\n. ")
+            return {"language": "python", "code": arguments}
+
+        # Match the `language` value; fall back to "python" when it is absent
+        language_pattern = re.compile(r'[\"\']?language[\"\']?\s*:\s*["\']([^"\']+?)["\']', re.DOTALL)
+        language_match = language_pattern.search(arguments)
+        language_value = language_match.group(1) if language_match else "python"
+
+        # Match the `code` value: the content of the last quoted (single, double, backtick or tripled) span
+        code_pattern = r'(["\'`]{3}|["\'`])([\s\S]*?)\1'
+        try:
+            code_value = re.findall(code_pattern, arguments)[-1][-1]
+        except Exception as e:
+            logger.error(f"{e}, when re.findall({code_pattern}, {arguments})")
+            code_value = None
+
+        if code_value is None:
+            raise ValueError(f"Parse code error for {arguments}")
+        # Also covers the case where arguments contains only `code`
+        return {"language": language_value, "code": code_value}
+
    # @handle_exception
    def get_choice_function_arguments(self, rsp: ChatCompletion) -> dict:
        """Required to provide the first function arguments of choice.
@ -211,7 +241,14 @@ class OpenAILLM(BaseLLM):
            and message.tool_calls[0].function.arguments is not None
        ):
            # reponse is code
-            return json.loads(message.tool_calls[0].function.arguments, strict=False)
+            try:
+                return json.loads(message.tool_calls[0].function.arguments, strict=False)
+            except json.decoder.JSONDecodeError as e:
+                error_msg = (
+                    f"Got JSONDecodeError for \n{'--'*40} \n{message.tool_calls[0].function.arguments}, {str(e)}"
+                )
+                logger.error(error_msg)
+                return self._parse_arguments(message.tool_calls[0].function.arguments)
        elif message.tool_calls is None and message.content is not None:
            # reponse is code, fix openai tools_call respond bug,
            # The response content is `code``, but it appears in the content instead of the arguments.
@ -234,23 +271,21 @@ class OpenAILLM(BaseLLM):
        if not self.config.calc_usage:
            return usage

+        self.model if not isinstance(self.cost_manager, TokenCostManager) else "open-llm-model"
        try:
            usage.prompt_tokens = count_message_tokens(messages, self.pricing_plan)
            usage.completion_tokens = count_string_tokens(rsp, self.pricing_plan)
        except Exception as e:
-            logger.error(f"usage calculation failed: {e}")
+            logger.warning(f"usage calculation failed: {e}")

        return usage

-    def get_costs(self) -> Costs:
-        if not self.cost_manager:
-            return Costs(0, 0, 0, 0)
-        return self.cost_manager.get_costs()
-
    def _get_max_tokens(self, messages: list[dict]):
        if not self.auto_max_tokens:
            return self.config.max_token
-        return get_max_completion_tokens(messages, self.model, self.config.max_tokens)
+        # FIXME
+        # https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3
+        return min(get_max_completion_tokens(messages, self.model, self.config.max_token), 4096)

    @handle_exception
    async def amoderation(self, content: Union[str, list[str]]):
--- a/metagpt/provider/qianfan_api.py
+++ b/metagpt/provider/qianfan_api.py
@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : llm api of qianfan from Baidu, supports ERNIE(wen xin yi yan) and opensource models
+import copy
+import os
+
+import qianfan
+from qianfan import ChatCompletion
+from qianfan.resources.typing import JsonBody
+from tenacity import (
+    after_log,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+from metagpt.configs.llm_config import LLMConfig, LLMType
+from metagpt.logs import log_llm_stream, logger
+from metagpt.provider.base_llm import BaseLLM
+from metagpt.provider.llm_provider_registry import register_provider
+from metagpt.provider.openai_api import log_and_reraise
+from metagpt.utils.cost_manager import CostManager
+from metagpt.utils.token_counter import (
+    QIANFAN_ENDPOINT_TOKEN_COSTS,
+    QIANFAN_MODEL_TOKEN_COSTS,
+)
+
+
+@register_provider(LLMType.QIANFAN)
+class QianFanLLM(BaseLLM):
+    """LLM provider for Baidu QianFan: ERNIE (wen xin yi yan) and hosted open-source models.
+
+    Exactly one of `config.model` / `config.endpoint` selects the target. Credentials from the
+    config are exported as environment variables before the `qianfan` client is created.
+
+    Refs
+        Auth: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/3lmokh7n6#%E3%80%90%E6%8E%A8%E8%8D%90%E3%80%91%E4%BD%BF%E7%94%A8%E5%AE%89%E5%85%A8%E8%AE%A4%E8%AF%81aksk%E9%89%B4%E6%9D%83%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B
+        Token Price: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
+        Models: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/wlmhm7vuo#%E5%AF%B9%E8%AF%9Dchat
+                https://cloud.baidu.com/doc/WENXINWORKSHOP/s/xlmokikxe#%E6%94%AF%E6%8C%81%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8
+    """
+
+    def __init__(self, config: LLMConfig):
+        """Store the config, set up auth and the client, then create the cost manager."""
+        self.config = config
+        self.use_system_prompt = False  # only some ERNIE-x related models support system_prompt
+        self.__init_qianfan()
+        # `token_costs` is populated by `__init_qianfan`, so this must come after it
+        self.cost_manager = CostManager(token_costs=self.token_costs)
+
+    def __init_qianfan(self):
+        """Export credentials, decide system-prompt support, build the price table and the client.
+
+        Raises:
+            ValueError: if neither `access_key`&`secret_key` nor `api_key`&`secret_key` is configured.
+            AssertionError: if both or neither of `model` / `endpoint` are configured.
+        """
+        # NOTE: `setdefault` never overwrites, so values already present in the process
+        # environment take precedence over the ones in the config.
+        if self.config.access_key and self.config.secret_key:
+            # for system level auth, use access_key and secret_key, recommended by official
+            # set environment variable due to official recommendation
+            os.environ.setdefault("QIANFAN_ACCESS_KEY", self.config.access_key)
+            os.environ.setdefault("QIANFAN_SECRET_KEY", self.config.secret_key)
+        elif self.config.api_key and self.config.secret_key:
+            # for application level auth, use api_key and secret_key
+            # set environment variable due to official recommendation
+            os.environ.setdefault("QIANFAN_AK", self.config.api_key)
+            os.environ.setdefault("QIANFAN_SK", self.config.secret_key)
+        else:
+            raise ValueError("Set the `access_key`&`secret_key` or `api_key`&`secret_key` first")
+
+        support_system_pairs = [
+            ("ERNIE-Bot-4", "completions_pro"),  # (model, corresponding-endpoint)
+            ("ERNIE-Bot-8k", "ernie_bot_8k"),
+            ("ERNIE-Bot", "completions"),
+            ("ERNIE-Bot-turbo", "eb-instant"),
+            ("ERNIE-Speed", "ernie_speed"),
+            ("EB-turbo-AppBuilder", "ai_apaas"),
+        ]
+        system_models = {model for model, _ in support_system_pairs}
+        system_endpoints = {endpoint for _, endpoint in support_system_pairs}
+        # only some ERNIE models support a system prompt; match by model name or by endpoint
+        if self.config.model in system_models or self.config.endpoint in system_endpoints:
+            self.use_system_prompt = True
+
+        assert not (self.config.model and self.config.endpoint), "Only set `model` or `endpoint` in the config"
+        assert self.config.model or self.config.endpoint, "Should set one of `model` or `endpoint` in the config"
+
+        # one lookup table keyed by either model name or endpoint name
+        self.token_costs = copy.deepcopy(QIANFAN_MODEL_TOKEN_COSTS)
+        self.token_costs.update(QIANFAN_ENDPOINT_TOKEN_COSTS)
+
+        # self deployed model on the cloud not to calculate usage, it charges resource pool rental fee
+        self.calc_usage = self.config.calc_usage and self.config.endpoint is None
+        self.aclient: ChatCompletion = qianfan.ChatCompletion()
+
+    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
+        """Build the keyword arguments for `ChatCompletion.do` / `ado`.
+
+        Args:
+            messages: chat messages as dicts with `role` and `content` keys.
+            stream: whether to request a streamed response.
+
+        Returns:
+            dict with `messages`, `stream`, and optionally `temperature`, `endpoint` or `model`,
+            and `system` when a leading system message is split out.
+        """
+        kwargs = {
+            "messages": messages,
+            "stream": stream,
+        }
+        if self.config.temperature > 0:
+            # different model has default temperature. only set when it's specified.
+            kwargs["temperature"] = self.config.temperature
+        if self.config.endpoint:
+            kwargs["endpoint"] = self.config.endpoint
+        elif self.config.model:
+            kwargs["model"] = self.config.model
+
+        # if the model supports a system prompt, pass it via `system` instead of `messages`;
+        # the `messages` check avoids an IndexError on an empty conversation
+        if self.use_system_prompt and messages and messages[0]["role"] == "system":
+            kwargs["messages"] = messages[1:]
+            kwargs["system"] = messages[0]["content"]  # set system prompt here
+        return kwargs
+
+    def _update_costs(self, usage: dict):
+        """update each request's token cost"""
+        model_or_endpoint = self.config.model or self.config.endpoint
+        # only account locally when a price is known for this model/endpoint
+        local_calc_usage = model_or_endpoint in self.token_costs
+        super()._update_costs(usage, model_or_endpoint, local_calc_usage)
+
+    def get_choice_text(self, resp: JsonBody) -> str:
+        """Return the generated text (`result` field) of a response body, or "" if absent."""
+        return resp.get("result", "")
+
+    def completion(self, messages: list[dict]) -> JsonBody:
+        """Synchronous, non-streamed chat completion; records usage and returns the response body."""
+        resp = self.aclient.do(**self._const_kwargs(messages=messages, stream=False))
+        self._update_costs(resp.body.get("usage", {}))
+        return resp.body
+
+    async def _achat_completion(self, messages: list[dict]) -> JsonBody:
+        """Asynchronous, non-streamed chat completion; records usage and returns the response body."""
+        resp = await self.aclient.ado(**self._const_kwargs(messages=messages, stream=False))
+        self._update_costs(resp.body.get("usage", {}))
+        return resp.body
+
+    async def acompletion(self, messages: list[dict], timeout=3) -> JsonBody:
+        """Public async entry point; `timeout` is accepted but not used by this provider."""
+        return await self._achat_completion(messages)
+
+    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+        """Streamed chat completion: log each chunk as it arrives and return the joined text."""
+        resp = await self.aclient.ado(**self._const_kwargs(messages=messages, stream=True))
+        collected_content = []
+        usage = {}
+        async for chunk in resp:
+            content = chunk.body.get("result", "")
+            # keep the latest non-empty usage so a chunk without `usage` cannot reset it to {}
+            usage = chunk.body.get("usage") or usage
+            log_llm_stream(content)
+            collected_content.append(content)
+        log_llm_stream("\n")
+
+        self._update_costs(usage)
+        full_content = "".join(collected_content)
+        return full_content
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_random_exponential(min=1, max=60),
+        after=after_log(logger, logger.level("WARNING").name),
+        retry=retry_if_exception_type(ConnectionError),
+        retry_error_callback=log_and_reraise,
+    )
+    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
+        """Return only the generated text, streamed or not; retried on `ConnectionError`."""
+        if stream:
+            return await self._achat_completion_stream(messages)
+        resp = await self._achat_completion(messages)
+        return self.get_choice_text(resp)
--- a/metagpt/provider/zhipuai_api.py
+++ b/metagpt/provider/zhipuai_api.py
@ -5,8 +5,6 @@
 from enum import Enum
 from typing import Optional

-import openai
-import zhipuai
 from requests import ConnectionError
 from tenacity import (
    after_log,
@ -15,6 +13,7 @@ from tenacity import (
    stop_after_attempt,
    wait_random_exponential,
 )
+from zhipuai.types.chat.chat_completion import Completion

 from metagpt.configs.llm_config import LLMConfig, LLMType
 from metagpt.logs import log_llm_stream, logger
@ -40,29 +39,23 @@ class ZhiPuAILLM(BaseLLM):
    """

    def __init__(self, config: LLMConfig):
-        self.__init_zhipuai(config)
        self.config = config
-        self.llm = ZhiPuModelAPI
-        self.model = "chatglm_turbo"  # so far only one model, just use it
-        self.pricing_plan = self.config.pricing_plan or self.model
-        self.use_system_prompt: bool = False  # zhipuai has no system prompt when use api
+        self.__init_zhipuai()
        self.cost_manager: Optional[CostManager] = None

-    def __init_zhipuai(self, config: LLMConfig):
-        assert config.api_key
-        zhipuai.api_key = config.api_key
-        # due to use openai sdk, set the api_key but it will't be used.
-        # openai.api_key = zhipuai.api_key  # due to use openai sdk, set the api_key but it will't be used.
-        if config.proxy:
-            # FIXME: openai v1.x sdk has no proxy support
-            openai.proxy = config.proxy
+    def __init_zhipuai(self):
+        assert self.config.api_key
+        self.api_key = self.config.api_key
+        self.model = self.config.model  # so far, it support glm-3-turbo、glm-4
+        self.pricing_plan = self.config.pricing_plan or self.model
+        self.llm = ZhiPuModelAPI(api_key=self.api_key)

    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
        kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3}
        return kwargs

    def completion(self, messages: list[dict], timeout=3) -> dict:
-        resp = self.llm.chat.completions.create(**self._const_kwargs(messages))
+        resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages))
        usage = resp.usage.model_dump()
        self._update_costs(usage)
        return resp.model_dump()
--- a/metagpt/roles/mi/init.py
+++ b/metagpt/roles/mi/init.py
--- a/metagpt/roles/ci/code_interpreter.py
+++ b/metagpt/roles/ci/code_interpreter.py
@ -2,9 +2,9 @@ from __future__ import annotations

 from pydantic import Field

-from metagpt.actions.ci.ask_review import ReviewConst
-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
-from metagpt.actions.ci.write_analysis_code import (
+from metagpt.actions.mi.ask_review import ReviewConst
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
+from metagpt.actions.mi.write_analysis_code import (
    WriteCodeWithoutTools,
    WriteCodeWithTools,
 )
@ -13,9 +13,9 @@ from metagpt.roles import Role
 from metagpt.schema import Message, Task, TaskResult


-class CodeInterpreter(Role):
-    name: str = "Charlie"
-    profile: str = "CodeInterpreter"
+class Interpreter(Role):
+    name: str = "Ivy"
+    profile: str = "Interpreter"
    auto_run: bool = True
    use_tools: bool = False
    execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True)
@ -72,11 +72,7 @@ class CodeInterpreter(Role):
                if ReviewConst.CHANGE_WORDS[0] in review:
                    counter = 0  # redo the task again with help of human suggestions

-        py_code = (
-            code["code"] if code.get("language") == "python" else ""
-        )  # use python code as final code; for markdown, return the rendered result instead of the code itself
-
-        return py_code, result, success
+        return code["code"], result, success

    async def _write_code(self):
        todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
--- a/metagpt/roles/mi/ml_engineer.py
+++ b/metagpt/roles/mi/ml_engineer.py
@ -1,13 +1,13 @@
-from metagpt.actions.ci.debug_code import DebugCode
-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
-from metagpt.actions.ci.ml_action import UpdateDataColumns, WriteCodeWithToolsML
+from metagpt.actions.mi.debug_code import DebugCode
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
+from metagpt.actions.mi.ml_action import UpdateDataColumns, WriteCodeWithToolsML
 from metagpt.logs import logger
-from metagpt.roles.ci.code_interpreter import CodeInterpreter
+from metagpt.roles.mi.interpreter import Interpreter
 from metagpt.tools.tool_type import ToolType
 from metagpt.utils.common import any_to_str


-class MLEngineer(CodeInterpreter):
+class MLEngineer(Interpreter):
    name: str = "Mark"
    profile: str = "MLEngineer"
    debug_context: list = []
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@ -281,7 +281,7 @@ class Role(SerializationMixin, ContextMixin, BaseModel):
                i = action
            self._init_action(i)
            self.actions.append(i)
-            self.states.append(f"{len(self.actions)}. {action}")
+            self.states.append(f"{len(self.actions) - 1}. {action}")

    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1, auto_run: bool = True, use_tools: bool = False):
        """Set strategy of the Role reacting to observed Message. Variation lies in how
--- a/metagpt/software_company.py
+++ b/metagpt/software_company.py
@ -2,14 +2,11 @@
 # -*- coding: utf-8 -*-

 import asyncio
-import shutil
 from pathlib import Path

 import typer

-from metagpt.config2 import config
-from metagpt.const import CONFIG_ROOT, METAGPT_ROOT
-from metagpt.context import Context
+from metagpt.const import CONFIG_ROOT
 from metagpt.utils.project_repo import ProjectRepo

 app = typer.Typer(add_completion=False, pretty_exceptions_show_locals=False)
@ -30,6 +27,8 @@ def generate_repo(
    recover_path=None,
 ) -> ProjectRepo:
    """Run the startup logic. Can be called from CLI or other Python scripts."""
+    from metagpt.config2 import config
+    from metagpt.context import Context
    from metagpt.roles import (
        Architect,
        Engineer,
@ -122,7 +121,17 @@ def startup(
    )


-def copy_config_to(config_path=METAGPT_ROOT / "config" / "config2.yaml"):
+# Minimal config2.yaml template; copy_config_to() writes this text verbatim to the target file.
+DEFAULT_CONFIG = """# Full Example: https://github.com/geekan/MetaGPT/blob/main/config/config2.example.yaml
+# Reflected Code: https://github.com/geekan/MetaGPT/blob/main/metagpt/config2.py
+llm:
+  api_type: "openai"  # or azure / ollama / open_llm etc. Check LLMType for more options
+  model: "gpt-4-turbo-preview"  # or gpt-3.5-turbo-1106 / gpt-4-1106-preview
+  base_url: "https://api.openai.com/v1"  # or forward url / other llm url
+  api_key: "YOUR_API_KEY"
+"""
+
+
+def copy_config_to():
    """Initialize the configuration file for MetaGPT."""
    target_path = CONFIG_ROOT / "config2.yaml"

@ -136,7 +145,7 @@ def copy_config_to(config_path=METAGPT_ROOT / "config" / "config2.yaml"):
        print(f"Existing configuration file backed up at {backup_path}")

    # 复制文件
-    shutil.copy(str(config_path), target_path)
+    target_path.write_text(DEFAULT_CONFIG, encoding="utf-8")
    print(f"Configuration file initialized at {target_path}")


--- a/metagpt/strategy/planner.py
+++ b/metagpt/strategy/planner.py
@ -4,8 +4,8 @@ import json

 from pydantic import BaseModel, Field

-from metagpt.actions.ci.ask_review import AskReview, ReviewConst
-from metagpt.actions.ci.write_plan import (
+from metagpt.actions.mi.ask_review import AskReview, ReviewConst
+from metagpt.actions.mi.write_plan import (
    WritePlan,
    precheck_update_plan_from_rsp,
    update_plan_from_rsp,
@ -122,7 +122,7 @@ class Planner(BaseModel):
        )  # "confirm, ... (more content, such as changing downstream tasks)"
        if confirmed_and_more:
            self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
-            await self.update_plan(review)
+            await self.update_plan()

    def get_useful_memories(self, task_exclude_field=None) -> list[Message]:
        """find useful memories only to reduce context length and improve performance"""
--- a/metagpt/strategy/solver.py
+++ b/metagpt/strategy/solver.py
@ -49,8 +49,8 @@ class TOTSolver(BaseSolver):
        raise NotImplementedError


-class CodeInterpreterSolver(BaseSolver):
-    """CodeInterpreterSolver: Write&Run code in the graph"""
+class InterpreterSolver(BaseSolver):
+    """InterpreterSolver: Write&Run code in the graph"""

    async def solve(self):
        raise NotImplementedError
--- a/metagpt/tools/libs/init.py
+++ b/metagpt/tools/libs/init.py
@ -10,6 +10,14 @@ from metagpt.tools.libs import (
    sd_engine,
    gpt_v_generator,
    web_scraping,
+    email_login,
 )

-_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scraping  # Avoid pre-commit error
+_ = (
+    data_preprocess,
+    feature_engineering,
+    sd_engine,
+    gpt_v_generator,
+    web_scraping,
+    email_login,
+)  # Avoid pre-commit error
--- a/metagpt/tools/libs/email_login.py
+++ b/metagpt/tools/libs/email_login.py
@ -0,0 +1,50 @@
+from imap_tools import MailBox
+
+from metagpt.tools.tool_registry import register_tool
+from metagpt.tools.tool_type import ToolType
+
+# Maps an email domain (the part after "@") to the IMAP server host used to log in.
+IMAP_SERVERS = {
+    "outlook.com": "imap-mail.outlook.com",  # Outlook
+    "163.com": "imap.163.com",  # 163 Mail
+    "qq.com": "imap.qq.com",  # QQ Mail
+    "gmail.com": "imap.gmail.com",  # Gmail
+    "yahoo.com": "imap.mail.yahoo.com",  # Yahoo Mail
+    "icloud.com": "imap.mail.me.com",  # iCloud Mail
+    "hotmail.com": "imap-mail.outlook.com",  # Hotmail (same server as Outlook)
+    "live.com": "imap-mail.outlook.com",  # Live (same server as Outlook)
+    "sina.com": "imap.sina.com",  # Sina Mail
+    "sohu.com": "imap.sohu.com",  # Sohu Mail
+    "yahoo.co.jp": "imap.mail.yahoo.co.jp",  # Yahoo Mail Japan
+    "yandex.com": "imap.yandex.com",  # Yandex Mail
+    "mail.ru": "imap.mail.ru",  # Mail.ru
+    "aol.com": "imap.aol.com",  # AOL Mail
+    "gmx.com": "imap.gmx.com",  # GMX Mail
+    "zoho.com": "imap.zoho.com",  # Zoho Mail
+}
+
+
+@register_tool(tool_type=ToolType.EMAIL_LOGIN.type_name)
+def email_login_imap(email_address, email_password):
+    """
+    Use imap_tools package to log in to your email (the email that supports IMAP protocol) to verify and return the account object.
+
+    Args:
+        email_address (str): Email address that needs to be logged in and linked.
+        email_password (str): Password for the email address that needs to be logged in and linked.
+
+    Returns:
+        object: The imap_tools's MailBox object returned after successfully connecting to the mailbox through imap_tools, including various information about this account (email, etc.). An unsupported email domain raises AssertionError, and errors from imap_tools (e.g. a rejected login) propagate to the caller.
+    """
+
+    # Extract the domain from the email address; domains are case-insensitive, so normalise
+    # before the lookup (e.g. "User@Gmail.com" must resolve like "user@gmail.com")
+    domain = email_address.split("@")[-1].strip().lower()
+
+    # Determine the correct IMAP server
+    imap_server = IMAP_SERVERS.get(domain)
+
+    if not imap_server:
+        # Raised explicitly rather than via `assert` so the check survives `python -O`;
+        # the exception type is kept for callers that already catch AssertionError.
+        raise AssertionError(f"IMAP server for {domain} not found.")
+
+    # Attempt to log in to the email account
+    mailbox = MailBox(imap_server).login(email_address, email_password)
+    return mailbox
--- a/metagpt/tools/libs/gpt_v_generator.py
+++ b/metagpt/tools/libs/gpt_v_generator.py
@ -13,12 +13,12 @@ from metagpt.tools.tool_registry import register_tool
 from metagpt.tools.tool_type import ToolType
 from metagpt.utils.common import encode_image

-ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
+ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX designer, please generate layout information for this image:

 NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
 As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry. """

-GENERATE_PROMPT = """You are now a UI/UX and Web Developer. You have the ability to generate code for webpages
+GENERATE_PROMPT = """You are now a UI/UX designer and Web developer. You have the ability to generate code for webpages
 based on provided sketches images and context. 
 Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript.

--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@ -9,7 +9,6 @@ from __future__ import annotations

 import inspect
 import os
-import re
 from collections import defaultdict

 import yaml
@ -109,7 +108,8 @@ def register_tool(tool_type: str = "other", schema_path: str = "", **kwargs):
        # Get the file path where the function / class is defined and the source code
        file_path = inspect.getfile(cls)
        if "metagpt" in file_path:
-            file_path = re.search("metagpt.+", file_path).group(0)
+            # split to handle ../metagpt/metagpt/tools/... where only metagpt/tools/... is needed
+            file_path = "metagpt" + file_path.split("metagpt")[-1]
        source_code = inspect.getsource(cls)

        TOOL_REGISTRY.register_tool(
--- a/metagpt/tools/tool_type.py
+++ b/metagpt/tools/tool_type.py
@ -2,6 +2,7 @@ from enum import Enum

 from metagpt.prompts.tool_types import (
    DATA_PREPROCESS_PROMPT,
+    EDA_PROMPT,
    FEATURE_ENGINEERING_PROMPT,
    IMAGE2WEBPAGE_PROMPT,
    MODEL_EVALUATE_PROMPT,
@ -11,12 +12,20 @@ from metagpt.tools.tool_data_type import ToolTypeDef


 class ToolType(Enum):
-    EDA = ToolTypeDef(name="eda", desc="For performing exploratory data analysis")
+    EDA = ToolTypeDef(
+        name="eda",
+        desc="For performing exploratory data analysis",
+        usage_prompt=EDA_PROMPT,
+    )
    DATA_PREPROCESS = ToolTypeDef(
        name="data_preprocess",
        desc="Only for changing value inplace.",
        usage_prompt=DATA_PREPROCESS_PROMPT,
    )
+    EMAIL_LOGIN = ToolTypeDef(
+        name="email_login",
+        desc="For logging to an email.",
+    )
    FEATURE_ENGINEERING = ToolTypeDef(
        name="feature_engineering",
        desc="Only for creating new columns for input data.",
--- a/metagpt/utils/cost_manager.py
+++ b/metagpt/utils/cost_manager.py
@ -6,12 +6,13 @@
@Desc    : mashenquan, 2023/8/28. Separate the `CostManager` class to support user-level cost accounting.
 """

+import re
 from typing import NamedTuple

 from pydantic import BaseModel

 from metagpt.logs import logger
-from metagpt.utils.token_counter import TOKEN_COSTS
+from metagpt.utils.token_counter import FIREWORKS_GRADE_TOKEN_COSTS, TOKEN_COSTS


 class Costs(NamedTuple):
@ -29,6 +30,7 @@ class CostManager(BaseModel):
    total_budget: float = 0
    max_budget: float = 10.0
    total_cost: float = 0
+    token_costs: dict[str, dict[str, float]] = TOKEN_COSTS  # different model's token cost

    def update_cost(self, prompt_tokens, completion_tokens, model):
        """
@ -43,8 +45,13 @@ class CostManager(BaseModel):
            return
        self.total_prompt_tokens += prompt_tokens
        self.total_completion_tokens += completion_tokens
+        if model not in self.token_costs:
+            logger.warning(f"Model {model} not found in TOKEN_COSTS.")
+            return
+
        cost = (
-            prompt_tokens * TOKEN_COSTS[model]["prompt"] + completion_tokens * TOKEN_COSTS[model]["completion"]
+            prompt_tokens * self.token_costs[model]["prompt"]
+            + completion_tokens * self.token_costs[model]["completion"]
        ) / 1000
        self.total_cost += cost
        logger.info(
@ -99,3 +106,44 @@ class TokenCostManager(CostManager):
        self.total_prompt_tokens += prompt_tokens
        self.total_completion_tokens += completion_tokens
        logger.info(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
+
+
+class FireworksCostManager(CostManager):
+    """Cost manager for Fireworks models, priced by model-size grade rather than per model name."""
+
+    def model_grade_token_costs(self, model: str) -> dict[str, float]:
+        """Return the `{"prompt": ..., "completion": ...}` prices that apply to `model`.
+
+        The grade is looked up in `FIREWORKS_GRADE_TOKEN_COSTS`: names containing "mixtral-8x7b"
+        use their own entry; otherwise the size parsed from the name selects "16" (size <= 16),
+        "80" (16 < size <= 80) or "-1" (size unknown or above 80).
+        """
+
+        def _get_model_size(model: str) -> float:
+            # the size is the number in a "-<number>b" fragment of the name; -1 means "unknown"
+            matches = re.findall(r".*-([0-9.]+)b", model)
+            if not matches:
+                return -1
+            try:
+                return float(matches[0])
+            except ValueError:
+                # the pattern also captures non-numbers such as "." or "1.2.3"
+                return -1
+
+        if "mixtral-8x7b" in model:
+            token_costs = FIREWORKS_GRADE_TOKEN_COSTS["mixtral-8x7b"]
+        else:
+            model_size = _get_model_size(model)
+            if 0 < model_size <= 16:
+                token_costs = FIREWORKS_GRADE_TOKEN_COSTS["16"]
+            elif 16 < model_size <= 80:
+                token_costs = FIREWORKS_GRADE_TOKEN_COSTS["80"]
+            else:
+                token_costs = FIREWORKS_GRADE_TOKEN_COSTS["-1"]
+        return token_costs
+
+    def update_cost(self, prompt_tokens: int, completion_tokens: int, model: str):
+        """
+        Refs to `https://app.fireworks.ai/pricing` **Developer pricing**
+        Update the total cost, prompt tokens, and completion tokens.
+
+        Args:
+        prompt_tokens (int): The number of tokens used in the prompt.
+        completion_tokens (int): The number of tokens used in the completion.
+        model (str): The model used for the API call.
+        """
+        self.total_prompt_tokens += prompt_tokens
+        self.total_completion_tokens += completion_tokens
+
+        token_costs = self.model_grade_token_costs(model)
+        # the grade prices are divided by 1,000,000 here, i.e. treated as per-million-token prices
+        cost = (prompt_tokens * token_costs["prompt"] + completion_tokens * token_costs["completion"]) / 1000000
+        self.total_cost += cost
+        logger.info(
+            f"Total running cost: ${self.total_cost:.4f}, "
+            f"Current cost: ${cost:.4f}, prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
+        )
--- a/metagpt/utils/repair_llm_raw_output.py
+++ b/metagpt/utils/repair_llm_raw_output.py
@ -119,6 +119,7 @@ def repair_json_format(output: str) -> str:
        logger.info(f"repair_json_format: {'}]'}")
    elif output.startswith("{") and output.endswith("]"):
        output = output[:-1] + "}"
+
    # remove comments in output json string, after json value content, maybe start with #, maybe start with //
    arr = output.split("\n")
    new_arr = []
@ -208,6 +209,17 @@ def repair_invalid_json(output: str, error: str) -> str:
        elif (rline[col_no] in ["'", '"']) and (line.startswith('"') or line.startswith("'")) and "," not in line:
            # problem, `"""` or `'''` without `,`
            new_line = f",{line}"
+        elif col_no - 1 >= 0 and rline[col_no - 1] in ['"', "'"]:
+            # backslash problem like \" in the output
+            char = rline[col_no - 1]
+            nearest_char_idx = rline[col_no:].find(char)
+            new_line = (
+                rline[: col_no - 1]
+                + "\\"
+                + rline[col_no - 1 : col_no + nearest_char_idx]
+                + "\\"
+                + rline[col_no + nearest_char_idx :]
+            )
        elif '",' not in line and "," not in line and '"' not in line:
            new_line = f'{line}",'
        elif not line.endswith(","):
--- a/metagpt/utils/text.py
+++ b/metagpt/utils/text.py
@ -25,7 +25,7 @@ def reduce_message_length(
    """
    max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
    for msg in msgs:
-        if count_string_tokens(msg, model_name) < max_token:
+        if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX:
            return msg

    raise RuntimeError("fail to reduce message length")
@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str
            continue
        ret = ["".join(j) for j in _split_by_count(sentences, count)]
        return ret
-    return _split_by_count(paragraph, count)
+    return list(_split_by_count(paragraph, count))


 def decode_unicode_escape(text: str) -> str:
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@ -32,9 +32,107 @@ TOKEN_COSTS = {
    "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03},  # TODO add extra image price calculator
    "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
    "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
-    "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
-    "glm-4": {"prompt": 0.0, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
+    "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
+    "glm-4": {"prompt": 0.014, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
+    "moonshot-v1-8k": {"prompt": 0.012, "completion": 0.012},  # prompt + completion tokens=0.012￥/k-tokens
+    "moonshot-v1-32k": {"prompt": 0.024, "completion": 0.024},
+    "moonshot-v1-128k": {"prompt": 0.06, "completion": 0.06},
+    "open-mistral-7b": {"prompt": 0.00025, "completion": 0.00025},
+    "open-mixtral-8x7b": {"prompt": 0.0007, "completion": 0.0007},
+    "mistral-small-latest": {"prompt": 0.002, "completion": 0.006},
+    "mistral-medium-latest": {"prompt": 0.0027, "completion": 0.0081},
+    "mistral-large-latest": {"prompt": 0.008, "completion": 0.024},
+}
+
+
+"""
+QianFan Token Price https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
+Because QianFan has multiple pricing strategies, we uniformly use the `Tokens post-payment` prices for cost statistics.
+"""
+QIANFAN_MODEL_TOKEN_COSTS = {
+    "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017},
+    "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067},
+    "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017},
+    "ERNIE-Bot-turbo": {"prompt": 0.0011, "completion": 0.0011},
+    "EB-turbo-AppBuilder": {"prompt": 0.0011, "completion": 0.0011},
+    "ERNIE-Speed": {"prompt": 0.00056, "completion": 0.0011},
+    "BLOOMZ-7B": {"prompt": 0.00056, "completion": 0.00056},
+    "Llama-2-7B-Chat": {"prompt": 0.00056, "completion": 0.00056},
+    "Llama-2-13B-Chat": {"prompt": 0.00084, "completion": 0.00084},
+    "Llama-2-70B-Chat": {"prompt": 0.0049, "completion": 0.0049},
+    "ChatGLM2-6B-32K": {"prompt": 0.00056, "completion": 0.00056},
+    "AquilaChat-7B": {"prompt": 0.00056, "completion": 0.00056},
+    "Mixtral-8x7B-Instruct": {"prompt": 0.0049, "completion": 0.0049},
+    "SQLCoder-7B": {"prompt": 0.00056, "completion": 0.00056},
+    "CodeLlama-7B-Instruct": {"prompt": 0.00056, "completion": 0.00056},
+    "XuanYuan-70B-Chat-4bit": {"prompt": 0.0049, "completion": 0.0049},
+    "Qianfan-BLOOMZ-7B-compressed": {"prompt": 0.00056, "completion": 0.00056},
+    "Qianfan-Chinese-Llama-2-7B": {"prompt": 0.00056, "completion": 0.00056},
+    "Qianfan-Chinese-Llama-2-13B": {"prompt": 0.00084, "completion": 0.00084},
+    "ChatLaw": {"prompt": 0.0011, "completion": 0.0011},
+    "Yi-34B-Chat": {"prompt": 0.0, "completion": 0.0},
+}
+
+QIANFAN_ENDPOINT_TOKEN_COSTS = {
+    "completions_pro": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-4"],
+    "ernie_bot_8k": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"],
+    "completions": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot"],
+    "eb-instant": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"],
+    "ai_apaas": QIANFAN_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"],
+    "ernie_speed": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Speed"],
+    "bloomz_7b1": QIANFAN_MODEL_TOKEN_COSTS["BLOOMZ-7B"],
+    "llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"],
+    "llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"],
+    "llama_2_70b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"],
+    "chatglm2_6b_32k": QIANFAN_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"],
+    "aquilachat_7b": QIANFAN_MODEL_TOKEN_COSTS["AquilaChat-7B"],
+    "mixtral_8x7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"],
+    "sqlcoder_7b": QIANFAN_MODEL_TOKEN_COSTS["SQLCoder-7B"],
+    "codellama_7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"],
+    "xuanyuan_70b_chat": QIANFAN_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"],
+    "qianfan_bloomz_7b_compressed": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"],
+    "qianfan_chinese_llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"],
+    "qianfan_chinese_llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"],
+    "chatlaw": QIANFAN_MODEL_TOKEN_COSTS["ChatLaw"],
+    "yi_34b_chat": QIANFAN_MODEL_TOKEN_COSTS["Yi-34B-Chat"],
+}
+
+"""
+DashScope Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
+Each model has its own pricing detail page. Note that some models are free for a limited time.
+"""
+DASHSCOPE_TOKEN_COSTS = {
+    "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011},
+    "qwen-plus": {"prompt": 0.0028, "completion": 0.0028},
+    "qwen-max": {"prompt": 0.0, "completion": 0.0},
+    "qwen-max-1201": {"prompt": 0.0, "completion": 0.0},
+    "qwen-max-longcontext": {"prompt": 0.0, "completion": 0.0},
+    "llama2-7b-chat-v2": {"prompt": 0.0, "completion": 0.0},
+    "llama2-13b-chat-v2": {"prompt": 0.0, "completion": 0.0},
+    "qwen-72b-chat": {"prompt": 0.0, "completion": 0.0},
+    "qwen-14b-chat": {"prompt": 0.0011, "completion": 0.0011},
+    "qwen-7b-chat": {"prompt": 0.00084, "completion": 0.00084},
+    "qwen-1.8b-chat": {"prompt": 0.0, "completion": 0.0},
+    "baichuan2-13b-chat-v1": {"prompt": 0.0011, "completion": 0.0011},
+    "baichuan2-7b-chat-v1": {"prompt": 0.00084, "completion": 0.00084},
+    "baichuan-7b-v1": {"prompt": 0.0, "completion": 0.0},
+    "chatglm-6b-v2": {"prompt": 0.0011, "completion": 0.0011},
+    "chatglm3-6b": {"prompt": 0.0, "completion": 0.0},
+    "ziya-llama-13b-v1": {"prompt": 0.0, "completion": 0.0},  # no price page, judge it as free
+    "dolly-12b-v2": {"prompt": 0.0, "completion": 0.0},
+    "belle-llama-13b-2m-v1": {"prompt": 0.0, "completion": 0.0},
+    "moss-moon-003-sft-v1": {"prompt": 0.0, "completion": 0.0},
+    "chatyuan-large-v2": {"prompt": 0.0, "completion": 0.0},
+    "billa-7b-sft-v1": {"prompt": 0.0, "completion": 0.0},
+}
+
+
+FIREWORKS_GRADE_TOKEN_COSTS = {
+    "-1": {"prompt": 0.0, "completion": 0.0},  # abnormal condition
+    "16": {"prompt": 0.2, "completion": 0.8},  # 16 means model size <= 16B; 0.2 means $0.2/1M tokens
+    "80": {"prompt": 0.7, "completion": 2.8},  # 80 means 16B < model size <= 80B
+    "mixtral-8x7b": {"prompt": 0.4, "completion": 1.6},
 }

 TOKEN_MAX = {
@ -57,8 +155,17 @@ TOKEN_MAX = {
    "gpt-4-vision-preview": 128000,
    "gpt-4-1106-vision-preview": 128000,
    "text-embedding-ada-002": 8192,
-    "chatglm_turbo": 32768,
+    "glm-3-turbo": 128000,
+    "glm-4": 128000,
    "gemini-pro": 32768,
+    "moonshot-v1-8k": 8192,
+    "moonshot-v1-32k": 32768,
+    "moonshot-v1-128k": 128000,
+    "open-mistral-7b": 8192,
+    "open-mixtral-8x7b": 32768,
+    "mistral-small-latest": 32768,
+    "mistral-medium-latest": 32768,
+    "mistral-large-latest": 32768,
 }


--- a/requirements.txt
+++ b/requirements.txt
@ -11,7 +11,7 @@ typer==0.9.0
 # godot==0.1.1
 # google_api_python_client==2.93.0  # Used by search_engine.py
 lancedb==0.4.0
-langchain==0.0.352
+langchain==0.1.8
 loguru==0.6.0
 meilisearch==0.21.0
 numpy>=1.24.3,<1.25.0
@ -27,7 +27,7 @@ python_docx==0.8.11
 PyYAML==6.0.1
 # sentence_transformers==2.2.2
 setuptools==65.6.3
-tenacity==8.2.2
+tenacity==8.2.3
 tiktoken==0.5.2
 tqdm==4.65.0
 #unstructured[local-inference]
@ -54,7 +54,7 @@ rich==13.6.0
 nbclient==0.9.0
 nbformat==5.9.2
 ipython==8.17.2
-ipykernel==6.27.0
+ipykernel==6.27.1
 scikit_learn==1.3.2
 typing-extensions==4.9.0
 socksio~=1.0.0
@ -63,7 +63,10 @@ gitignore-parser==0.1.9
 websockets~=12.0
 networkx~=3.2.1
 google-generativeai==0.3.2
-# playwright==1.40.0  # playwright extras require
-anytree==2.12.1
+playwright>=1.26  # used at metagpt/tools/libs/web_scraping.py
+anytree
 ipywidgets==8.1.1
-Pillow==10.1.0
+Pillow
+imap_tools==1.5.0  # Used by metagpt/tools/libs/email_login.py
+qianfan==0.3.2
+dashscope==1.14.1
--- a/setup.py
+++ b/setup.py
@ -24,7 +24,6 @@ requirements = (here / "requirements.txt").read_text(encoding="utf-8").splitline


 extras_require = {
-    "playwright": ["playwright>=1.26", "beautifulsoup4"],
    "selenium": ["selenium>4", "webdriver_manager", "beautifulsoup4"],
    "search-google": ["google-api-python-client==2.94.0"],
    "search-ddg": ["duckduckgo-search~=4.1.1"],
@ -58,7 +57,7 @@ extras_require["dev"] = (["pylint~=3.0.3", "black~=23.3.0", "isort~=5.12.0", "pr

 setup(
    name="metagpt",
-    version="0.7.0",
+    version="0.7.2",
    description="The Multi-Agent Framework",
    long_description=long_description,
    long_description_content_type="text/markdown",
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -14,6 +14,7 @@ import re
 import uuid
 from typing import Callable

+import aiohttp.web
 import pytest

 from metagpt.const import DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
@ -171,9 +172,8 @@ def new_filename(mocker):
    yield mocker


-@pytest.fixture(scope="session")
-def search_rsp_cache():
-    rsp_cache_file_path = TEST_DATA_PATH / "search_rsp_cache.json"  # read repo-provided
+def _rsp_cache(name):
+    rsp_cache_file_path = TEST_DATA_PATH / f"{name}.json"  # read repo-provided
    if os.path.exists(rsp_cache_file_path):
        with open(rsp_cache_file_path, "r") as f1:
            rsp_cache_json = json.load(f1)
@ -184,6 +184,16 @@ def search_rsp_cache():
        json.dump(rsp_cache_json, f2, indent=4, ensure_ascii=False)


+@pytest.fixture(scope="session")
+def search_rsp_cache():  # session-wide cache; _rsp_cache reads TEST_DATA_PATH / "search_rsp_cache.json" if present
+    yield from _rsp_cache("search_rsp_cache")  # re-yield whatever the shared helper generator yields
+
+
+@pytest.fixture(scope="session")
+def mermaid_rsp_cache():  # session-wide cache; _rsp_cache reads TEST_DATA_PATH / "mermaid_rsp_cache.json" if present
+    yield from _rsp_cache("mermaid_rsp_cache")  # re-yield whatever the shared helper generator yields
+
+
@pytest.fixture
 def aiohttp_mocker(mocker):
    MockResponse = type("MockResponse", (MockAioResponse,), {})
@ -231,3 +241,32 @@ def search_engine_mocker(aiohttp_mocker, curl_cffi_mocker, httplib2_mocker, sear
    aiohttp_mocker.rsp_cache = httplib2_mocker.rsp_cache = curl_cffi_mocker.rsp_cache = search_rsp_cache
    aiohttp_mocker.check_funcs = httplib2_mocker.check_funcs = curl_cffi_mocker.check_funcs = check_funcs
    yield check_funcs
+
+
+@pytest.fixture
+def http_server():  # returns the async `start` factory below; each call to it boots a new local HTTP server
+    async def handler(request):  # answers every request with the same fixed HTML page
+        return aiohttp.web.Response(
+            text="""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">
+            <title>MetaGPT</title></head><body><h1>MetaGPT</h1></body></html>""",
+            content_type="text/html",
+        )
+
+    async def start():  # returns (site, base_url); this fixture never stops the site, so the caller must
+        server = aiohttp.web.Server(handler)
+        runner = aiohttp.web.ServerRunner(server)
+        await runner.setup()
+        site = aiohttp.web.TCPSite(runner, "127.0.0.1", 0)  # IPv4 only ("localhost" may bind ::1); port 0 = OS-assigned
+        await site.start()
+        host, port, *_ = site._server.sockets[0].getsockname()  # read back the bound address; ignore extra fields
+        return site, f"http://{host}:{port}"
+
+    return start
+
+
+@pytest.fixture
+def mermaid_mocker(aiohttp_mocker, mermaid_rsp_cache):  # points the aiohttp mock at the mermaid response cache
+    check_funcs: dict[tuple[str, str], Callable[[dict], str]] = {}  # starts empty for every test
+    aiohttp_mocker.rsp_cache = mermaid_rsp_cache
+    aiohttp_mocker.check_funcs = check_funcs
+    yield check_funcs  # the very dict object assigned to aiohttp_mocker.check_funcs above
--- a/tests/data/mermaid_rsp_cache.json
+++ b/tests/data/mermaid_rsp_cache.json
--- a/tests/data/rsp_cache.json
+++ b/tests/data/rsp_cache.json
@ -355,7 +355,7 @@
        "code": "print(data.head())"
    },
    "\n    # Context:\n    user: run analysis on sklearn iris dataset\n    # Task:\n    Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to 5 tasks.\n    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.\n    If you encounter errors on the current task, revise and output the current single task only.\n    Output a list of jsons following the format:\n    ```json\n    [\n        {\n            \"task_id\": str = \"unique identifier for a task in plan, can be an ordinal\",\n            \"dependent_task_ids\": list[str] = \"ids of tasks prerequisite to this task\",\n            \"instruction\": \"what you should do in this task, one short phrase or sentence\",\n        },\n        ...\n    ]\n    ```\n    ": "```json\n[\n    {\n        \"task_id\": \"1\",\n        \"dependent_task_ids\": [],\n        \"instruction\": \"Import the Iris dataset from sklearn.datasets\"\n    },\n    {\n        \"task_id\": \"2\",\n        \"dependent_task_ids\": [\"1\"],\n        \"instruction\": \"Perform exploratory data analysis to understand the dataset\"\n    },\n    {\n        \"task_id\": \"3\",\n        \"dependent_task_ids\": [\"2\"],\n        \"instruction\": \"Preprocess the data to prepare it for modeling\"\n    },\n    {\n        \"task_id\": \"4\",\n        \"dependent_task_ids\": [\"3\"],\n        \"instruction\": \"Split the dataset into training and testing sets\"\n    },\n    {\n        \"task_id\": \"5\",\n        \"dependent_task_ids\": [\"4\"],\n        \"instruction\": \"Train a classifier using the training set and evaluate it using the test set\"\n    }\n]\n```",
-    "[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Import the Iris dataset from sklearn.datasets\\nTask 2: Perform exploratory data analysis to understand the dataset\\nTask 3: Preprocess the data to prepare it for modeling\\nTask 4: Split the dataset into training and testing sets\\nTask 5: Train a classifier using the training set and evaluate it using the test set\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
+    "[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Import the Iris dataset from sklearn.datasets\\nTask 2: Perform exploratory data analysis to understand the dataset\\nTask 3: Preprocess the data to prepare it for modeling\\nTask 4: Split the dataset into training and testing sets\\nTask 5: Train a classifier using the training set and evaluate it using the test set\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **email_login**: For logging to an email.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
        "task_type": [
            "other",
            "eda",
@ -366,24 +366,27 @@
        ]
    },
    "\n    # Context:\n    user: \n## User Requirement\nRun data analysis on sklearn Iris dataset, include a plot\n## Context\n\n## Current Plan\n[]\n## Current Task\n{}\n\n    # Task:\n    Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to 3 tasks.\n    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.\n    If you encounter errors on the current task, revise and output the current single task only.\n    Output a list of jsons following the format:\n    ```json\n    [\n        {\n            \"task_id\": str = \"unique identifier for a task in plan, can be an ordinal\",\n            \"dependent_task_ids\": list[str] = \"ids of tasks prerequisite to this task\",\n            \"instruction\": \"what you should do in this task, one short phrase or sentence\",\n        },\n        ...\n    ]\n    ```\n    ": "```json\n[\n    {\n        \"task_id\": \"1\",\n        \"dependent_task_ids\": [],\n        \"instruction\": \"Load the sklearn Iris dataset.\"\n    },\n    {\n        \"task_id\": \"2\",\n        \"dependent_task_ids\": [\"1\"],\n        \"instruction\": \"Perform exploratory data analysis on the Iris dataset.\"\n    },\n    {\n        \"task_id\": \"3\",\n        \"dependent_task_ids\": [\"2\"],\n        \"instruction\": \"Create a plot visualizing the Iris dataset.\"\n    }\n]\n```",
-    "[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Load the sklearn Iris dataset.\\nTask 2: Perform exploratory data analysis on the Iris dataset.\\nTask 3: Create a plot visualizing the Iris dataset.\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
+    "[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Load the sklearn Iris dataset.\\nTask 2: Perform exploratory data analysis on the Iris dataset.\\nTask 3: Create a plot visualizing the Iris dataset.\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **email_login**: For logging to an email.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
        "task_type": [
-            "other",
+            "data_preprocess",
            "eda",
            "other"
        ]
    },
-    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    
}\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"1\\\",\\\"dependent_task_ids\\\":[],\\\"instruction\\\":\\\"Load the sklearn Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
-        "code": "from sklearn.datasets import load_iris\niris_data = load_iris()"
+    "[{\"role\": \"user\", \"content\": \"\\n## User Requirement:\\nLoad the sklearn Iris dataset.\\n\\n## Task\\nRecommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'. \\n\\n## Available Tools:\\n{'FillMissingValue': 'Completing missing values with simple strategies.', 'MinMaxScale': 'Transform features by scaling each feature to a range, which is (0, 1).', 'StandardScale': 'Standardize features by removing the mean and scaling to unit variance.', 'MaxAbsScale': 'Scale each feature by its maximum absolute value.', 'RobustScale': 'Apply the RobustScaler to scale features using statistics that are robust to outliers.', 'OrdinalEncode': 'Encode categorical features as ordinal integers.', 'OneHotEncode': 'Apply one-hot encoding to specified categorical columns, the original columns will be dropped.', 'LabelEncode': 'Apply label encoding to specified categorical columns in-place.'}\\n\\n## Tool Selection and Instructions:\\n- Select tools most relevant to completing the 'User Requirement'.\\n- If you believe that no tools are suitable, indicate with an empty list.\\n- Only list the names of the tools, not the full schema of each tool.\\n- Ensure selected tools are listed in 'Available Tools'.\\n\"}]": {
+        "recommend_tools": []
    },
-    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        
\\\"is_finished\\\": false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
-        "code": "from sklearn import datasets\nimport pandas as pd\n\n# Load Iris dataset\niris = datasets.load_iris()\ndf_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)\ndf_iris['target'] = iris.target\n\n# Display basic information about the dataset\nprint(df_iris.info())\n\n# Display statistical summary of the dataset\nprint(df_iris.describe())\n\n# Display the first few rows of the dataset\nprint(df_iris.head())\n\n# Display the distribution of the target variable\ntarget_counts = df_iris['target'].value_counts()\nprint(target_counts)"
+    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"data_preprocess\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": 
false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"1\\\",\\\"dependent_task_ids\\\":[],\\\"instruction\\\":\\\"Load the sklearn Iris dataset.\\\",\\\"task_type\\\":\\\"data_preprocess\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\nThe current task is about data preprocessing, please note the following:\\n- Monitor data types per column, applying appropriate methods.\\n- Ensure operations are on existing dataset columns.\\n- Avoid writing processed data to files.\\n- Avoid any change to label column, such as standardization, etc.\\n- Prefer alternatives to one-hot encoding for categorical data.\\n- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.\\n- Each step do data preprocessing to train, must do same for test separately at the same time.\\n\\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
+        "code": "from sklearn.datasets import load_iris\niris_data = load_iris()\nX, y = iris_data.data, iris_data.target"
    },
-    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        
\\\"is_finished\\\": false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
-        "code": "from sklearn import datasets\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# Load Iris dataset\niris = datasets.load_iris()\niris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)\niris_df['target'] = iris.target\niris_df['target_name'] = iris_df['target'].apply(lambda x: iris.target_names[x])\n\n# Plotting\nfig, ax = plt.subplots(figsize=(12, 8))\nfor target, target_name in zip(iris.target_names, iris.target_names):\n    subset = iris_df[iris_df['target_name'] == target_name]\n    ax.scatter(subset[iris.feature_names[0]], subset[iris.feature_names[1]], label=target_name)\n\nax.set_xlabel(iris.feature_names[0])\nax.set_ylabel(iris.feature_names[1])\nax.legend()\nplt.show()"
+    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"data_preprocess\\\",\\n        \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\\nX, y = iris_data.data, iris_data.target\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        
\\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\nThe current task is about exploratory data analysis, please note the following:\\n- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.\\n- Remember to `import numpy as np` before using Numpy functions.\\n\\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
+        "code": "import numpy as np\nimport pandas as pd\n\n# Convert the sklearn dataset to a pandas DataFrame for easier manipulation\nfeature_names = iris_data.feature_names\niris_df = pd.DataFrame(X, columns=feature_names)\n\n# Add target column to the DataFrame\niris_df['species'] = pd.Categorical.from_codes(iris_data.target, iris_data.target_names)\n\n# Display basic information about the dataset\niris_info = iris_df.info()\n\n# Display basic statistics about the dataset\niris_description = iris_df.describe()\n\n# Check for missing values\nmissing_values = iris_df.isnull().sum()\n\n# Distinguish column types\nnumerical_cols = iris_df.select_dtypes(include=[np.number]).columns.tolist()\ncategorical_cols = iris_df.select_dtypes(include=['category']).columns.tolist()\n\n# Display the first few rows of the DataFrame\nhead = iris_df.head()\n\n# Output the results\n(iris_info, iris_description, missing_values, numerical_cols, categorical_cols, head)"
    },
-    "[{\"role\": \"user\", \"content\": \"\\n# Background\\nAs a data scientist, you need to help user to achieve their goal [This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: 'tests/data/ml_datasets/titanic/split_train.csv', eval data path: 'tests/data/ml_datasets/titanic/split_eval.csv'.] step-by-step in an continuous Jupyter notebook.\\n\\n## Done Tasks\\n```python\\n\\n```end\\n\\n## Current Task\\nPerform exploratory data analysis on the train dataset to understand the features and target variable.\\n\\n# Latest Data Info\\nLatest data info after previous tasks:\\n\\n\\n# Task\\nWrite complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Output Example:\\nwhen current task is \\\"train a lightgbm model on training data\\\", the code can be like:\\n```python\\n# Step 1: check data type and convert to numeric\\nobj_cols = train.select_dtypes(include='object').columns.tolist()\\n\\nfor col in obj_cols:\\n    encoder = LabelEncoder()\\n    train[col] = encoder.fit_transform(train[col].unique().tolist() + ['unknown'])\\n    test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')\\n    test[col] = encoder.transform(test[col])\\n\\n# Step 2: train lightgbm model\\nmodel = LGBMClassifier()\\nmodel.fit(train, y_train)\\n```end\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n\"}]": {
-        "code": "# Perform exploratory data analysis on the train dataset\ndf_train = pd.read_csv('tests/data/ml_datasets/titanic/split_train.csv')\n\n# Display the first few rows of the dataset\ndisplay(df_train.head())\n\n# Summary statistics for numerical features\ndisplay(df_train.describe())\n\n# Summary information about the dataset including the data types and number of non-null values\ndisplay(df_train.info())\n\n# Distribution of the target variable 'Survived'\nsurvival_counts = df_train['Survived'].value_counts()\nprint(\"Survival counts:\\n\", survival_counts)\n\n# Visualizations\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Distribution of the target variable\nsns.countplot(x='Survived', data=df_train)\nplt.title('Distribution of Survival')\nplt.show()\n\n# Correlation matrix heatmap to understand the relationship between features\nplt.figure(figsize=(10, 8))\nsns.heatmap(df_train.corr(), annot=True, fmt='.2f')\nplt.title('Correlation Matrix')\nplt.show()\n\n# Pairplot to visualize the pairwise relationships between features\nsns.pairplot(df_train, hue='Survived')\nplt.title('Pairplot of Features')\nplt.show()"
+    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"data_preprocess\\\",\\n        \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\\nX, y = iris_data.data, iris_data.target\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"import numpy as np\\\\nimport pandas as pd\\\\n\\\\n# Convert the sklearn dataset to a pandas DataFrame for easier manipulation\\\\nfeature_names = iris_data.feature_names\\\\niris_df = pd.DataFrame(X, columns=feature_names)\\\\n\\\\n# Add target column to the DataFrame\\\\niris_df['species'] = pd.Categorical.from_codes(iris_data.target, iris_data.target_names)\\\\n\\\\n# Display basic 
information about the dataset\\\\niris_info = iris_df.info()\\\\n\\\\n# Display basic statistics about the dataset\\\\niris_description = iris_df.describe()\\\\n\\\\n# Check for missing values\\\\nmissing_values = iris_df.isnull().sum()\\\\n\\\\n# Distinguish column types\\\\nnumerical_cols = iris_df.select_dtypes(include=[np.number]).columns.tolist()\\\\ncategorical_cols = iris_df.select_dtypes(include=['category']).columns.tolist()\\\\n\\\\n# Display the first few rows of the DataFrame\\\\nhead = iris_df.head()\\\\n\\\\n# Output the results\\\\n(iris_info, iris_description, missing_values, numerical_cols, categorical_cols, head)\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. 
And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
+        "code": "import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Set the style of seaborn\nsns.set(style='whitegrid')\n\n# Create a pairplot to visualize the relationships between the features\npairplot = sns.pairplot(iris_df, hue='species')\nplt.show()"
+    },
+    "[{\"role\": \"user\", \"content\": \"\\n# Background\\nAs a data scientist, you need to help user to achieve their goal [This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: 'tests/data/ml_datasets/titanic/split_train.csv', eval data path: 'tests/data/ml_datasets/titanic/split_eval.csv'.] step-by-step in an continuous Jupyter notebook.\\n\\n## Done Tasks\\n```python\\n\\n```end\\n\\n## Current Task\\nPerform exploratory data analysis on the train dataset to understand the features and target variable.\\n\\n# Latest Data Info\\nLatest data info after previous tasks:\\n\\n\\n# Task\\nWrite complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.\\nSpecifically, \\nThe current task is about exploratory data analysis, please note the following:\\n- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.\\n- Remember to `import numpy as np` before using Numpy functions.\\n\\n\\n# Output Example:\\nwhen current task is \\\"train a lightgbm model on training data\\\", the code can be like:\\n```python\\n# Step 1: check data type and convert to numeric\\nobj_cols = train.select_dtypes(include='object').columns.tolist()\\n\\nfor col in obj_cols:\\n    encoder = LabelEncoder()\\n    train[col] = encoder.fit_transform(train[col].unique().tolist() + ['unknown'])\\n    test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')\\n    test[col] = encoder.transform(test[col])\\n\\n# Step 2: train lightgbm model\\nmodel = LGBMClassifier()\\nmodel.fit(train, y_train)\\n```end\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been 
executed.\\n\"}]": {
+        "code": "# Perform exploratory data analysis on the train dataset\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Read the train dataset\ntrain_data = pd.read_csv('tests/data/ml_datasets/titanic/split_train.csv')\n\n# Display the first few rows of the dataset\ndisplay(train_data.head())\n\n# Summary statistics for numerical features\nprint(train_data.describe())\n\n# Summary statistics for categorical features\nprint(train_data.describe(include=['O']))\n\n# Check for missing values\nprint(train_data.isnull().sum())\n\n# Distribution of the target variable\nsns.countplot(x='Survived', data=train_data)\nplt.title('Distribution of Survival on the Titanic')\nplt.show()\n\n# Correlation matrix for numerical features\nnumerical_features = train_data.select_dtypes(include=[np.number])\ncorrelation_matrix = numerical_features.corr()\nplt.figure(figsize=(10, 8))\nsns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)\nplt.title('Correlation Matrix for Numerical Features')\nplt.show()\n\n# Pairplot for selected features\nselected_features = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']\nsns.pairplot(train_data[selected_features], hue='Survived')\nplt.show()\n\n# Boxplot for categorical features vs Survived\nfor column in train_data.select_dtypes(include=['O']).columns:\n    if column != 'Survived':\n        plt.figure(figsize=(10, 5))\n        sns.boxplot(x='Survived', y=column, data=train_data)\n        plt.title(f'Survived vs {column}')\n        plt.show()\n"
    },
    "[{\"role\": \"system\", \"content\": \"You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation \"}, {\"role\": \"user\", \"content\": \"\\nHere is an example for you.\\n\\nExample 1:\\n[previous impl]:\\n```python\\ndef add(a: int, b: int) -> int:\\n   \\\"\\\"\\\"\\n   Given integers a and b, return the total value of a and b.\\n   \\\"\\\"\\\"\\n   return a - b\\n```\\n\\n[runtime Error]:\\nTested passed:\\n\\nTests failed:\\nassert add(1, 2) == 3 # output: -1\\nassert add(1, 2) == 4 # output: -1\\n\\n[reflection on previous impl]:\\nThe implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.\\n\\n[improved impl]:\\n```python\\ndef add(a: int, b: int) -> int:\\n   \\\"\\\"\\\"\\n   Given integers a and b, return the total value of a and b.\\n   \\\"\\\"\\\"\\n   return a + b\\n```\\n\\n[context]\\nSolve the problem in Python:\\ndef sort_array(arr):\\n    \\\"\\\"\\\"\\n    In this Kata, you have to sort an array of non-negative integers according to\\n    number of ones in their binary representation in ascending order.\\n    For similar number of ones, sort based on decimal value.\\n\\n    It must be implemented like this:\\n    >>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]\\n    >>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]\\n    >>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]\\n    \\\"\\\"\\\"\\n\\n\\n[previous impl]\\n\\ndef sort_array(arr):\\n    # Helper function to count the number of ones in the binary representation\\n    def count_ones(n):\\n        return 
bin(n).count('1')\\n    \\n    # Sort the array using a custom key function\\n    # The key function returns a tuple (number of ones, value) for each element\\n    # This ensures that if two elements have the same number of ones, they are sorted by their value\\n    sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x))\\n    \\n    return sorted_arr\\n```\\n\\n[runtime Error]\\n[user: Tested passed:\\n\\nTests failed:\\nassert sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5] # output: [1, 2, 4, 3, 5]\\n]\\n\\nAnalysis the error step by step, provide me improve method and code. Remember to follow [context] requirement. Don't forget write code for steps behind the error step.\\n[reflection on previous impl]:\\nxxx\\n\"}]": {
        "reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.",
--- a/tests/metagpt/actions/mi/test_ask_review.py
+++ b/tests/metagpt/actions/mi/test_ask_review.py
@ -1,6 +1,6 @@
 import pytest

-from metagpt.actions.ci.ask_review import AskReview
+from metagpt.actions.mi.ask_review import AskReview


@pytest.mark.asyncio
--- a/tests/metagpt/actions/mi/test_debug_code.py
+++ b/tests/metagpt/actions/mi/test_debug_code.py
@ -5,7 +5,7 @@

 import pytest

-from metagpt.actions.ci.debug_code import DebugCode
+from metagpt.actions.mi.debug_code import DebugCode
 from metagpt.schema import Message

 ErrorStr = """Tested passed:
--- a/tests/metagpt/actions/mi/test_execute_nb_code.py
+++ b/tests/metagpt/actions/mi/test_execute_nb_code.py
@ -1,6 +1,6 @@
 import pytest

-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode, truncate
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode, truncate


@pytest.mark.asyncio
--- a/tests/metagpt/actions/mi/test_ml_action.py
+++ b/tests/metagpt/actions/mi/test_ml_action.py
@ -1,6 +1,6 @@
 import pytest

-from metagpt.actions.ci.ml_action import WriteCodeWithToolsML
+from metagpt.actions.mi.ml_action import WriteCodeWithToolsML
 from metagpt.schema import Plan, Task


--- a/tests/metagpt/actions/mi/test_write_analysis_code.py
+++ b/tests/metagpt/actions/mi/test_write_analysis_code.py
@ -2,8 +2,8 @@ import asyncio

 import pytest

-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
-from metagpt.actions.ci.write_analysis_code import (
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
+from metagpt.actions.mi.write_analysis_code import (
    WriteCodeWithoutTools,
    WriteCodeWithTools,
 )
--- a/tests/metagpt/actions/mi/test_write_plan.py
+++ b/tests/metagpt/actions/mi/test_write_plan.py
@ -1,6 +1,6 @@
 import pytest

-from metagpt.actions.ci.write_plan import (
+from metagpt.actions.mi.write_plan import (
    Plan,
    Task,
    WritePlan,
--- a/tests/metagpt/actions/test_rebuild_class_view.py
+++ b/tests/metagpt/actions/test_rebuild_class_view.py
@ -14,6 +14,7 @@ from metagpt.actions.rebuild_class_view import RebuildClassView
 from metagpt.llm import LLM


+@pytest.mark.skip
@pytest.mark.asyncio
 async def test_rebuild(context):
    action = RebuildClassView(
--- a/tests/metagpt/document_store/test_faiss_store.py
+++ b/tests/metagpt/document_store/test_faiss_store.py
@ -6,6 +6,9 @@
@File    : test_faiss_store.py
 """

+from typing import Optional
+
+import numpy as np
 import pytest

 from metagpt.const import EXAMPLE_PATH
@ -14,8 +17,17 @@ from metagpt.logs import logger
 from metagpt.roles import Sales


+def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]:
+    """Return random column-standardized vectors, one row per text, with no NaN for constant columns."""
+    embeds = np.random.randint(1, 100, size=(len(texts), 1536))  # 1536: openai embedding dim
+    std = embeds.std(axis=0)  # 0 for a constant column, e.g. always when a single text is embedded
+    return (embeds - embeds.mean(axis=0)) / np.where(std == 0, 1.0, std)  # avoid 0/0 -> NaN
+
+
@pytest.mark.asyncio
-async def test_search_json():
+async def test_search_json(mocker):
+    mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
+
    store = FaissStore(EXAMPLE_PATH / "example.json")
    role = Sales(profile="Sales", store=store)
    query = "Which facial cleanser is good for oily skin?"
@ -24,7 +36,9 @@ async def test_search_json():


@pytest.mark.asyncio
-async def test_search_xlsx():
+async def test_search_xlsx(mocker):
+    mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
+
    store = FaissStore(EXAMPLE_PATH / "example.xlsx")
    role = Sales(profile="Sales", store=store)
    query = "Which facial cleanser is good for oily skin?"
@ -33,7 +47,9 @@ async def test_search_xlsx():


@pytest.mark.asyncio
-async def test_write():
+async def test_write(mocker):
+    mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
+
    store = FaissStore(EXAMPLE_PATH / "example.xlsx", meta_col="Answer", content_col="Question")
    _faiss_store = store.write()
    assert _faiss_store.docstore
--- a/tests/metagpt/memory/mock_text_embed.py
+++ b/tests/metagpt/memory/mock_text_embed.py
@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : mock text embeddings (fixed vectors per text) for the memory unit tests
+
+from typing import Optional
+
+import numpy as np
+
+dim = 1536  # openai embedding dim
+
+text_embed_arr = [  # mock records; each "embed" is an all-zeros or all-ones array of shape [1, dim]
+    {"text": "Write a cli snake game", "embed": np.zeros(shape=[1, dim])},  # mock data, same as below
+    {"text": "Write a game of cli snake", "embed": np.zeros(shape=[1, dim])},
+    {"text": "Write a 2048 web game", "embed": np.ones(shape=[1, dim])},
+    {"text": "Write a Battle City", "embed": np.ones(shape=[1, dim])},
+    {
+        "text": "The user has requested the creation of a command-line interface (CLI) snake game",
+        "embed": np.zeros(shape=[1, dim]),
+    },
+    {"text": "The request is command-line interface (CLI) snake game", "embed": np.zeros(shape=[1, dim])},
+    {
+        "text": "Incorporate basic features of a snake game such as scoring and increasing difficulty",
+        "embed": np.ones(shape=[1, dim]),
+    },
+]
+
+text_idx_dict = {item["text"]: idx for idx, item in enumerate(text_embed_arr)}  # text -> index in text_embed_arr
+
+
+def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]:
+    """Return the canned embeddings of `texts`, one row per text; raise KeyError for a text without mock data."""
+    # every stored "embed" has shape [1, dim], so stacking on axis 0 yields shape [len(texts), dim]
+    return np.concatenate([text_embed_arr[text_idx_dict[text]]["embed"] for text in texts], axis=0)
--- a/tests/metagpt/memory/test_longterm_memory.py
+++ b/tests/metagpt/memory/test_longterm_memory.py
@ -4,20 +4,22 @@
@Desc   : unittest of `metagpt/memory/longterm_memory.py`
 """

-import os

 import pytest

 from metagpt.actions import UserRequirement
-from metagpt.config2 import config
 from metagpt.memory.longterm_memory import LongTermMemory
 from metagpt.roles.role import RoleContext
 from metagpt.schema import Message
-
-os.environ.setdefault("OPENAI_API_KEY", config.get_openai_llm().api_key)
+from tests.metagpt.memory.mock_text_embed import (
+    mock_openai_embed_documents,
+    text_embed_arr,
+)


-def test_ltm_search():
+def test_ltm_search(mocker):
+    mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
+
    role_id = "UTUserLtm(Product Manager)"
    from metagpt.environment import Environment

@ -27,20 +29,20 @@ def test_ltm_search():
    ltm = LongTermMemory()
    ltm.recover_memory(role_id, rc)

-    idea = "Write a cli snake game"
+    idea = text_embed_arr[0].get("text", "Write a cli snake game")
    message = Message(role="User", content=idea, cause_by=UserRequirement)
    news = ltm.find_news([message])
    assert len(news) == 1
    ltm.add(message)

-    sim_idea = "Write a game of cli snake"
+    sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake")

    sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement)
    news = ltm.find_news([sim_message])
    assert len(news) == 0
    ltm.add(sim_message)

-    new_idea = "Write a 2048 web game"
+    new_idea = text_embed_arr[2].get("text", "Write a 2048 web game")
    new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
    news = ltm.find_news([new_message])
    assert len(news) == 1
@ -56,7 +58,7 @@ def test_ltm_search():
    news = ltm_new.find_news([sim_message])
    assert len(news) == 0

-    new_idea = "Write a Battle City"
+    new_idea = text_embed_arr[3].get("text", "Write a Battle City")
    new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
    news = ltm_new.find_news([new_message])
    assert len(news) == 1
--- a/tests/metagpt/memory/test_memory_storage.py
+++ b/tests/metagpt/memory/test_memory_storage.py
@ -4,23 +4,25 @@
@Desc   : the unittests of metagpt/memory/memory_storage.py
 """

-import os
 import shutil
 from pathlib import Path
 from typing import List

 from metagpt.actions import UserRequirement, WritePRD
 from metagpt.actions.action_node import ActionNode
-from metagpt.config2 import config
 from metagpt.const import DATA_PATH
 from metagpt.memory.memory_storage import MemoryStorage
 from metagpt.schema import Message
-
-os.environ.setdefault("OPENAI_API_KEY", config.get_openai_llm().api_key)
+from tests.metagpt.memory.mock_text_embed import (
+    mock_openai_embed_documents,
+    text_embed_arr,
+)


-def test_idea_message():
-    idea = "Write a cli snake game"
+def test_idea_message(mocker):
+    mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
+
+    idea = text_embed_arr[0].get("text", "Write a cli snake game")
    role_id = "UTUser1(Product Manager)"
    message = Message(role="User", content=idea, cause_by=UserRequirement)

@ -33,12 +35,12 @@ def test_idea_message():
    memory_storage.add(message)
    assert memory_storage.is_initialized is True

-    sim_idea = "Write a game of cli snake"
+    sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake")
    sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement)
    new_messages = memory_storage.search_dissimilar(sim_message)
    assert len(new_messages) == 0  # similar, return []

-    new_idea = "Write a 2048 web game"
+    new_idea = text_embed_arr[2].get("text", "Write a 2048 web game")
    new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
    new_messages = memory_storage.search_dissimilar(new_message)
    assert new_messages[0].content == message.content
@ -47,13 +49,17 @@ def test_idea_message():
    assert memory_storage.is_initialized is False


-def test_actionout_message():
+def test_actionout_message(mocker):
+    mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
+
    out_mapping = {"field1": (str, ...), "field2": (List[str], ...)}
    out_data = {"field1": "field1 value", "field2": ["field2 value1", "field2 value2"]}
    ic_obj = ActionNode.create_model_class("prd", out_mapping)

    role_id = "UTUser2(Architect)"
-    content = "The user has requested the creation of a command-line interface (CLI) snake game"
+    content = text_embed_arr[4].get(
+        "text", "The user has requested the creation of a command-line interface (CLI) snake game"
+    )
    message = Message(
        content=content, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD
    )  # WritePRD as test action
@ -67,12 +73,14 @@ def test_actionout_message():
    memory_storage.add(message)
    assert memory_storage.is_initialized is True

-    sim_conent = "The request is command-line interface (CLI) snake game"
+    sim_conent = text_embed_arr[5].get("text", "The request is command-line interface (CLI) snake game")
    sim_message = Message(content=sim_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD)
    new_messages = memory_storage.search_dissimilar(sim_message)
    assert len(new_messages) == 0  # similar, return []

-    new_conent = "Incorporate basic features of a snake game such as scoring and increasing difficulty"
+    new_conent = text_embed_arr[6].get(
+        "text", "Incorporate basic features of a snake game such as scoring and increasing difficulty"
+    )
    new_message = Message(content=new_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD)
    new_messages = memory_storage.search_dissimilar(new_message)
    assert new_messages[0].content == message.content
--- a/tests/metagpt/provider/mock_llm_config.py
+++ b/tests/metagpt/provider/mock_llm_config.py
@ -42,3 +42,17 @@ mock_llm_config_zhipu = LLMConfig(
    model="mock_zhipu_model",
    proxy="http://localhost:8080",
 )
+
+
+mock_llm_config_spark = LLMConfig(
+    api_type="spark",
+    app_id="xxx",  # NOTE(review): "xxx" looks like a placeholder credential, here and in the configs below
+    api_key="xxx",
+    api_secret="xxx",
+    domain="generalv2",
+    base_url="wss://spark-api.xf-yun.com/v3.1/chat",
+)
+
+mock_llm_config_qianfan = LLMConfig(api_type="qianfan", access_key="xxx", secret_key="xxx", model="ERNIE-Bot-turbo")
+
+mock_llm_config_dashscope = LLMConfig(api_type="dashscope", api_key="xxx", model="qwen-max")
--- a/tests/metagpt/provider/req_resp_const.py
+++ b/tests/metagpt/provider/req_resp_const.py
@ -0,0 +1,145 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : default request & response data for provider unittest
+
+
+from dashscope.api_entities.dashscope_response import (
+    DashScopeAPIResponse,
+    GenerationOutput,
+    GenerationResponse,
+    GenerationUsage,
+)
+from openai.types.chat.chat_completion import (
+    ChatCompletion,
+    ChatCompletionMessage,
+    Choice,
+)
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import Choice as AChoice
+from openai.types.chat.chat_completion_chunk import ChoiceDelta
+from openai.types.completion_usage import CompletionUsage
+from qianfan.resources.typing import QfResponse
+
+from metagpt.provider.base_llm import BaseLLM
+
+prompt = "who are you?"  # default user prompt for the provider tests
+messages = [{"role": "user", "content": prompt}]  # the same prompt as a one-message chat history
+
+resp_cont_tmpl = "I'm {name}"  # reply template; `name` is filled in per provider
+default_resp_cont = resp_cont_tmpl.format(name="GPT")  # i.e. "I'm GPT"
+
+
+# Partial payload shaped like an OpenAI ChatCompletion (only `choices` and `usage`).
+def get_part_chat_completion(name: str) -> dict:
+    """Build the `choices`/`usage` portion of an OpenAI-like chat completion.
+
+    Args:
+        name: value substituted into `resp_cont_tmpl` to form the assistant reply.
+
+    Returns:
+        A dict with a single "stop"-finished assistant choice and fixed token usage.
+    """
+    reply = {"role": "assistant", "content": resp_cont_tmpl.format(name=name)}
+    only_choice = {"index": 0, "message": reply, "finish_reason": "stop"}
+    return {
+        "choices": [only_choice],
+        "usage": {"completion_tokens": 22, "prompt_tokens": 19, "total_tokens": 41},
+    }
+
+
+def get_openai_chat_completion(name: str) -> ChatCompletion:
+    """Build a mock non-streaming OpenAI `ChatCompletion`.
+
+    The single assistant choice carries `resp_cont_tmpl` formatted with `name`; the id, model,
+    creation time and token usage are fixed values.
+    """
+    reply = ChatCompletionMessage(role="assistant", content=resp_cont_tmpl.format(name=name))
+    only_choice = Choice(finish_reason="stop", index=0, message=reply, logprobs=None)
+    token_usage = CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202)
+    return ChatCompletion(
+        id="cmpl-a6652c1bb181caae8dd19ad8",
+        model="xx/xxx",
+        object="chat.completion",
+        created=1703300855,
+        choices=[only_choice],
+        usage=token_usage,
+    )
+
+
+def get_openai_chat_completion_chunk(name: str, usage_as_dict: bool = False) -> ChatCompletionChunk:
+    """Build a mock streaming `ChatCompletionChunk` whose delta carries `resp_cont_tmpl` formatted with `name`.
+
+    When `usage_as_dict` is true, `usage` is passed as the `model_dump()` dict of the
+    `CompletionUsage` object instead of the object itself.
+    """
+    token_usage = CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202)
+    # optionally hand `usage` over as a plain dict rather than a model instance
+    if usage_as_dict:
+        token_usage = token_usage.model_dump()
+    delta = ChoiceDelta(role="assistant", content=resp_cont_tmpl.format(name=name))
+    return ChatCompletionChunk(
+        id="cmpl-a6652c1bb181caae8dd19ad8",
+        model="xx/xxx",
+        object="chat.completion.chunk",
+        created=1703300855,
+        choices=[AChoice(delta=delta, finish_reason="stop", index=0, logprobs=None)],
+        usage=token_usage,
+    )
+
+
+# For gemini: the shared prompt as a single user message keyed by "parts"
+gemini_messages = [{"role": "user", "parts": prompt}]
+
+
+# For QianFan: response body template; get_qianfan_response supplies the "result" text
+qf_jsonbody_dict = {
+    "id": "as-4v1h587fyv",
+    "object": "chat.completion",
+    "created": 1695021339,
+    "result": "",  # empty in the template
+    "is_truncated": False,
+    "need_clear_history": False,
+    "usage": {"prompt_tokens": 7, "completion_tokens": 15, "total_tokens": 22},
+}
+
+
+def get_qianfan_response(name: str) -> QfResponse:
+    """Return a mock 200 `QfResponse`; the body is a per-call copy of `qf_jsonbody_dict` with `result` set."""
+    return QfResponse(code=200, body={**qf_jsonbody_dict, "result": resp_cont_tmpl.format(name=name)})
+
+
+# For DashScope
+def get_dashscope_response(name: str) -> GenerationResponse:
+    """Build a mock DashScope `GenerationResponse` from an API response with status code 200.
+
+    Args:
+        name: value substituted into `resp_cont_tmpl` to form the assistant reply.
+
+    Returns:
+        The result of `GenerationResponse.from_api_response`, with one "stop" choice and fixed token usage.
+    """
+    only_choice = {
+        "finish_reason": "stop",
+        "message": {"role": "assistant", "content": resp_cont_tmpl.format(name=name)},
+    }
+    api_response = DashScopeAPIResponse(
+        status_code=200,
+        output=GenerationOutput(text="", finish_reason="", choices=[only_choice]),
+        usage=GenerationUsage(input_tokens=12, output_tokens=98, total_tokens=110),
+    )
+    return GenerationResponse.from_api_response(api_response)
+
+
+# For llm general chat functions call
+async def llm_general_chat_funcs_test(llm: BaseLLM, prompt: str, messages: list[dict], resp_cont: str):
+    """Assert that `llm.aask` and `llm.acompletion_text` each return `resp_cont`.
+
+    Four awaited calls run in sequence; the first failing comparison raises AssertionError.
+    """
+    # aask: explicit stream=False first, then the default stream setting
+    for ask_kwargs in ({"stream": False}, {}):
+        assert await llm.aask(prompt, **ask_kwargs) == resp_cont
+    # acompletion_text: non-streaming first, then streaming
+    for stream in (False, True):
+        reply = await llm.acompletion_text(messages, stream=stream)
+        assert reply == resp_cont
--- a/tests/metagpt/provider/test_anthropic_api.py
+++ b/tests/metagpt/provider/test_anthropic_api.py
@ -8,25 +8,25 @@ from anthropic.resources.completions import Completion

 from metagpt.provider.anthropic_api import Claude2
 from tests.metagpt.provider.mock_llm_config import mock_llm_config
+from tests.metagpt.provider.req_resp_const import prompt, resp_cont_tmpl

-prompt = "who are you"
-resp = "I'am Claude2"
+resp_cont = resp_cont_tmpl.format(name="Claude")


 def mock_anthropic_completions_create(self, model: str, prompt: str, max_tokens_to_sample: int) -> Completion:
-    return Completion(id="xx", completion=resp, model="claude-2", stop_reason="stop_sequence", type="completion")
+    return Completion(id="xx", completion=resp_cont, model="claude-2", stop_reason="stop_sequence", type="completion")


 async def mock_anthropic_acompletions_create(self, model: str, prompt: str, max_tokens_to_sample: int) -> Completion:
-    return Completion(id="xx", completion=resp, model="claude-2", stop_reason="stop_sequence", type="completion")
+    return Completion(id="xx", completion=resp_cont, model="claude-2", stop_reason="stop_sequence", type="completion")


 def test_claude2_ask(mocker):
    mocker.patch("anthropic.resources.completions.Completions.create", mock_anthropic_completions_create)
-    assert resp == Claude2(mock_llm_config).ask(prompt)
+    assert resp_cont == Claude2(mock_llm_config).ask(prompt)


@pytest.mark.asyncio
 async def test_claude2_aask(mocker):
    mocker.patch("anthropic.resources.completions.AsyncCompletions.create", mock_anthropic_acompletions_create)
-    assert resp == await Claude2(mock_llm_config).aask(prompt)
+    assert resp_cont == await Claude2(mock_llm_config).aask(prompt)
--- a/tests/metagpt/provider/test_base_llm.py
+++ b/tests/metagpt/provider/test_base_llm.py
@ -11,21 +11,13 @@ import pytest
 from metagpt.configs.llm_config import LLMConfig
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.schema import Message
+from tests.metagpt.provider.req_resp_const import (
+    default_resp_cont,
+    get_part_chat_completion,
+    prompt,
+)

-default_chat_resp = {
-    "choices": [
-        {
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": "I'am GPT",
-            },
-            "finish_reason": "stop",
-        }
-    ]
-}
-prompt_msg = "who are you"
-resp_content = default_chat_resp["choices"][0]["message"]["content"]
+name = "GPT"


 class MockBaseLLM(BaseLLM):
@ -33,16 +25,13 @@ class MockBaseLLM(BaseLLM):
        pass

    def completion(self, messages: list[dict], timeout=3):
-        return default_chat_resp
+        return get_part_chat_completion(name)

    async def acompletion(self, messages: list[dict], timeout=3):
-        return default_chat_resp
+        return get_part_chat_completion(name)

    async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
-        return resp_content
-
-    async def close(self):
-        return default_chat_resp
+        return default_resp_cont


 def test_base_llm():
@ -86,25 +75,25 @@ def test_base_llm():
    choice_text = base_llm.get_choice_text(openai_funccall_resp)
    assert choice_text == openai_funccall_resp["choices"][0]["message"]["content"]

-    # resp = base_llm.ask(prompt_msg)
-    # assert resp == resp_content
+    # resp = base_llm.ask(prompt)
+    # assert resp == default_resp_cont

-    # resp = base_llm.ask_batch([prompt_msg])
-    # assert resp == resp_content
+    # resp = base_llm.ask_batch([prompt])
+    # assert resp == default_resp_cont

-    # resp = base_llm.ask_code([prompt_msg])
-    # assert resp == resp_content
+    # resp = base_llm.ask_code([prompt])
+    # assert resp == default_resp_cont


@pytest.mark.asyncio
 async def test_async_base_llm():
    base_llm = MockBaseLLM()

-    resp = await base_llm.aask(prompt_msg)
-    assert resp == resp_content
+    resp = await base_llm.aask(prompt)
+    assert resp == default_resp_cont

-    resp = await base_llm.aask_batch([prompt_msg])
-    assert resp == resp_content
+    resp = await base_llm.aask_batch([prompt])
+    assert resp == default_resp_cont

-    # resp = await base_llm.aask_code([prompt_msg])
-    # assert resp == resp_content
+    # resp = await base_llm.aask_code([prompt])
+    # assert resp == default_resp_cont
--- a/tests/metagpt/provider/test_dashscope_api.py
+++ b/tests/metagpt/provider/test_dashscope_api.py
@ -0,0 +1,73 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : the unittest of DashScopeLLM
+
+from typing import AsyncGenerator, Union
+
+import pytest
+from dashscope.api_entities.dashscope_response import GenerationResponse
+
+from metagpt.provider.dashscope_api import DashScopeLLM
+from tests.metagpt.provider.mock_llm_config import mock_llm_config_dashscope
+from tests.metagpt.provider.req_resp_const import (
+    get_dashscope_response,
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)
+
+name = "qwen-max"
+resp_cont = resp_cont_tmpl.format(name=name)
+
+
+@classmethod
+def mock_dashscope_call(
+    cls,
+    messages: list[dict],
+    model: str,
+    api_key: str,
+    result_format: str,
+    incremental_output: bool = True,
+    stream: bool = False,
+) -> GenerationResponse:
+    return get_dashscope_response(name)
+
+
+@classmethod
+async def mock_dashscope_acall(
+    cls,
+    messages: list[dict],
+    model: str,
+    api_key: str,
+    result_format: str,
+    incremental_output: bool = True,
+    stream: bool = False,
+) -> Union[AsyncGenerator[GenerationResponse, None], GenerationResponse]:
+    resps = [get_dashscope_response(name)]
+
+    if stream:
+
+        async def aresp_iterator(resps: list[GenerationResponse]):
+            for resp in resps:
+                yield resp
+
+        return aresp_iterator(resps)
+    else:
+        return resps[0]
+
+
+@pytest.mark.asyncio
+async def test_dashscope_acompletion(mocker):
+    mocker.patch("dashscope.aigc.generation.Generation.call", mock_dashscope_call)
+    mocker.patch("metagpt.provider.dashscope_api.AGeneration.acall", mock_dashscope_acall)
+
+    dashscope_llm = DashScopeLLM(mock_llm_config_dashscope)
+
+    resp = dashscope_llm.completion(messages)
+    assert resp.choices[0]["message"]["content"] == resp_cont
+
+    resp = await dashscope_llm.acompletion(messages)
+    assert resp.choices[0]["message"]["content"] == resp_cont
+
+    await llm_general_chat_funcs_test(dashscope_llm, prompt, messages, resp_cont)
--- a/tests/metagpt/provider/test_fireworks_llm.py
+++ b/tests/metagpt/provider/test_fireworks_llm.py
@ -1,114 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Desc   : the unittest of fireworks api
-
-import pytest
-from openai.types.chat.chat_completion import (
-    ChatCompletion,
-    ChatCompletionMessage,
-    Choice,
-)
-from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
-from openai.types.chat.chat_completion_chunk import Choice as AChoice
-from openai.types.chat.chat_completion_chunk import ChoiceDelta
-from openai.types.completion_usage import CompletionUsage
-
-from metagpt.provider.fireworks_api import (
-    MODEL_GRADE_TOKEN_COSTS,
-    FireworksCostManager,
-    FireworksLLM,
-)
-from metagpt.utils.cost_manager import Costs
-from tests.metagpt.provider.mock_llm_config import mock_llm_config
-
-resp_content = "I'm fireworks"
-default_resp = ChatCompletion(
-    id="cmpl-a6652c1bb181caae8dd19ad8",
-    model="accounts/fireworks/models/llama-v2-13b-chat",
-    object="chat.completion",
-    created=1703300855,
-    choices=[
-        Choice(
-            finish_reason="stop",
-            index=0,
-            message=ChatCompletionMessage(role="assistant", content=resp_content),
-            logprobs=None,
-        )
-    ],
-    usage=CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202),
-)
-
-default_resp_chunk = ChatCompletionChunk(
-    id=default_resp.id,
-    model=default_resp.model,
-    object="chat.completion.chunk",
-    created=default_resp.created,
-    choices=[
-        AChoice(
-            delta=ChoiceDelta(content=resp_content, role="assistant"),
-            finish_reason="stop",
-            index=0,
-            logprobs=None,
-        )
-    ],
-    usage=dict(default_resp.usage),
-)
-
-prompt_msg = "who are you"
-messages = [{"role": "user", "content": prompt_msg}]
-
-
-def test_fireworks_costmanager():
-    cost_manager = FireworksCostManager()
-    assert MODEL_GRADE_TOKEN_COSTS["-1"] == cost_manager.model_grade_token_costs("test")
-    assert MODEL_GRADE_TOKEN_COSTS["-1"] == cost_manager.model_grade_token_costs("xxx-81b-chat")
-    assert MODEL_GRADE_TOKEN_COSTS["16"] == cost_manager.model_grade_token_costs("llama-v2-13b-chat")
-    assert MODEL_GRADE_TOKEN_COSTS["16"] == cost_manager.model_grade_token_costs("xxx-15.5b-chat")
-    assert MODEL_GRADE_TOKEN_COSTS["16"] == cost_manager.model_grade_token_costs("xxx-16b-chat")
-    assert MODEL_GRADE_TOKEN_COSTS["80"] == cost_manager.model_grade_token_costs("xxx-80b-chat")
-    assert MODEL_GRADE_TOKEN_COSTS["mixtral-8x7b"] == cost_manager.model_grade_token_costs("mixtral-8x7b-chat")
-
-    cost_manager.update_cost(prompt_tokens=500000, completion_tokens=500000, model="llama-v2-13b-chat")
-    assert cost_manager.total_cost == 0.5
-
-
-async def mock_openai_acompletions_create(self, stream: bool = False, **kwargs) -> ChatCompletionChunk:
-    if stream:
-
-        class Iterator(object):
-            async def __aiter__(self):
-                yield default_resp_chunk
-
-        return Iterator()
-    else:
-        return default_resp
-
-
-@pytest.mark.asyncio
-async def test_fireworks_acompletion(mocker):
-    mocker.patch("openai.resources.chat.completions.AsyncCompletions.create", mock_openai_acompletions_create)
-
-    fireworks_gpt = FireworksLLM(mock_llm_config)
-    fireworks_gpt.model = "llama-v2-13b-chat"
-
-    fireworks_gpt._update_costs(
-        usage=CompletionUsage(prompt_tokens=500000, completion_tokens=500000, total_tokens=1000000)
-    )
-    assert fireworks_gpt.get_costs() == Costs(
-        total_prompt_tokens=500000, total_completion_tokens=500000, total_cost=0.5, total_budget=0
-    )
-
-    resp = await fireworks_gpt.acompletion(messages)
-    assert resp.choices[0].message.content in resp_content
-
-    resp = await fireworks_gpt.aask(prompt_msg, stream=False)
-    assert resp == resp_content
-
-    resp = await fireworks_gpt.acompletion_text(messages, stream=False)
-    assert resp == resp_content
-
-    resp = await fireworks_gpt.acompletion_text(messages, stream=True)
-    assert resp == resp_content
-
-    resp = await fireworks_gpt.aask(prompt_msg)
-    assert resp == resp_content
--- a/tests/metagpt/provider/test_google_gemini_api.py
+++ b/tests/metagpt/provider/test_google_gemini_api.py
@ -11,6 +11,12 @@ from google.generativeai.types import content_types

 from metagpt.provider.google_gemini_api import GeminiLLM
 from tests.metagpt.provider.mock_llm_config import mock_llm_config
+from tests.metagpt.provider.req_resp_const import (
+    gemini_messages,
+    llm_general_chat_funcs_test,
+    prompt,
+    resp_cont_tmpl,
+)


@dataclass
@ -18,10 +24,8 @@ class MockGeminiResponse(ABC):
    text: str


-prompt_msg = "who are you"
-messages = [{"role": "user", "parts": prompt_msg}]
-resp_content = "I'm gemini from google"
-default_resp = MockGeminiResponse(text=resp_content)
+resp_cont = resp_cont_tmpl.format(name="gemini")
+default_resp = MockGeminiResponse(text=resp_cont)


 def mock_gemini_count_tokens(self, contents: content_types.ContentsType) -> glm.CountTokensResponse:
@ -60,28 +64,18 @@ async def test_gemini_acompletion(mocker):
        mock_gemini_generate_content_async,
    )

-    gemini_gpt = GeminiLLM(mock_llm_config)
+    gemini_llm = GeminiLLM(mock_llm_config)

-    assert gemini_gpt._user_msg(prompt_msg) == {"role": "user", "parts": [prompt_msg]}
-    assert gemini_gpt._assistant_msg(prompt_msg) == {"role": "model", "parts": [prompt_msg]}
+    assert gemini_llm._user_msg(prompt) == {"role": "user", "parts": [prompt]}
+    assert gemini_llm._assistant_msg(prompt) == {"role": "model", "parts": [prompt]}

-    usage = gemini_gpt.get_usage(messages, resp_content)
+    usage = gemini_llm.get_usage(gemini_messages, resp_cont)
    assert usage == {"prompt_tokens": 20, "completion_tokens": 20}

-    resp = gemini_gpt.completion(messages)
+    resp = gemini_llm.completion(gemini_messages)
    assert resp == default_resp

-    resp = await gemini_gpt.acompletion(messages)
+    resp = await gemini_llm.acompletion(gemini_messages)
    assert resp.text == default_resp.text

-    resp = await gemini_gpt.aask(prompt_msg, stream=False)
-    assert resp == resp_content
-
-    resp = await gemini_gpt.acompletion_text(messages, stream=False)
-    assert resp == resp_content
-
-    resp = await gemini_gpt.acompletion_text(messages, stream=True)
-    assert resp == resp_content
-
-    resp = await gemini_gpt.aask(prompt_msg)
-    assert resp == resp_content
+    await llm_general_chat_funcs_test(gemini_llm, prompt, gemini_messages, resp_cont)
--- a/tests/metagpt/provider/test_ollama_api.py
+++ b/tests/metagpt/provider/test_ollama_api.py
@ -9,12 +9,15 @@ import pytest

 from metagpt.provider.ollama_api import OllamaLLM
 from tests.metagpt.provider.mock_llm_config import mock_llm_config
+from tests.metagpt.provider.req_resp_const import (
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)

-prompt_msg = "who are you"
-messages = [{"role": "user", "content": prompt_msg}]
-
-resp_content = "I'm ollama"
-default_resp = {"message": {"role": "assistant", "content": resp_content}}
+resp_cont = resp_cont_tmpl.format(name="ollama")
+default_resp = {"message": {"role": "assistant", "content": resp_cont}}


 async def mock_ollama_arequest(self, stream: bool = False, **kwargs) -> Tuple[Any, Any, bool]:
@ -41,19 +44,12 @@ async def mock_ollama_arequest(self, stream: bool = False, **kwargs) -> Tuple[An
 async def test_gemini_acompletion(mocker):
    mocker.patch("metagpt.provider.general_api_requestor.GeneralAPIRequestor.arequest", mock_ollama_arequest)

-    ollama_gpt = OllamaLLM(mock_llm_config)
+    ollama_llm = OllamaLLM(mock_llm_config)

-    resp = await ollama_gpt.acompletion(messages)
+    resp = await ollama_llm.acompletion(messages)
    assert resp["message"]["content"] == default_resp["message"]["content"]

-    resp = await ollama_gpt.aask(prompt_msg, stream=False)
-    assert resp == resp_content
+    resp = await ollama_llm.aask(prompt, stream=False)
+    assert resp == resp_cont

-    resp = await ollama_gpt.acompletion_text(messages, stream=False)
-    assert resp == resp_content
-
-    resp = await ollama_gpt.acompletion_text(messages, stream=True)
-    assert resp == resp_content
-
-    resp = await ollama_gpt.aask(prompt_msg)
-    assert resp == resp_content
+    await llm_general_chat_funcs_test(ollama_llm, prompt, messages, resp_cont)
--- a/tests/metagpt/provider/test_open_llm_api.py
+++ b/tests/metagpt/provider/test_open_llm_api.py
@ -1,92 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Desc   :
-
-import pytest
-from openai.types.chat.chat_completion import (
-    ChatCompletion,
-    ChatCompletionMessage,
-    Choice,
-)
-from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
-from openai.types.chat.chat_completion_chunk import Choice as AChoice
-from openai.types.chat.chat_completion_chunk import ChoiceDelta
-from openai.types.completion_usage import CompletionUsage
-
-from metagpt.provider.open_llm_api import OpenLLM
-from metagpt.utils.cost_manager import Costs
-from tests.metagpt.provider.mock_llm_config import mock_llm_config
-
-resp_content = "I'm llama2"
-default_resp = ChatCompletion(
-    id="cmpl-a6652c1bb181caae8dd19ad8",
-    model="llama-v2-13b-chat",
-    object="chat.completion",
-    created=1703302755,
-    choices=[
-        Choice(
-            finish_reason="stop",
-            index=0,
-            message=ChatCompletionMessage(role="assistant", content=resp_content),
-            logprobs=None,
-        )
-    ],
-)
-
-default_resp_chunk = ChatCompletionChunk(
-    id=default_resp.id,
-    model=default_resp.model,
-    object="chat.completion.chunk",
-    created=default_resp.created,
-    choices=[
-        AChoice(
-            delta=ChoiceDelta(content=resp_content, role="assistant"),
-            finish_reason="stop",
-            index=0,
-            logprobs=None,
-        )
-    ],
-)
-
-prompt_msg = "who are you"
-messages = [{"role": "user", "content": prompt_msg}]
-
-
-async def mock_openai_acompletions_create(self, stream: bool = False, **kwargs) -> ChatCompletionChunk:
-    if stream:
-
-        class Iterator(object):
-            async def __aiter__(self):
-                yield default_resp_chunk
-
-        return Iterator()
-    else:
-        return default_resp
-
-
-@pytest.mark.asyncio
-async def test_openllm_acompletion(mocker):
-    mocker.patch("openai.resources.chat.completions.AsyncCompletions.create", mock_openai_acompletions_create)
-
-    openllm_gpt = OpenLLM(mock_llm_config)
-    openllm_gpt.model = "llama-v2-13b-chat"
-
-    openllm_gpt._update_costs(usage=CompletionUsage(prompt_tokens=100, completion_tokens=100, total_tokens=200))
-    assert openllm_gpt.get_costs() == Costs(
-        total_prompt_tokens=100, total_completion_tokens=100, total_cost=0, total_budget=0
-    )
-
-    resp = await openllm_gpt.acompletion(messages)
-    assert resp.choices[0].message.content in resp_content
-
-    resp = await openllm_gpt.aask(prompt_msg, stream=False)
-    assert resp == resp_content
-
-    resp = await openllm_gpt.acompletion_text(messages, stream=False)
-    assert resp == resp_content
-
-    resp = await openllm_gpt.acompletion_text(messages, stream=True)
-    assert resp == resp_content
-
-    resp = await openllm_gpt.aask(prompt_msg)
-    assert resp == resp_content
--- a/tests/metagpt/provider/test_openai.py
+++ b/tests/metagpt/provider/test_openai.py
@ -1,12 +1,11 @@
-import json
-
 import pytest
 from openai.types.chat import (
    ChatCompletion,
+    ChatCompletionChunk,
    ChatCompletionMessage,
    ChatCompletionMessageToolCall,
 )
-from openai.types.chat.chat_completion import Choice
+from openai.types.chat.chat_completion import Choice, CompletionUsage
 from openai.types.chat.chat_completion_message_tool_call import Function
 from PIL import Image

@ -18,6 +17,22 @@ from tests.metagpt.provider.mock_llm_config import (
    mock_llm_config,
    mock_llm_config_proxy,
 )
+from tests.metagpt.provider.req_resp_const import (
+    get_openai_chat_completion,
+    get_openai_chat_completion_chunk,
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)
+
+name = "AI assistant"
+resp_cont = resp_cont_tmpl.format(name=name)
+default_resp = get_openai_chat_completion(name)
+
+default_resp_chunk = get_openai_chat_completion_chunk(name, usage_as_dict=True)
+
+usage = CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202)


@pytest.mark.asyncio
@ -106,9 +121,11 @@ class TestOpenAI:

    def test_aask_code_json_decode_error(self, json_decode_error):
        instance = OpenAILLM(mock_llm_config)
-        with pytest.raises(json.decoder.JSONDecodeError) as e:
-            instance.get_choice_function_arguments(json_decode_error)
-        assert "JSONDecodeError" in str(e)
+        code = instance.get_choice_function_arguments(json_decode_error)
+        assert "code" in code
+        assert "language" in code
+        assert "hello world" in code["code"]
+        logger.info(f'code is : {code["code"]}')


@pytest.mark.asyncio
@ -121,3 +138,29 @@ async def test_gen_image():

    images: list[Image] = await llm.gen_image(model=model, prompt=prompt, resp_format="b64_json")
    assert images[0].size == (1024, 1024)
+
+
+async def mock_openai_acompletions_create(self, stream: bool = False, **kwargs) -> ChatCompletionChunk:
+    if stream:
+
+        class Iterator(object):
+            async def __aiter__(self):
+                yield default_resp_chunk
+
+        return Iterator()
+    else:
+        return default_resp
+
+
+@pytest.mark.asyncio
+async def test_openai_acompletion(mocker):
+    mocker.patch("openai.resources.chat.completions.AsyncCompletions.create", mock_openai_acompletions_create)
+
+    llm = OpenAILLM(mock_llm_config)
+
+    resp = await llm.acompletion(messages)
+    assert resp.choices[0].finish_reason == "stop"
+    assert resp.choices[0].message.content == resp_cont
+    assert resp.usage == usage
+
+    await llm_general_chat_funcs_test(llm, prompt, messages, resp_cont)
--- a/tests/metagpt/provider/test_qianfan_api.py
+++ b/tests/metagpt/provider/test_qianfan_api.py
@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : the unittest of qianfan api
+
+from typing import AsyncIterator, Union
+
+import pytest
+from qianfan.resources.typing import JsonBody, QfResponse
+
+from metagpt.provider.qianfan_api import QianFanLLM
+from tests.metagpt.provider.mock_llm_config import mock_llm_config_qianfan
+from tests.metagpt.provider.req_resp_const import (
+    get_qianfan_response,
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)
+
+name = "ERNIE-Bot-turbo"
+resp_cont = resp_cont_tmpl.format(name=name)
+
+
+def mock_qianfan_do(self, messages: list[dict], model: str, stream: bool = False, system: str = None) -> QfResponse:
+    return get_qianfan_response(name=name)
+
+
+async def mock_qianfan_ado(
+    self, messages: list[dict], model: str, stream: bool = True, system: str = None
+) -> Union[QfResponse, AsyncIterator[QfResponse]]:
+    resps = [get_qianfan_response(name=name)]
+    if stream:
+
+        async def aresp_iterator(resps: list[JsonBody]):
+            for resp in resps:
+                yield resp
+
+        return aresp_iterator(resps)
+    else:
+        return resps[0]
+
+
+@pytest.mark.asyncio
+async def test_qianfan_acompletion(mocker):
+    mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.do", mock_qianfan_do)
+    mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.ado", mock_qianfan_ado)
+
+    qianfan_llm = QianFanLLM(mock_llm_config_qianfan)
+
+    resp = qianfan_llm.completion(messages)
+    assert resp.get("result") == resp_cont
+
+    resp = await qianfan_llm.acompletion(messages)
+    assert resp.get("result") == resp_cont
+
+    await llm_general_chat_funcs_test(qianfan_llm, prompt, messages, resp_cont)
--- a/tests/metagpt/provider/test_spark_api.py
+++ b/tests/metagpt/provider/test_spark_api.py
@ -4,12 +4,18 @@

 import pytest

-from metagpt.config2 import Config
 from metagpt.provider.spark_api import GetMessageFromWeb, SparkLLM
-from tests.metagpt.provider.mock_llm_config import mock_llm_config
+from tests.metagpt.provider.mock_llm_config import (
+    mock_llm_config,
+    mock_llm_config_spark,
+)
+from tests.metagpt.provider.req_resp_const import (
+    llm_general_chat_funcs_test,
+    prompt,
+    resp_cont_tmpl,
+)

-prompt_msg = "who are you"
-resp_content = "I'm Spark"
+resp_cont = resp_cont_tmpl.format(name="Spark")


 class MockWebSocketApp(object):
@ -23,7 +29,7 @@ class MockWebSocketApp(object):
 def test_get_msg_from_web(mocker):
    mocker.patch("websocket.WebSocketApp", MockWebSocketApp)

-    get_msg_from_web = GetMessageFromWeb(prompt_msg, mock_llm_config)
+    get_msg_from_web = GetMessageFromWeb(prompt, mock_llm_config)
    assert get_msg_from_web.gen_params()["parameter"]["chat"]["domain"] == "mock_domain"

    ret = get_msg_from_web.run()
@ -31,34 +37,26 @@ def test_get_msg_from_web(mocker):


 def mock_spark_get_msg_from_web_run(self) -> str:
-    return resp_content
+    return resp_cont


@pytest.mark.asyncio
-async def test_spark_aask():
-    llm = SparkLLM(Config.from_home("spark.yaml").llm)
+async def test_spark_aask(mocker):
+    mocker.patch("metagpt.provider.spark_api.GetMessageFromWeb.run", mock_spark_get_msg_from_web_run)
+
+    llm = SparkLLM(mock_llm_config_spark)

    resp = await llm.aask("Hello!")
-    print(resp)
+    assert resp == resp_cont


@pytest.mark.asyncio
 async def test_spark_acompletion(mocker):
    mocker.patch("metagpt.provider.spark_api.GetMessageFromWeb.run", mock_spark_get_msg_from_web_run)

-    spark_gpt = SparkLLM(mock_llm_config)
+    spark_llm = SparkLLM(mock_llm_config)

-    resp = await spark_gpt.acompletion([])
-    assert resp == resp_content
+    resp = await spark_llm.acompletion([])
+    assert resp == resp_cont

-    resp = await spark_gpt.aask(prompt_msg, stream=False)
-    assert resp == resp_content
-
-    resp = await spark_gpt.acompletion_text([], stream=False)
-    assert resp == resp_content
-
-    resp = await spark_gpt.acompletion_text([], stream=True)
-    assert resp == resp_content
-
-    resp = await spark_gpt.aask(prompt_msg)
-    assert resp == resp_content
+    await llm_general_chat_funcs_test(spark_llm, prompt, prompt, resp_cont)
--- a/tests/metagpt/provider/test_zhipuai_api.py
+++ b/tests/metagpt/provider/test_zhipuai_api.py
@ -6,22 +6,24 @@ import pytest

 from metagpt.provider.zhipuai_api import ZhiPuAILLM
 from tests.metagpt.provider.mock_llm_config import mock_llm_config_zhipu
+from tests.metagpt.provider.req_resp_const import (
+    get_part_chat_completion,
+    llm_general_chat_funcs_test,
+    messages,
+    prompt,
+    resp_cont_tmpl,
+)

-prompt_msg = "who are you"
-messages = [{"role": "user", "content": prompt_msg}]
-
-resp_content = "I'm chatglm-turbo"
-default_resp = {
-    "choices": [{"finish_reason": "stop", "index": 0, "message": {"content": resp_content, "role": "assistant"}}],
-    "usage": {"completion_tokens": 22, "prompt_tokens": 19, "total_tokens": 41},
-}
+name = "ChatGLM-4"
+resp_cont = resp_cont_tmpl.format(name=name)
+default_resp = get_part_chat_completion(name)


-async def mock_zhipuai_acreate_stream(**kwargs):
+async def mock_zhipuai_acreate_stream(self, **kwargs):
    class MockResponse(object):
        async def _aread(self):
            class Iterator(object):
-                events = [{"choices": [{"index": 0, "delta": {"content": resp_content, "role": "assistant"}}]}]
+                events = [{"choices": [{"index": 0, "delta": {"content": resp_cont, "role": "assistant"}}]}]

                async def __aiter__(self):
                    for event in self.events:
@ -37,7 +39,7 @@ async def mock_zhipuai_acreate_stream(**kwargs):
    return MockResponse()


-async def mock_zhipuai_acreate(**kwargs) -> dict:
+async def mock_zhipuai_acreate(self, **kwargs) -> dict:
    return default_resp


@ -46,22 +48,12 @@ async def test_zhipuai_acompletion(mocker):
    mocker.patch("metagpt.provider.zhipuai.zhipu_model_api.ZhiPuModelAPI.acreate", mock_zhipuai_acreate)
    mocker.patch("metagpt.provider.zhipuai.zhipu_model_api.ZhiPuModelAPI.acreate_stream", mock_zhipuai_acreate_stream)

-    zhipu_gpt = ZhiPuAILLM(mock_llm_config_zhipu)
+    zhipu_llm = ZhiPuAILLM(mock_llm_config_zhipu)

-    resp = await zhipu_gpt.acompletion(messages)
-    assert resp["choices"][0]["message"]["content"] == resp_content
+    resp = await zhipu_llm.acompletion(messages)
+    assert resp["choices"][0]["message"]["content"] == resp_cont

-    resp = await zhipu_gpt.aask(prompt_msg, stream=False)
-    assert resp == resp_content
-
-    resp = await zhipu_gpt.acompletion_text(messages, stream=False)
-    assert resp == resp_content
-
-    resp = await zhipu_gpt.acompletion_text(messages, stream=True)
-    assert resp == resp_content
-
-    resp = await zhipu_gpt.aask(prompt_msg)
-    assert resp == resp_content
+    await llm_general_chat_funcs_test(zhipu_llm, prompt, messages, resp_cont)


 def test_zhipuai_proxy():
--- a/tests/metagpt/roles/ci/test_code_interpreter.py
+++ b/tests/metagpt/roles/ci/test_code_interpreter.py
@ -1,19 +0,0 @@
-import pytest
-
-from metagpt.logs import logger
-from metagpt.roles.ci.code_interpreter import CodeInterpreter
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("auto_run", [(True), (False)])
-async def test_code_interpreter(mocker, auto_run):
-    mocker.patch("metagpt.actions.ci.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
-    mocker.patch("builtins.input", return_value="confirm")
-
-    requirement = "Run data analysis on sklearn Iris dataset, include a plot"
-    tools = []
-
-    ci = CodeInterpreter(auto_run=auto_run, use_tools=True, tools=tools)
-    rsp = await ci.run(requirement)
-    logger.info(rsp)
-    assert len(rsp.content) > 0
--- a/tests/metagpt/roles/mi/test_interpreter.py
+++ b/tests/metagpt/roles/mi/test_interpreter.py
@ -0,0 +1,23 @@
+import pytest
+
+from metagpt.logs import logger
+from metagpt.roles.mi.interpreter import Interpreter
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("auto_run", [(True), (False)])
+async def test_interpreter(mocker, auto_run):
+    mocker.patch("metagpt.actions.mi.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
+    mocker.patch("builtins.input", return_value="confirm")
+
+    requirement = "Run data analysis on sklearn Iris dataset, include a plot"
+    tools = []
+
+    mi = Interpreter(auto_run=auto_run, use_tools=True, tools=tools)
+    rsp = await mi.run(requirement)
+    logger.info(rsp)
+    assert len(rsp.content) > 0
+
+    finished_tasks = mi.planner.plan.get_finished_tasks()
+    assert len(finished_tasks) > 0
+    assert len(finished_tasks[0].code) > 0  # check one task to see if code is recorded
--- a/tests/metagpt/roles/mi/test_ml_engineer.py
+++ b/tests/metagpt/roles/mi/test_ml_engineer.py
@ -1,16 +1,16 @@
 import pytest

-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
 from metagpt.logs import logger
-from metagpt.roles.ci.ml_engineer import MLEngineer
+from metagpt.roles.mi.ml_engineer import MLEngineer
 from metagpt.schema import Message, Plan, Task
 from metagpt.tools.tool_type import ToolType
-from tests.metagpt.actions.ci.test_debug_code import CODE, DebugContext, ErrorStr
+from tests.metagpt.actions.mi.test_debug_code import CODE, DebugContext, ErrorStr


 def test_mle_init():
-    ci = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
-    assert ci.tools == []
+    mle = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
+    assert mle.tools == []


 MockPlan = Plan(
--- a/tests/metagpt/tools/libs/test_email_login.py
+++ b/tests/metagpt/tools/libs/test_email_login.py
@ -0,0 +1,7 @@
+from metagpt.tools.libs.email_login import email_login_imap
+
+
+def test_email_login(mocker):
+    mock_mailbox = mocker.patch("metagpt.tools.libs.email_login.MailBox.login")
+    mock_mailbox.login.return_value = mocker.Mock()
+    email_login_imap("test@outlook.com", "test_password")
--- a/tests/metagpt/utils/test_mermaid.py
+++ b/tests/metagpt/utils/test_mermaid.py
@ -14,7 +14,7 @@ from metagpt.utils.mermaid import MMC1, mermaid_to_file

@pytest.mark.asyncio
@pytest.mark.parametrize("engine", ["nodejs", "ink"])  # TODO: playwright and pyppeteer
-async def test_mermaid(engine, context):
+async def test_mermaid(engine, context, mermaid_mocker):
    # nodejs prerequisites: npm install -g @mermaid-js/mermaid-cli
    # ink prerequisites: connected to internet
    # playwright prerequisites: playwright install --with-deps chromium
--- a/tests/metagpt/utils/test_repair_llm_raw_output.py
+++ b/tests/metagpt/utils/test_repair_llm_raw_output.py
@ -211,6 +211,11 @@ value
    output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1")
    assert output == target_output

+    raw_output = '{"key": "url "http" \\"https\\" "}'
+    target_output = '{"key": "url \\"http\\" \\"https\\" "}'
+    output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 1 column 15 (char 14)")
+    assert output == target_output
+

 def test_retry_parse_json_text():
    from metagpt.utils.repair_llm_raw_output import retry_parse_json_text
--- a/tests/metagpt/utils/test_save_code.py
+++ b/tests/metagpt/utils/test_save_code.py
@ -6,7 +6,7 @@
 import nbformat
 import pytest

-from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
+from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
 from metagpt.utils.common import read_json_file
 from metagpt.utils.save_code import DATA_PATH, save_code_file

--- a/tests/metagpt/utils/test_text.py
+++ b/tests/metagpt/utils/test_text.py
@ -42,6 +42,7 @@ def test_reduce_message_length(msgs, model_name, system_text, reserved, expected
        (" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1),
        (" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2),
        (" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1),
+        (" ".join("Hello World" for _ in range(8000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1000, 8),
    ],
 )
 def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):
[diff truncated here: "Show more" control in the original commit view]