Merge branch 'geekan:main' into feat/add-cusotm-llm-config

2026-05-15 11:02:36 +02:00 · 2024-03-14 08:34:56 +08:00 · 2024-03-14 08:34:56 +08:00 · 2a4934094c
commit 2a4934094c
parent b2cfea2c74 72179a0bd4
147 changed files with 4542 additions and 2574 deletions
--- a/README.md
+++ b/README.md
@ -26,7 +26,9 @@ # MetaGPT: The Multi-Agent Framework
 </p>

 ## News
-🚀 Feb. 08, 2024: [v0.7.0](https://github.com/geekan/MetaGPT/releases/tag/v0.7.0) released, supporting assigning different LLMs to different Roles. We also introduced [Interpreter](https://github.com/geekan/MetaGPT/blob/main/examples/mi/README.md), a powerful agent capable of solving a wide range of real-world problems.
+🚀 Mar. 14, 2024: Our Data Interpreter paper is on [arxiv](https://arxiv.org/abs/2402.18679). Check the [example](https://docs.deepwisdom.ai/main/en/DataInterpreter/) and [code](https://github.com/geekan/MetaGPT/tree/main/examples/di)!
+
+🚀 Feb. 08, 2024: [v0.7.0](https://github.com/geekan/MetaGPT/releases/tag/v0.7.0) released, supporting assigning different LLMs to different Roles. We also introduced [Data Interpreter](https://github.com/geekan/MetaGPT/blob/main/examples/di/README.md), a powerful agent capable of solving a wide range of real-world problems.

 🚀 Jan. 16, 2024: Our paper [MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework
 ](https://arxiv.org/abs/2308.00352) accepted for oral presentation **(top 1.2%)** at ICLR 2024, **ranking #1** in the LLM-based Agent category.
@ -177,7 +179,7 @@ ### Contact Information

 ## Citation

-For now, cite the [arXiv paper](https://arxiv.org/abs/2308.00352):
+If you use MetaGPT or Data Interpreter in a research paper, please cite our work as follows:

 ```bibtex
@misc{hong2023metagpt,
@ -188,4 +190,14 @@ ## Citation
      archivePrefix={arXiv},
      primaryClass={cs.AI}
 }
+@misc{hong2024data,
+      title={Data Interpreter: An LLM Agent For Data Science}, 
+      author={Sirui Hong and Yizhang Lin and Bang Liu and Bangbang Liu and Binhao Wu and Danyang Li and Jiaqi Chen and Jiayi Zhang and Jinlin Wang and Li Zhang and Lingyao Zhang and Min Yang and Mingchen Zhuge and Taicheng Guo and Tuo Zhou and Wei Tao and Wenyi Wang and Xiangru Tang and Xiangtao Lu and Xiawu Zheng and Xinbing Liang and Yaying Fei and Yuheng Cheng and Zongze Xu and Chenglin Wu},
+      year={2024},
+      eprint={2402.18679},
+      archivePrefix={arXiv},
+      primaryClass={cs.AI}
+}
+
 ```
+
--- a/SECURITY.md
+++ b/SECURITY.md
@ -0,0 +1,14 @@
+# Security Policy
+
+## Supported Versions
+
+| Version | Supported          |
+|---------|--------------------|
+ | 7.x     | :x:                |
+ | 6.x     | :x:                |
+| < 6.x   | :x:                |
+
+
+## Reporting a Vulnerability
+
+If you have any vulnerability reports, please contact alexanderwu@deepwisdom.ai .
--- a/config/config2.example.yaml
+++ b/config/config2.example.yaml
@ -3,8 +3,16 @@ llm:
  base_url: "YOUR_BASE_URL"
  api_key: "YOUR_API_KEY"
  model: "gpt-4-turbo-preview"  # or gpt-3.5-turbo-1106 / gpt-4-1106-preview
-  repair_llm_output: true  # when the output is not a valid json, try to repair it
  proxy: "YOUR_PROXY"  # for LLM API requests
+  pricing_plan: "" # Optional. If invalid, it will be automatically filled in with the value of the `model`.
+  # Azure-exclusive pricing plan mappings:
+  # - gpt-3.5-turbo 4k: "gpt-3.5-turbo-1106"
+  # - gpt-4-turbo: "gpt-4-turbo-preview"
+  # - gpt-4-turbo-vision: "gpt-4-vision-preview"
+  # - gpt-4 8k: "gpt-4"
+  # For more details, see: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/
+
+repair_llm_output: true  # when the output is not a valid json, try to repair it

 proxy: "YOUR_PROXY"  # for tools like requests, playwright, selenium, etc.

--- a/docs/README_CN.md
+++ b/docs/README_CN.md
@ -116,7 +116,7 @@ ### 联系信息

 ## 引用

-引用 [arXiv paper](https://arxiv.org/abs/2308.00352):
+如果您在研究论文中使用 MetaGPT 或 Data Interpreter，请引用我们的工作：

 ```bibtex
@misc{hong2023metagpt,
@ -127,4 +127,12 @@ ## 引用
      archivePrefix={arXiv},
      primaryClass={cs.AI}
 }
+@misc{hong2024data,
+      title={Data Interpreter: An LLM Agent For Data Science}, 
+      author={Sirui Hong and Yizhang Lin and Bang Liu and Bangbang Liu and Binhao Wu and Danyang Li and Jiaqi Chen and Jiayi Zhang and Jinlin Wang and Li Zhang and Lingyao Zhang and Min Yang and Mingchen Zhuge and Taicheng Guo and Tuo Zhou and Wei Tao and Wenyi Wang and Xiangru Tang and Xiangtao Lu and Xiawu Zheng and Xinbing Liang and Yaying Fei and Yuheng Cheng and Zongze Xu and Chenglin Wu},
+      year={2024},
+      eprint={2402.18679},
+      archivePrefix={arXiv},
+      primaryClass={cs.AI}
+}
 ```
--- a/docs/README_JA.md
+++ b/docs/README_JA.md
@ -295,7 +295,7 @@ ## クイックスタート

 ## 引用

-現時点では、[arXiv 論文](https://arxiv.org/abs/2308.00352)を引用してください:
+研究論文でMetaGPTやData Interpreterを使用する場合は、以下のように当社の作業を引用してください：

 ```bibtex
@misc{hong2023metagpt,
@ -306,6 +306,14 @@ ## 引用
      archivePrefix={arXiv},
      primaryClass={cs.AI}
 }
+@misc{hong2024data,
+      title={Data Interpreter: An LLM Agent For Data Science}, 
+      author={Sirui Hong and Yizhang Lin and Bang Liu and Bangbang Liu and Binhao Wu and Danyang Li and Jiaqi Chen and Jiayi Zhang and Jinlin Wang and Li Zhang and Lingyao Zhang and Min Yang and Mingchen Zhuge and Taicheng Guo and Tuo Zhou and Wei Tao and Wenyi Wang and Xiangru Tang and Xiangtao Lu and Xiawu Zheng and Xinbing Liang and Yaying Fei and Yuheng Cheng and Zongze Xu and Chenglin Wu},
+      year={2024},
+      eprint={2402.18679},
+      archivePrefix={arXiv},
+      primaryClass={cs.AI}
+}
 ```

 ## お問い合わせ先
--- a/examples/debate.py
+++ b/examples/debate.py
@ -5,6 +5,7 @@ Author: garylin2099
@Modified By: mashenquan, 2023-11-1. In accordance with Chapter 2.1.3 of RFC 116, modify the data type of the `send_to`
        value of the `Message` object; modify the argument type of `get_by_actions`.
 """
+
 import asyncio
 import platform
 from typing import Any
@ -105,4 +106,4 @@ def main(idea: str, investment: float = 3.0, n_round: int = 10):


 if __name__ == "__main__":
-    fire.Fire(main)
+    fire.Fire(main)  # run as python debate.py --idea="TOPIC" --investment=3.0 --n_round=5
--- a/examples/di/README.md
+++ b/examples/di/README.md
@ -0,0 +1,20 @@
+# Data Interpreter (DI)
+
+## What is Data Interpreter
+Data Interpreter is an agent that solves data-related problems through code. It understands user requirements, makes plans, writes code for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios; please check out the examples below. For overall design and technical details, please see our [paper](https://arxiv.org/abs/2402.18679).
+
+## Example List
+- Data visualization
+- Machine learning modeling
+- Image background removal
+- Solve math problems
+- Receipt OCR
+- Tool usage: web page imitation
+- Tool usage: web crawling
+- Tool usage: text2image
+- Tool usage: email summarization and response
+- More on the way!
+
+Please see the [docs](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for more explanation.
+
+We are continuously releasing code; stay tuned!
--- a/examples/di/crawl_webpage.py
+++ b/examples/di/crawl_webpage.py
@ -0,0 +1,40 @@
+# -*- encoding: utf-8 -*-
+"""
+@Date    :   2024/01/24 15:11:27
+@Author  :   orange-crow
+@File    :   crawl_webpage.py
+"""
+
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+PAPER_LIST_REQ = """"
+Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
+and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
+"""
+
+ECOMMERCE_REQ = """
+Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
+**Notice: Firstly parse the web page encoding and the text HTML structure;
+The first page product name, price, product URL, and image URL must be saved in the csv;**
+"""
+
+NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
+下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整:
+1. 爬取并本地保存html结构;
+2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*;
+3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间;
+4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。
+5. 将全部结果存在本地csv中
+"""
+
+
+async def main():
+    di = DataInterpreter(tools=["scrape_web_playwright"])
+
+    await di.run(ECOMMERCE_REQ)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/examples/di/data_visualization.py
+++ b/examples/di/data_visualization.py
@ -1,11 +1,11 @@
 import asyncio

-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    mi = Interpreter(use_tools=False)
-    await mi.run(requirement)
+    di = DataInterpreter()
+    await di.run(requirement)


 if __name__ == "__main__":
--- a/examples/di/email_summary.py
+++ b/examples/di/email_summary.py
@ -6,7 +6,7 @@
 """
 import os

-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main():
@ -22,9 +22,9 @@ async def main():
            Firstly, Please help me fetch the latest 5 senders and full letter contents.
            Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format."""

-    mi = Interpreter(use_tools=True)
+    di = DataInterpreter()

-    await mi.run(prompt)
+    await di.run(prompt)


 if __name__ == "__main__":
--- a/examples/di/imitate_webpage.py
+++ b/examples/di/imitate_webpage.py
@ -5,19 +5,18 @@
@Author  : mannaandpoem
@File    : imitate_webpage.py
 """
-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main():
    web_url = "https://pytorch.org/"
    prompt = f"""This is a URL of webpage: '{web_url}' .
 Firstly, utilize Selenium and WebDriver for rendering. 
-Secondly, convert image to a webpage including HTML, CSS and JS in one go. 
-Finally, save webpage in a text file. 
+Secondly, convert image to a webpage including HTML, CSS and JS in one go.
 Note: All required dependencies and environments have been fully installed and configured."""
-    mi = Interpreter(use_tools=True)
+    di = DataInterpreter(tools=["GPTvGenerator"])

-    await mi.run(prompt)
+    await di.run(prompt)


 if __name__ == "__main__":
--- a/examples/di/machine_learning.py
+++ b/examples/di/machine_learning.py
@ -1,12 +1,12 @@
 import fire

-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(auto_run: bool = True):
    requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."
-    mi = Interpreter(auto_run=auto_run)
-    await mi.run(requirement)
+    di = DataInterpreter(auto_run=auto_run)
+    await di.run(requirement)


 if __name__ == "__main__":
--- a/examples/di/machine_learning_with_tools.py
+++ b/examples/di/machine_learning_with_tools.py
@ -0,0 +1,16 @@
+import asyncio
+
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+
+async def main(requirement: str):
+    role = DataInterpreter(use_reflection=True, tools=["<all>"])
+    await role.run(requirement)
+
+
+if __name__ == "__main__":
+    data_path = "your/path/to/titanic"
+    train_path = f"{data_path}/split_train.csv"
+    eval_path = f"{data_path}/split_eval.csv"
+    requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{train_path}', eval data path: '{eval_path}'."
+    asyncio.run(main(requirement))
--- a/examples/di/ocr_receipt.py
+++ b/examples/di/ocr_receipt.py
@ -1,4 +1,4 @@
-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main():
@ -10,9 +10,9 @@ async def main():
    coordinates and confidence levels, then recognize the total amount from ocr text content, and finally save as table. 
    Image path: {image_path}.
    NOTE: The environments for Paddle and PaddleOCR are all ready and has been fully installed."""
-    mi = Interpreter()
+    di = DataInterpreter()

-    await mi.run(requirement)
+    await di.run(requirement)


 if __name__ == "__main__":
--- a/examples/di/rm_image_background.py
+++ b/examples/di/rm_image_background.py
@ -1,11 +1,11 @@
 import asyncio

-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    mi = Interpreter(use_tools=False)
-    await mi.run(requirement)
+    di = DataInterpreter()
+    await di.run(requirement)


 if __name__ == "__main__":
--- a/examples/di/sd_tool_usage.py
+++ b/examples/di/sd_tool_usage.py
@ -4,12 +4,12 @@
 # @Desc    :
 import asyncio

-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    mi = Interpreter(use_tools=True, goal=requirement)
-    await mi.run(requirement)
+    di = DataInterpreter(tools=["SDEngine"])
+    await di.run(requirement)


 if __name__ == "__main__":
--- a/examples/di/solve_math_problems.py
+++ b/examples/di/solve_math_problems.py
@ -1,13 +1,14 @@
 import asyncio

-from metagpt.roles.mi.interpreter import Interpreter
+from metagpt.roles.di.data_interpreter import DataInterpreter


 async def main(requirement: str = ""):
-    mi = Interpreter(use_tools=False)
-    await mi.run(requirement)
+    di = DataInterpreter()
+    await di.run(requirement)


 if __name__ == "__main__":
    requirement = "Solve this math problem: The greatest common divisor of positive integers m and n is 6. The least common multiple of m and n is 126. What is the least possible value of m + n?"
+    # answer: 60 (m = 18, n = 42)
    asyncio.run(main(requirement))
--- a/examples/mi/README.md
+++ b/examples/mi/README.md
@ -1,18 +0,0 @@
-# MetaGPT Interpreter (MI)
-
-## What is Interpreter
-Interpreter is an agent who solves problems through codes. It understands user requirements, makes plans, writes codes for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios, please check out the examples below.
-
-## Example List
- Data visualization
- Machine learning modeling
- Image background removal
- Solve math problems
- Receipt OCR
- Tool usage: web page imitation
- Tool usage: web crawling
- Tool usage: text2image
- Tool usage: email summarization and response
- More on the way!
-
-Please see [here](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/mi_intro.html) for detailed explanation.
--- a/examples/mi/crawl_webpage.py
+++ b/examples/mi/crawl_webpage.py
@ -1,22 +0,0 @@
-# -*- encoding: utf-8 -*-
-"""
-@Date    :   2024/01/24 15:11:27
-@Author  :   orange-crow
-@File    :   crawl_webpage.py
-"""
-
-from metagpt.roles.mi.interpreter import Interpreter
-
-
-async def main():
-    prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
-    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
-    mi = Interpreter(use_tools=True)
-
-    await mi.run(prompt)
-
-
-if __name__ == "__main__":
-    import asyncio
-
-    asyncio.run(main())
--- a/examples/mi/ml_engineer_with_tools.py
+++ b/examples/mi/ml_engineer_with_tools.py
@ -1,16 +0,0 @@
-import asyncio
-
-from metagpt.roles.mi.ml_engineer import MLEngineer
-
-
-async def main(requirement: str):
-    role = MLEngineer(auto_run=True, use_tools=True)
-    await role.run(requirement)
-
-
-if __name__ == "__main__":
-    data_path = "your_path_to_icr/icr-identify-age-related-conditions"
-    train_path = f"{data_path}/your_train_data.csv"
-    eval_path = f"{data_path}/your_eval_data.csv"
-    requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {train_path}, eval data path:{eval_path}."
-    asyncio.run(main(requirement))
--- a/examples/reverse_engineering.py
+++ b/examples/reverse_engineering.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import asyncio
+import shutil
+from pathlib import Path
+
+import typer
+
+from metagpt.actions.rebuild_class_view import RebuildClassView
+from metagpt.actions.rebuild_sequence_view import RebuildSequenceView
+from metagpt.context import Context
+from metagpt.llm import LLM
+from metagpt.logs import logger
+from metagpt.utils.git_repository import GitRepository
+from metagpt.utils.project_repo import ProjectRepo
+
+app = typer.Typer(add_completion=False, pretty_exceptions_show_locals=False)
+
+
+@app.command("", help="Python project reverse engineering.")
+def startup(
+    project_root: str = typer.Argument(
+        default="",
+        help="Specify the root directory of the existing project for reverse engineering.",
+    ),
+    output_dir: str = typer.Option(default="", help="Specify the output directory path for reverse engineering."),
+):
+    package_root = Path(project_root)
+    if not package_root.exists():
+        raise FileNotFoundError(f"{project_root} not exists")
+    if not _is_python_package_root(package_root):
+        raise FileNotFoundError(f'There are no "*.py" files under "{project_root}".')
+    init_file = package_root / "__init__.py"  # used by pyreverse
+    init_file_exists = init_file.exists()
+    if not init_file_exists:
+        init_file.touch()
+
+    if not output_dir:
+        output_dir = package_root / "../reverse_engineering_output"
+    logger.info(f"output dir:{output_dir}")
+    try:
+        asyncio.run(reverse_engineering(package_root, Path(output_dir)))
+    finally:
+        if not init_file_exists:
+            init_file.unlink(missing_ok=True)
+        tmp_dir = package_root / "__dot__"
+        if tmp_dir.exists():
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def _is_python_package_root(package_root: Path) -> bool:
+    for file_path in package_root.iterdir():
+        if file_path.is_file():
+            if file_path.suffix == ".py":
+                return True
+    return False
+
+
+async def reverse_engineering(package_root: Path, output_dir: Path):
+    ctx = Context()
+    ctx.git_repo = GitRepository(output_dir)
+    ctx.repo = ProjectRepo(ctx.git_repo)
+    action = RebuildClassView(name="ReverseEngineering", i_context=str(package_root), llm=LLM(), context=ctx)
+    await action.run()
+
+    action = RebuildSequenceView(name="ReverseEngineering", llm=LLM(), context=ctx)
+    await action.run()
+
+
+if __name__ == "__main__":
+    app()
--- a/examples/search_with_specific_engine.py
+++ b/examples/search_with_specific_engine.py
@ -4,21 +4,17 @@
 """
 import asyncio

+from metagpt.config2 import Config
 from metagpt.roles import Searcher
-from metagpt.tools.search_engine import SearchEngine, SearchEngineType
+from metagpt.tools.search_engine import SearchEngine


 async def main():
    question = "What are the most interesting human facts?"
-    kwargs = {"api_key": "", "cse_id": "", "proxy": None}
-    # Serper API
-    # await Searcher(search_engine=SearchEngine(engine=SearchEngineType.SERPER_GOOGLE, **kwargs)).run(question)
-    # SerpAPI
-    # await Searcher(search_engine=SearchEngine(engine=SearchEngineType.SERPAPI_GOOGLE, **kwargs)).run(question)
-    # Google API
-    # await Searcher(search_engine=SearchEngine(engine=SearchEngineType.DIRECT_GOOGLE, **kwargs)).run(question)
-    # DDG API
-    await Searcher(search_engine=SearchEngine(engine=SearchEngineType.DUCK_DUCK_GO, **kwargs)).run(question)
+
+    search = Config.default().search
+    kwargs = {"api_key": search.api_key, "cse_id": search.cse_id, "proxy": None}
+    await Searcher(search_engine=SearchEngine(engine=search.api_type, **kwargs)).run(question)


 if __name__ == "__main__":
--- a/metagpt/actions/init.py
+++ b/metagpt/actions/init.py
@ -22,9 +22,9 @@ from metagpt.actions.write_code_review import WriteCodeReview
 from metagpt.actions.write_prd import WritePRD
 from metagpt.actions.write_prd_review import WritePRDReview
 from metagpt.actions.write_test import WriteTest
-from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
-from metagpt.actions.mi.write_analysis_code import WriteCodeWithoutTools, WriteCodeWithTools
-from metagpt.actions.mi.write_plan import WritePlan
+from metagpt.actions.di.execute_nb_code import ExecuteNbCode
+from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
+from metagpt.actions.di.write_plan import WritePlan


 class ActionType(Enum):
@ -46,8 +46,7 @@ class ActionType(Enum):
    WEB_BROWSE_AND_SUMMARIZE = WebBrowseAndSummarize
    CONDUCT_RESEARCH = ConductResearch
    EXECUTE_NB_CODE = ExecuteNbCode
-    WRITE_CODE_WITHOUT_TOOLS = WriteCodeWithoutTools
-    WRITE_CODE_WITH_TOOLS = WriteCodeWithTools
+    WRITE_ANALYSIS_CODE = WriteAnalysisCode
    WRITE_PLAN = WritePlan


--- a/metagpt/actions/design_api_an.py
+++ b/metagpt/actions/design_api_an.py
@ -8,7 +8,6 @@
 from typing import List

 from metagpt.actions.action_node import ActionNode
-from metagpt.logs import logger
 from metagpt.utils.mermaid import MMC1, MMC2

 IMPLEMENTATION_APPROACH = ActionNode(
@ -109,14 +108,3 @@ REFINED_NODES = [

 DESIGN_API_NODE = ActionNode.from_children("DesignAPI", NODES)
 REFINED_DESIGN_NODE = ActionNode.from_children("RefinedDesignAPI", REFINED_NODES)
-
-
-def main():
-    prompt = DESIGN_API_NODE.compile(context="")
-    logger.info(prompt)
-    prompt = REFINED_DESIGN_NODE.compile(context="")
-    logger.info(prompt)
-
-
-if __name__ == "__main__":
-    main()
--- a/metagpt/actions/di/init.py
+++ b/metagpt/actions/di/init.py
--- a/metagpt/actions/di/ask_review.py
+++ b/metagpt/actions/di/ask_review.py
--- a/metagpt/actions/di/execute_nb_code.py
+++ b/metagpt/actions/di/execute_nb_code.py
@ -9,7 +9,6 @@ from __future__ import annotations
 import asyncio
 import base64
 import re
-import traceback
 from typing import Literal, Tuple

 import nbformat
@ -58,7 +57,23 @@ class ExecuteNbCode(Action):

    async def terminate(self):
        """kill NotebookClient"""
-        await self.nb_client._async_cleanup_kernel()
+        if self.nb_client.km is not None and await self.nb_client.km.is_alive():
+            await self.nb_client.km.shutdown_kernel(now=True)
+            await self.nb_client.km.cleanup_resources()
+
+            channels = [
+                self.nb_client.kc.stdin_channel,  # The channel for handling standard input to the kernel.
+                self.nb_client.kc.hb_channel,  # The channel for heartbeat communication between the kernel and client.
+                self.nb_client.kc.control_channel,  # The channel for controlling the kernel.
+            ]
+
+            # Stops all the running channels for this kernel
+            for channel in channels:
+                if channel.is_alive():
+                    channel.stop()
+
+            self.nb_client.kc = None
+            self.nb_client.km = None

    async def reset(self):
        """reset NotebookClient"""
@ -91,17 +106,17 @@ class ExecuteNbCode(Action):
        else:
            cell["outputs"].append(new_output(output_type="stream", name="stdout", text=str(output)))

-    def parse_outputs(self, outputs: list[str]) -> str:
+    def parse_outputs(self, outputs: list[str], keep_len: int = 2000) -> Tuple[bool, str]:
        """Parses the outputs received from notebook execution."""
        assert isinstance(outputs, list)
-        parsed_output = ""
-
+        parsed_output, is_success = [], True
        for i, output in enumerate(outputs):
+            output_text = ""
            if output["output_type"] == "stream" and not any(
                tag in output["text"]
                for tag in ["| INFO     | metagpt", "| ERROR    | metagpt", "| WARNING  | metagpt", "DEBUG"]
            ):
-                parsed_output += output["text"]
+                output_text = output["text"]
            elif output["output_type"] == "display_data":
                if "image/png" in output["data"]:
                    self.show_bytes_figure(output["data"]["image/png"], self.interaction)
@ -110,8 +125,22 @@ class ExecuteNbCode(Action):
                        f"{i}th output['data'] from nbclient outputs dont have image/png, continue next output ..."
                    )
            elif output["output_type"] == "execute_result":
-                parsed_output += output["data"]["text/plain"]
-        return parsed_output
+                output_text = output["data"]["text/plain"]
+            elif output["output_type"] == "error":
+                output_text, is_success = "\n".join(output["traceback"]), False
+
+            # handle coroutines that are not executed asynchronously
+            if output_text.strip().startswith("<coroutine object"):
+                output_text = "Executed code failed, you need use key word 'await' to run a async code."
+                is_success = False
+
+            output_text = remove_escape_and_color_codes(output_text)
+            # The useful information of an exception is at the end,
+            # while the useful information of normal output is at the beginning.
+            output_text = output_text[:keep_len] if is_success else output_text[-keep_len:]
+
+            parsed_output.append(output_text)
+        return is_success, ",".join(parsed_output)

    def show_bytes_figure(self, image_base64: str, interaction_type: Literal["ipython", None]):
        image_bytes = base64.b64decode(image_base64)
@ -145,7 +174,7 @@ class ExecuteNbCode(Action):
        """
        try:
            await self.nb_client.async_execute_cell(cell, cell_index)
-            return True, ""
+            return self.parse_outputs(self.nb.cells[-1].outputs)
        except CellTimeoutError:
            assert self.nb_client.km is not None
            await self.nb_client.km.interrupt_kernel()
@ -156,7 +185,7 @@ class ExecuteNbCode(Action):
            await self.reset()
            return False, "DeadKernelError"
        except Exception:
-            return False, f"{traceback.format_exc()}"
+            return self.parse_outputs(self.nb.cells[-1].outputs)

    async def run(self, code: str, language: Literal["python", "markdown"] = "python") -> Tuple[str, bool]:
        """
@ -173,14 +202,7 @@ class ExecuteNbCode(Action):

            # run code
            cell_index = len(self.nb.cells) - 1
-            success, error_message = await self.run_cell(self.nb.cells[-1], cell_index)
-
-            if not success:
-                return truncate(remove_escape_and_color_codes(error_message), is_success=success)
-
-            # code success
-            outputs = self.parse_outputs(self.nb.cells[-1].outputs)
-            outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success)
+            success, outputs = await self.run_cell(self.nb.cells[-1], cell_index)

            if "!pip" in code:
                success = False
@ -196,54 +218,39 @@ class ExecuteNbCode(Action):
            raise ValueError(f"Only support for language: python, markdown, but got {language}, ")


-def truncate(result: str, keep_len: int = 2000, is_success: bool = True):
-    """对于超出keep_len个字符的result: 执行失败的代码, 展示result后keep_len个字符; 执行成功的代码, 展示result前keep_len个字符。"""
-    if is_success:
-        desc = f"Executed code successfully. Truncated to show only first {keep_len} characters\n"
-    else:
-        desc = f"Executed code failed, please reflect the cause of bug and then debug. Truncated to show only last {keep_len} characters\n"
-
-    if result.strip().startswith("<coroutine object"):
-        result = "Executed code failed, you need use key word 'await' to run a async code."
-        return result, False
-
-    if len(result) > keep_len:
-        result = result[-keep_len:] if not is_success else result[:keep_len]
-        return desc + result, is_success
-
-    return result, is_success
-
-
 def remove_escape_and_color_codes(input_str: str):
-    # 使用正则表达式去除转义字符和颜色代码
+    # 使用正则表达式去除jupyter notebook输出结果中的转义字符和颜色代码
+    # Use regular expressions to get rid of escape characters and color codes in jupyter notebook output.
    pattern = re.compile(r"\x1b\[[0-9;]*[mK]")
    result = pattern.sub("", input_str)
    return result


 def display_markdown(content: str):
-    # 使用正则表达式逐个匹配代码块
+    # Use regular expressions to match blocks of code one by one.
    matches = re.finditer(r"```(.+?)```", content, re.DOTALL)
    start_index = 0
    content_panels = []
-    # 逐个打印匹配到的文本和代码
+    # Set the text background color and text color.
+    style = "black on white"
+    # Print the matching text and code one by one.
    for match in matches:
        text_content = content[start_index : match.start()].strip()
        code_content = match.group(0).strip()[3:-3]  # Remove triple backticks

        if text_content:
-            content_panels.append(Panel(Markdown(text_content), box=MINIMAL))
+            content_panels.append(Panel(Markdown(text_content), style=style, box=MINIMAL))

        if code_content:
-            content_panels.append(Panel(Markdown(f"```{code_content}"), box=MINIMAL))
+            content_panels.append(Panel(Markdown(f"```{code_content}"), style=style, box=MINIMAL))
        start_index = match.end()

-    # 打印剩余文本（如果有）
+    # Print remaining text (if any).
    remaining_text = content[start_index:].strip()
    if remaining_text:
-        content_panels.append(Panel(Markdown(remaining_text), box=MINIMAL))
+        content_panels.append(Panel(Markdown(remaining_text), style=style, box=MINIMAL))

-    # 在Live模式中显示所有Panel
+    # Display all panels in Live mode.
    with Live(auto_refresh=False, console=Console(), vertical_overflow="visible") as live:
        live.update(Group(*content_panels))
        live.refresh()
--- a/metagpt/actions/di/write_analysis_code.py
+++ b/metagpt/actions/di/write_analysis_code.py
@ -0,0 +1,73 @@
+# -*- encoding: utf-8 -*-
+"""
+@Date    :   2023/11/20 13:19:39
+@Author  :   orange-crow
+@File    :   write_analysis_code.py
+"""
+from __future__ import annotations
+
+import json
+
+from metagpt.actions import Action
+from metagpt.prompts.di.write_analysis_code import (
+    CHECK_DATA_PROMPT,
+    DEBUG_REFLECTION_EXAMPLE,
+    INTERPRETER_SYSTEM_MSG,
+    REFLECTION_PROMPT,
+    REFLECTION_SYSTEM_MSG,
+    STRUCTUAL_PROMPT,
+)
+from metagpt.schema import Message, Plan
+from metagpt.utils.common import CodeParser, process_message, remove_comments
+
+
+class WriteAnalysisCode(Action):
+    async def _debug_with_reflection(self, context: list[Message], working_memory: list[Message]):
+        reflection_prompt = REFLECTION_PROMPT.format(
+            debug_example=DEBUG_REFLECTION_EXAMPLE,
+            context=context,
+            previous_impl=working_memory,
+        )
+
+        rsp = await self._aask(reflection_prompt, system_msgs=[REFLECTION_SYSTEM_MSG])
+        reflection = json.loads(CodeParser.parse_code(block=None, text=rsp))
+
+        return reflection["improved_impl"]
+
+    async def run(
+        self,
+        user_requirement: str,
+        plan_status: str = "",
+        tool_info: str = "",
+        working_memory: list[Message] = None,
+        use_reflection: bool = False,
+        **kwargs,
+    ) -> str:
+        structual_prompt = STRUCTUAL_PROMPT.format(
+            user_requirement=user_requirement,
+            plan_status=plan_status,
+            tool_info=tool_info,
+        )
+
+        working_memory = working_memory or []
+        context = process_message([Message(content=structual_prompt, role="user")] + working_memory)
+
+        # LLM call
+        if use_reflection:
+            code = await self._debug_with_reflection(context=context, working_memory=working_memory)
+        else:
+            rsp = await self.llm.aask(context, system_msgs=[INTERPRETER_SYSTEM_MSG], **kwargs)
+            code = CodeParser.parse_code(block=None, text=rsp)
+
+        return code
+
+
+class CheckData(Action):
+    async def run(self, plan: Plan) -> dict:
+        finished_tasks = plan.get_finished_tasks()
+        code_written = [remove_comments(task.code) for task in finished_tasks]
+        code_written = "\n\n".join(code_written)
+        prompt = CHECK_DATA_PROMPT.format(code_written=code_written)
+        rsp = await self._aask(prompt)
+        code = CodeParser.parse_code(block=None, text=rsp)
+        return code
--- a/metagpt/actions/di/write_plan.py
+++ b/metagpt/actions/di/write_plan.py
@ -12,81 +12,49 @@ from typing import Tuple

 from metagpt.actions import Action
 from metagpt.logs import logger
-from metagpt.prompts.mi.write_analysis_code import (
-    ASSIGN_TASK_TYPE_CONFIG,
-    ASSIGN_TASK_TYPE_PROMPT,
-)
 from metagpt.schema import Message, Plan, Task
-from metagpt.tools import TOOL_REGISTRY
-from metagpt.utils.common import CodeParser, create_func_call_config
+from metagpt.strategy.task_type import TaskType
+from metagpt.utils.common import CodeParser


 class WritePlan(Action):
    PROMPT_TEMPLATE: str = """
    # Context:
-    __context__
+    {context}
+    # Available Task Types:
+    {task_type_desc}
    # Task:
-    Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
+    Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to {max_tasks} tasks.
    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
    If you encounter errors on the current task, revise and output the current single task only.
    Output a list of jsons following the format:
    ```json
    [
-        {
+        {{
            "task_id": str = "unique identifier for a task in plan, can be an ordinal",
            "dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
            "instruction": "what you should do in this task, one short phrase or sentence",
-        },
+            "task_type": "type of this task, should be one of Available Task Types",
+        }},
        ...
    ]
    ```
    """

-    async def assign_task_type(self, tasks: list[dict]) -> str:
-        """Assign task type to each task in tasks
-
-        Args:
-            tasks (list[dict]): tasks to be assigned task type
-
-        Returns:
-            str: tasks with task type assigned in a json string
-        """
-        task_info = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks])
-        task_type_desc = "\n".join(
-            [f"- **{tool_type.name}**: {tool_type.desc}" for tool_type in TOOL_REGISTRY.get_tool_types().values()]
-        )  # task type are binded with tool type now, should be improved in the future
-        prompt = ASSIGN_TASK_TYPE_PROMPT.format(
-            task_info=task_info, task_type_desc=task_type_desc
-        )  # task types are set to be the same as tool types, for now
-        tool_config = create_func_call_config(ASSIGN_TASK_TYPE_CONFIG)
-        rsp = await self.llm.aask_code(prompt, **tool_config)
-        task_type_list = rsp["task_type"]
-        logger.info(f"assigned task types: {task_type_list}")
-        for task, task_type in zip(tasks, task_type_list):
-            task["task_type"] = task_type
-        return json.dumps(tasks)
-
-    async def run(self, context: list[Message], max_tasks: int = 5, use_tools: bool = False) -> str:
-        prompt = (
-            self.PROMPT_TEMPLATE.replace("__context__", "\n".join([str(ct) for ct in context]))
-            # .replace("__current_plan__", current_plan)
-            .replace("__max_tasks__", str(max_tasks))
+    async def run(self, context: list[Message], max_tasks: int = 5) -> str:
+        task_type_desc = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType])
+        prompt = self.PROMPT_TEMPLATE.format(
+            context="\n".join([str(ct) for ct in context]), max_tasks=max_tasks, task_type_desc=task_type_desc
        )
        rsp = await self._aask(prompt)
        rsp = CodeParser.parse_code(block=None, text=rsp)
-        if use_tools:
-            rsp = await self.assign_task_type(json.loads(rsp))
        return rsp


-def rsp_to_tasks(rsp: str) -> list[Task]:
+def update_plan_from_rsp(rsp: str, current_plan: Plan):
    rsp = json.loads(rsp)
    tasks = [Task(**task_config) for task_config in rsp]
-    return tasks

-
-def update_plan_from_rsp(rsp: str, current_plan: Plan):
-    tasks = rsp_to_tasks(rsp)
    if len(tasks) == 1 or tasks[0].dependent_task_ids:
        if tasks[0].dependent_task_ids and len(tasks) > 1:
            # tasks[0].dependent_task_ids means the generated tasks are not a complete plan
--- a/metagpt/actions/mi/debug_code.py
+++ b/metagpt/actions/mi/debug_code.py
@ -1,109 +0,0 @@
-from __future__ import annotations
-
-from metagpt.actions.mi.write_analysis_code import BaseWriteAnalysisCode
-from metagpt.logs import logger
-from metagpt.schema import Message
-from metagpt.utils.common import create_func_call_config
-
-DEBUG_REFLECTION_EXAMPLE = '''
-Example 1:
-[previous impl]:
-```python
-def add(a: int, b: int) -> int:
-   """
-   Given integers a and b, return the total value of a and b.
-   """
-   return a - b
-```
-
-[runtime Error]:
-Tested passed:
-
-Tests failed:
-assert add(1, 2) == 3 # output: -1
-assert add(1, 2) == 4 # output: -1
-
-[reflection on previous impl]:
-The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
-
-[improved impl]:
-```python
-def add(a: int, b: int) -> int:
-   """
-   Given integers a and b, return the total value of a and b.
-   """
-   return a + b
-```
-'''
-
-REFLECTION_PROMPT = """
-Here is an example for you.
-{debug_example}
-[context]
-{context}
-
-[previous impl]
-{code}
-[runtime Error]
-{runtime_result}
-
-Analysis the error step by step, provide me improve method and code. Remember to follow [context] requirement. Don't forget write code for steps behind the error step.
-[reflection on previous impl]:
-xxx
-"""
-
-CODE_REFLECTION = {
-    "name": "execute_reflection_code",
-    "description": "Execute reflection code.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "reflection": {
-                "type": "string",
-                "description": "Reflection on previous impl.",
-            },
-            "improved_impl": {
-                "type": "string",
-                "description": "Refined code after reflection.",
-            },
-        },
-        "required": ["reflection", "improved_impl"],
-    },
-}
-
-
-class DebugCode(BaseWriteAnalysisCode):
-    async def run(
-        self,
-        context: list[Message] = None,
-        code: str = "",
-        runtime_result: str = "",
-    ) -> str:
-        """
-        Execute the debugging process based on the provided context, code, and runtime_result.
-
-        Args:
-            context (list[Message]): A list of Message objects representing the context.
-            code (str): The code to be debugged.
-            runtime_result (str): The result of the code execution.
-
-        Returns:
-            str: The improved implementation based on the debugging process.
-        """
-
-        info = []
-        reflection_prompt = REFLECTION_PROMPT.format(
-            debug_example=DEBUG_REFLECTION_EXAMPLE,
-            context=context,
-            code=code,
-            runtime_result=runtime_result,
-        )
-        system_prompt = "You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation "
-        info.append(Message(role="system", content=system_prompt))
-        info.append(Message(role="user", content=reflection_prompt))
-
-        tool_config = create_func_call_config(CODE_REFLECTION)
-        reflection = await self.llm.aask_code(messages=info, **tool_config)
-        logger.info(f"reflection is {reflection}")
-
-        return {"code": reflection["improved_impl"]}
--- a/metagpt/actions/mi/ml_action.py
+++ b/metagpt/actions/mi/ml_action.py
@ -1,70 +0,0 @@
-from __future__ import annotations
-
-from typing import Tuple
-
-from metagpt.actions import Action
-from metagpt.actions.mi.write_analysis_code import WriteCodeWithTools
-from metagpt.prompts.mi.ml_action import (
-    ML_GENERATE_CODE_PROMPT,
-    ML_TOOL_USAGE_PROMPT,
-    PRINT_DATA_COLUMNS,
-    UPDATE_DATA_COLUMNS,
-)
-from metagpt.prompts.mi.write_analysis_code import CODE_GENERATOR_WITH_TOOLS
-from metagpt.schema import Message, Plan
-from metagpt.utils.common import create_func_call_config, remove_comments
-
-
-class WriteCodeWithToolsML(WriteCodeWithTools):
-    async def run(
-        self,
-        context: list[Message],
-        plan: Plan = None,
-        column_info: str = "",
-        **kwargs,
-    ) -> Tuple[list[Message], str]:
-        # prepare tool schemas and tool-type-specific instruction
-        tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
-
-        # ML-specific variables to be used in prompt
-        finished_tasks = plan.get_finished_tasks()
-        code_context = [remove_comments(task.code) for task in finished_tasks]
-        code_context = "\n\n".join(code_context)
-
-        # prepare prompt depending on tool availability & LLM call
-        if tool_schemas:
-            prompt = ML_TOOL_USAGE_PROMPT.format(
-                user_requirement=plan.goal,
-                history_code=code_context,
-                current_task=plan.current_task.instruction,
-                column_info=column_info,
-                tool_type_usage_prompt=tool_type_usage_prompt,
-                tool_schemas=tool_schemas,
-            )
-
-        else:
-            prompt = ML_GENERATE_CODE_PROMPT.format(
-                user_requirement=plan.goal,
-                history_code=code_context,
-                current_task=plan.current_task.instruction,
-                column_info=column_info,
-                tool_type_usage_prompt=tool_type_usage_prompt,
-            )
-        tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS)
-        rsp = await self.llm.aask_code(prompt, **tool_config)
-
-        # Extra output to be used for potential debugging
-        context = [Message(content=prompt, role="user")]
-
-        return context, rsp
-
-
-class UpdateDataColumns(Action):
-    async def run(self, plan: Plan = None) -> dict:
-        finished_tasks = plan.get_finished_tasks()
-        code_context = [remove_comments(task.code) for task in finished_tasks]
-        code_context = "\n\n".join(code_context)
-        prompt = UPDATE_DATA_COLUMNS.format(history_code=code_context)
-        tool_config = create_func_call_config(PRINT_DATA_COLUMNS)
-        rsp = await self.llm.aask_code(prompt, **tool_config)
-        return rsp
--- a/metagpt/actions/mi/write_analysis_code.py
+++ b/metagpt/actions/mi/write_analysis_code.py
@ -1,155 +0,0 @@
-# -*- encoding: utf-8 -*-
-"""
-@Date    :   2023/11/20 13:19:39
-@Author  :   orange-crow
-@File    :   write_analysis_code.py
-"""
-from __future__ import annotations
-
-from typing import Tuple
-
-from metagpt.actions import Action
-from metagpt.logs import logger
-from metagpt.prompts.mi.write_analysis_code import (
-    CODE_GENERATOR_WITH_TOOLS,
-    SELECT_FUNCTION_TOOLS,
-    TOOL_RECOMMENDATION_PROMPT,
-    TOOL_USAGE_PROMPT,
-)
-from metagpt.schema import Message, Plan, SystemMessage
-from metagpt.tools import TOOL_REGISTRY
-from metagpt.tools.tool_registry import validate_tool_names
-from metagpt.utils.common import create_func_call_config
-
-
-class BaseWriteAnalysisCode(Action):
-    DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**"""  # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
-    # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
-
-    def insert_system_message(self, context: list[Message], system_msg: str = None):
-        system_msg = system_msg or self.DEFAULT_SYSTEM_MSG
-        context.insert(0, SystemMessage(content=system_msg)) if context[0].role != "system" else None
-        return context
-
-    async def run(self, context: list[Message], plan: Plan = None) -> dict:
-        """Run of a code writing action, used in data analysis or modeling
-
-        Args:
-            context (list[Message]): Action output history, source action denoted by Message.cause_by
-            plan (Plan, optional): Overall plan. Defaults to None.
-
-        Returns:
-            dict: code result in the format of {"code": "print('hello world')", "language": "python"}
-        """
-        raise NotImplementedError
-
-
-class WriteCodeWithoutTools(BaseWriteAnalysisCode):
-    """Ask LLM to generate codes purely by itself without local user-defined tools"""
-
-    async def run(self, context: list[Message], plan: Plan = None, system_msg: str = None, **kwargs) -> dict:
-        messages = self.insert_system_message(context, system_msg)
-        rsp = await self.llm.aask_code(messages, **kwargs)
-        return rsp
-
-
-class WriteCodeWithTools(BaseWriteAnalysisCode):
-    """Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
-
-    # selected tools to choose from, listed by their names. An empty list means selection from all tools.
-    selected_tools: list[str] = []
-
-    def _get_tools_by_type(self, tool_type: str) -> dict:
-        """
-        Retreive tools by tool type from registry, but filtered by pre-selected tool list
-
-        Args:
-            tool_type (str): Tool type to retrieve from the registry
-
-        Returns:
-            dict: A dict of tool name to Tool object, representing available tools under the type
-        """
-        candidate_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
-        if self.selected_tools:
-            candidate_tool_names = set(self.selected_tools) & candidate_tools.keys()
-            candidate_tools = {tool_name: candidate_tools[tool_name] for tool_name in candidate_tool_names}
-        return candidate_tools
-
-    async def _recommend_tool(
-        self,
-        task: str,
-        available_tools: dict,
-    ) -> dict:
-        """
-        Recommend tools for the specified task.
-
-        Args:
-            task (str): the task to recommend tools for
-            available_tools (dict): the available tools description
-
-        Returns:
-            dict: schemas of recommended tools for the specified task
-        """
-        prompt = TOOL_RECOMMENDATION_PROMPT.format(
-            current_task=task,
-            available_tools=available_tools,
-        )
-        tool_config = create_func_call_config(SELECT_FUNCTION_TOOLS)
-        rsp = await self.llm.aask_code(prompt, **tool_config)
-        recommend_tools = rsp["recommend_tools"]
-        logger.info(f"Recommended tools: \n{recommend_tools}")
-
-        # Parses and validates the  recommended tools, for LLM might hallucinate and recommend non-existing tools
-        valid_tools = validate_tool_names(recommend_tools, return_tool_object=True)
-
-        tool_schemas = {tool.name: tool.schemas for tool in valid_tools}
-
-        return tool_schemas
-
-    async def _prepare_tools(self, plan: Plan) -> Tuple[dict, str]:
-        """Prepare tool schemas and usage instructions according to current task
-
-        Args:
-            plan (Plan): The overall plan containing task information.
-
-        Returns:
-            Tuple[dict, str]: A tool schemas ({tool_name: tool_schema_dict}) and a usage prompt for the type of tools selected
-        """
-        # find tool type from task type through exact match, can extend to retrieval in the future
-        tool_type = plan.current_task.task_type
-
-        # prepare tool-type-specific instruction
-        tool_type_usage_prompt = (
-            TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
-        )
-
-        # prepare schemas of available tools
-        tool_schemas = {}
-        available_tools = self._get_tools_by_type(tool_type)
-        if available_tools:
-            available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
-            tool_schemas = await self._recommend_tool(plan.current_task.instruction, available_tools)
-
-        return tool_schemas, tool_type_usage_prompt
-
-    async def run(
-        self,
-        context: list[Message],
-        plan: Plan,
-        **kwargs,
-    ) -> str:
-        # prepare tool schemas and tool-type-specific instruction
-        tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
-
-        # form a complete tool usage instruction and include it as a message in context
-        tools_instruction = TOOL_USAGE_PROMPT.format(
-            tool_schemas=tool_schemas, tool_type_usage_prompt=tool_type_usage_prompt
-        )
-        context.append(Message(content=tools_instruction, role="user"))
-
-        # prepare prompt & LLM call
-        prompt = self.insert_system_message(context)
-        tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS)
-        rsp = await self.llm.aask_code(prompt, **tool_config)
-
-        return rsp
--- a/metagpt/actions/project_management_an.py
+++ b/metagpt/actions/project_management_an.py
@ -8,7 +8,6 @@
 from typing import List

 from metagpt.actions.action_node import ActionNode
-from metagpt.logs import logger

 REQUIRED_PYTHON_PACKAGES = ActionNode(
    key="Required Python packages",
@ -119,14 +118,3 @@ REFINED_NODES = [

 PM_NODE = ActionNode.from_children("PM_NODE", NODES)
 REFINED_PM_NODE = ActionNode.from_children("REFINED_PM_NODE", REFINED_NODES)
-
-
-def main():
-    prompt = PM_NODE.compile(context="")
-    logger.info(prompt)
-    prompt = REFINED_PM_NODE.compile(context="")
-    logger.info(prompt)
-
-
-if __name__ == "__main__":
-    main()
--- a/metagpt/actions/rebuild_class_view.py
+++ b/metagpt/actions/rebuild_class_view.py
@ -4,10 +4,12 @@
@Time    : 2023/12/19
@Author  : mashenquan
@File    : rebuild_class_view.py
-@Desc    : Rebuild class view info
+@Desc    : Reconstructs class diagram from a source code project.
+    Implement RFC197, https://deepwisdom.feishu.cn/wiki/VyK0wfq56ivuvjklMKJcmHQknGt
 """
-import re
+
 from pathlib import Path
+from typing import Optional, Set, Tuple

 import aiofiles

@ -21,86 +23,144 @@ from metagpt.const import (
    GRAPH_REPO_FILE_REPO,
 )
 from metagpt.logs import logger
-from metagpt.repo_parser import RepoParser
-from metagpt.schema import ClassAttribute, ClassMethod, ClassView
-from metagpt.utils.common import split_namespace
+from metagpt.repo_parser import DotClassInfo, RepoParser
+from metagpt.schema import UMLClassView
+from metagpt.utils.common import concat_namespace, split_namespace
 from metagpt.utils.di_graph_repository import DiGraphRepository
 from metagpt.utils.graph_repository import GraphKeyword, GraphRepository


 class RebuildClassView(Action):
+    """
+    Reconstructs a graph repository about class diagram from a source code project.
+
+    Attributes:
+        graph_db (Optional[GraphRepository]): The optional graph repository.
+    """
+
+    graph_db: Optional[GraphRepository] = None
+
    async def run(self, with_messages=None, format=config.prompt_schema):
+        """
+        Implementation of `Action`'s `run` method.
+
+        Args:
+            with_messages (Optional[Type]): An optional argument specifying messages to react to.
+            format (str): The format for the prompt schema.
+        """
        graph_repo_pathname = self.context.git_repo.workdir / GRAPH_REPO_FILE_REPO / self.context.git_repo.workdir.name
-        graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json")))
+        self.graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json")))
        repo_parser = RepoParser(base_directory=Path(self.i_context))
        # use pylint
        class_views, relationship_views, package_root = await repo_parser.rebuild_class_views(path=Path(self.i_context))
-        await GraphRepository.update_graph_db_with_class_views(graph_db, class_views)
-        await GraphRepository.update_graph_db_with_class_relationship_views(graph_db, relationship_views)
+        await GraphRepository.update_graph_db_with_class_views(self.graph_db, class_views)
+        await GraphRepository.update_graph_db_with_class_relationship_views(self.graph_db, relationship_views)
+        await GraphRepository.rebuild_composition_relationship(self.graph_db)
        # use ast
        direction, diff_path = self._diff_path(path_root=Path(self.i_context).resolve(), package_root=package_root)
        symbols = repo_parser.generate_symbols()
        for file_info in symbols:
            # Align to the same root directory in accordance with `class_views`.
            file_info.file = self._align_root(file_info.file, direction, diff_path)
-            await GraphRepository.update_graph_db_with_file_info(graph_db, file_info)
-        await self._create_mermaid_class_views(graph_db=graph_db)
-        await graph_db.save()
+            await GraphRepository.update_graph_db_with_file_info(self.graph_db, file_info)
+        await self._create_mermaid_class_views()
+        await self.graph_db.save()

-    async def _create_mermaid_class_views(self, graph_db):
-        path = Path(self.context.git_repo.workdir) / DATA_API_DESIGN_FILE_REPO
+    async def _create_mermaid_class_views(self) -> str:
+        """Creates a Mermaid class diagram using data from the `graph_db` graph repository.
+
+        This method utilizes information stored in the graph repository to generate a Mermaid class diagram.
+        Returns:
+            mermaid class diagram file name.
+        """
+        path = self.context.git_repo.workdir / DATA_API_DESIGN_FILE_REPO
        path.mkdir(parents=True, exist_ok=True)
        pathname = path / self.context.git_repo.workdir.name
-        async with aiofiles.open(str(pathname.with_suffix(".mmd")), mode="w", encoding="utf-8") as writer:
+        filename = str(pathname.with_suffix(".class_diagram.mmd"))
+        async with aiofiles.open(filename, mode="w", encoding="utf-8") as writer:
            content = "classDiagram\n"
            logger.debug(content)
            await writer.write(content)
            # class names
-            rows = await graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS)
+            rows = await self.graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS)
            class_distinct = set()
            relationship_distinct = set()
            for r in rows:
-                await RebuildClassView._create_mermaid_class(r.subject, graph_db, writer, class_distinct)
+                content = await self._create_mermaid_class(r.subject)
+                if content:
+                    await writer.write(content)
+                    class_distinct.add(r.subject)
            for r in rows:
-                await RebuildClassView._create_mermaid_relationship(r.subject, graph_db, writer, relationship_distinct)
+                content, distinct = await self._create_mermaid_relationship(r.subject)
+                if content:
+                    logger.debug(content)
+                    await writer.write(content)
+                    relationship_distinct.update(distinct)
+        logger.info(f"classes: {len(class_distinct)}, relationship: {len(relationship_distinct)}")

-    @staticmethod
-    async def _create_mermaid_class(ns_class_name, graph_db, file_writer, distinct):
+        if self.i_context:
+            r_filename = Path(filename).relative_to(self.context.git_repo.workdir)
+            await self.graph_db.insert(
+                subject=self.i_context, predicate="hasMermaidClassDiagramFile", object_=str(r_filename)
+            )
+            logger.info(f"{self.i_context} hasMermaidClassDiagramFile {filename}")
+        return filename
+
+    async def _create_mermaid_class(self, ns_class_name) -> str:
+        """Generates a Mermaid class diagram for a specific class using data from the `graph_db` graph repository.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed name of the class for which the Mermaid class diagram is to be created.
+
+        Returns:
+            str: A Mermaid code block object in markdown representing the class diagram.
+        """
        fields = split_namespace(ns_class_name)
        if len(fields) > 2:
            # Ignore sub-class
-            return
+            return ""

-        class_view = ClassView(name=fields[1])
-        rows = await graph_db.select(subject=ns_class_name)
-        for r in rows:
-            name = split_namespace(r.object_)[-1]
-            name, visibility, abstraction = RebuildClassView._parse_name(name=name, language="python")
-            if r.predicate == GraphKeyword.HAS_CLASS_PROPERTY:
-                var_type = await RebuildClassView._parse_variable_type(r.object_, graph_db)
-                attribute = ClassAttribute(
-                    name=name, visibility=visibility, abstraction=bool(abstraction), value_type=var_type
-                )
-                class_view.attributes.append(attribute)
-            elif r.predicate == GraphKeyword.HAS_CLASS_FUNCTION:
-                method = ClassMethod(name=name, visibility=visibility, abstraction=bool(abstraction))
-                await RebuildClassView._parse_function_args(method, r.object_, graph_db)
-                class_view.methods.append(method)
+        rows = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_DETAIL)
+        if not rows:
+            return ""
+        dot_class_info = DotClassInfo.model_validate_json(rows[0].object_)
+        class_view = UMLClassView.load_dot_class_info(dot_class_info)

-        # update graph db
-        await graph_db.insert(ns_class_name, GraphKeyword.HAS_CLASS_VIEW, class_view.model_dump_json())
+        # update uml view
+        await self.graph_db.insert(ns_class_name, GraphKeyword.HAS_CLASS_VIEW, class_view.model_dump_json())
+        # update uml isCompositeOf
+        for c in dot_class_info.compositions:
+            await self.graph_db.insert(
+                subject=ns_class_name,
+                predicate=GraphKeyword.IS + COMPOSITION + GraphKeyword.OF,
+                object_=concat_namespace("?", c),
+            )
+
+        # update uml isAggregateOf
+        for a in dot_class_info.aggregations:
+            await self.graph_db.insert(
+                subject=ns_class_name,
+                predicate=GraphKeyword.IS + AGGREGATION + GraphKeyword.OF,
+                object_=concat_namespace("?", a),
+            )

        content = class_view.get_mermaid(align=1)
        logger.debug(content)
-        await file_writer.write(content)
-        distinct.add(ns_class_name)
+        return content

-    @staticmethod
-    async def _create_mermaid_relationship(ns_class_name, graph_db, file_writer, distinct):
+    async def _create_mermaid_relationship(self, ns_class_name: str) -> Tuple[Optional[str], Optional[Set]]:
+        """Generates a Mermaid class relationship diagram for a specific class using data from the `graph_db` graph repository.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name for which the Mermaid relationship diagram is to be created.
+
+        Returns:
+            Tuple[str, Set]: A tuple containing the relationship diagram as a string and a set of deduplication.
+        """
        s_fields = split_namespace(ns_class_name)
        if len(s_fields) > 2:
            # Ignore sub-class
-            return
+            return None, None

        predicates = {GraphKeyword.IS + v + GraphKeyword.OF: v for v in [GENERALIZATION, COMPOSITION, AGGREGATION]}
        mappings = {
@ -109,8 +169,9 @@ class RebuildClassView(Action):
            AGGREGATION: " o-- ",
        }
        content = ""
+        distinct = set()
        for p, v in predicates.items():
-            rows = await graph_db.select(subject=ns_class_name, predicate=p)
+            rows = await self.graph_db.select(subject=ns_class_name, predicate=p)
            for r in rows:
                o_fields = split_namespace(r.object_)
                if len(o_fields) > 2:
@ -121,86 +182,26 @@ class RebuildClassView(Action):
                distinct.add(link)
                content += f"\t{link}\n"

-        if content:
-            logger.debug(content)
-            await file_writer.write(content)
-
-    @staticmethod
-    def _parse_name(name: str, language="python"):
-        pattern = re.compile(r"<I>(.*?)<\/I>")
-        result = re.search(pattern, name)
-
-        abstraction = ""
-        if result:
-            name = result.group(1)
-            abstraction = "*"
-        if name.startswith("__"):
-            visibility = "-"
-        elif name.startswith("_"):
-            visibility = "#"
-        else:
-            visibility = "+"
-        return name, visibility, abstraction
-
-    @staticmethod
-    async def _parse_variable_type(ns_name, graph_db) -> str:
-        rows = await graph_db.select(subject=ns_name, predicate=GraphKeyword.HAS_TYPE_DESC)
-        if not rows:
-            return ""
-        vals = rows[0].object_.replace("'", "").split(":")
-        if len(vals) == 1:
-            return ""
-        val = vals[-1].strip()
-        return "" if val == "NoneType" else val + " "
-
-    @staticmethod
-    async def _parse_function_args(method: ClassMethod, ns_name: str, graph_db: GraphRepository):
-        rows = await graph_db.select(subject=ns_name, predicate=GraphKeyword.HAS_ARGS_DESC)
-        if not rows:
-            return
-        info = rows[0].object_.replace("'", "")
-
-        fs_tag = "("
-        ix = info.find(fs_tag)
-        fe_tag = "):"
-        eix = info.rfind(fe_tag)
-        if eix < 0:
-            fe_tag = ")"
-            eix = info.rfind(fe_tag)
-        args_info = info[ix + len(fs_tag) : eix].strip()
-        method.return_type = info[eix + len(fe_tag) :].strip()
-        if method.return_type == "None":
-            method.return_type = ""
-        if "(" in method.return_type:
-            method.return_type = method.return_type.replace("(", "Tuple[").replace(")", "]")
-
-        # parse args
-        if not args_info:
-            return
-        splitter_ixs = []
-        cost = 0
-        for i in range(len(args_info)):
-            if args_info[i] == "[":
-                cost += 1
-            elif args_info[i] == "]":
-                cost -= 1
-            if args_info[i] == "," and cost == 0:
-                splitter_ixs.append(i)
-        splitter_ixs.append(len(args_info))
-        args = []
-        ix = 0
-        for eix in splitter_ixs:
-            args.append(args_info[ix:eix])
-            ix = eix + 1
-        for arg in args:
-            parts = arg.strip().split(":")
-            if len(parts) == 1:
-                method.args.append(ClassAttribute(name=parts[0].strip()))
-                continue
-            method.args.append(ClassAttribute(name=parts[0].strip(), value_type=parts[-1].strip()))
+        return content, distinct

    @staticmethod
    def _diff_path(path_root: Path, package_root: Path) -> (str, str):
+        """Returns the difference between the root path and the path information represented in the package name.
+
+        Args:
+            path_root (Path): The root path.
+            package_root (Path): The package root path.
+
+        Returns:
+            Tuple[str, str]: A tuple containing the representation of the difference ("+", "-", "=") and the path detail of the differing part.
+
+        Example:
+            >>> _diff_path(path_root=Path("/Users/x/github/MetaGPT"), package_root=Path("/Users/x/github/MetaGPT/metagpt"))
+            "-", "metagpt"
+
+            >>> _diff_path(path_root=Path("/Users/x/github/MetaGPT/metagpt"), package_root=Path("/Users/x/github/MetaGPT/metagpt"))
+            "=", "."
+        """
        if len(str(path_root)) > len(str(package_root)):
            return "+", str(path_root.relative_to(package_root))
        if len(str(path_root)) < len(str(package_root)):
@ -208,7 +209,24 @@ class RebuildClassView(Action):
        return "=", "."

    @staticmethod
-    def _align_root(path: str, direction: str, diff_path: str):
+    def _align_root(path: str, direction: str, diff_path: str) -> str:
+        """Aligns the path to the same root represented by `diff_path`.
+
+        Args:
+            path (str): The path to be aligned.
+            direction (str): The direction of alignment ('+', '-', '=').
+            diff_path (str): The path representing the difference.
+
+        Returns:
+            str: The aligned path.
+
+        Example:
+            >>> _align_root(path="metagpt/software_company.py", direction="+", diff_path="MetaGPT")
+            "MetaGPT/metagpt/software_company.py"
+
+            >>> _align_root(path="metagpt/software_company.py", direction="-", diff_path="metagpt")
+            "software_company.py"
+        """
        if direction == "=":
            return path
        if direction == "+":
--- a/metagpt/actions/rebuild_sequence_view.py
+++ b/metagpt/actions/rebuild_sequence_view.py
@ -4,34 +4,214 @@
@Time    : 2024/1/4
@Author  : mashenquan
@File    : rebuild_sequence_view.py
-@Desc    : Rebuild sequence view info
+@Desc    : Reconstruct sequence view information through reverse engineering.
+    Implement RFC197, https://deepwisdom.feishu.cn/wiki/VyK0wfq56ivuvjklMKJcmHQknGt
 """
 from __future__ import annotations

+import re
+from datetime import datetime
 from pathlib import Path
-from typing import List
+from typing import List, Optional, Set
+
+from pydantic import BaseModel
+from tenacity import retry, stop_after_attempt, wait_random_exponential

 from metagpt.actions import Action
 from metagpt.config2 import config
 from metagpt.const import GRAPH_REPO_FILE_REPO
 from metagpt.logs import logger
-from metagpt.utils.common import aread, list_files
+from metagpt.repo_parser import CodeBlockInfo, DotClassInfo
+from metagpt.schema import UMLClassView
+from metagpt.utils.common import (
+    add_affix,
+    aread,
+    auto_namespace,
+    concat_namespace,
+    general_after_log,
+    list_files,
+    parse_json_code_block,
+    read_file_block,
+    split_namespace,
+)
 from metagpt.utils.di_graph_repository import DiGraphRepository
-from metagpt.utils.graph_repository import GraphKeyword
+from metagpt.utils.graph_repository import SPO, GraphKeyword, GraphRepository
+
+
+class ReverseUseCase(BaseModel):
+    """
+    A single use case recovered from source code by reverse engineering.
+
+    Attributes:
+        description (str): What the use case does.
+        inputs (List[str]): Names of the inputs the use case takes from external sources.
+        outputs (List[str]): Names of the outputs the use case delivers to external sources.
+        actors (List[str]): The actors that participate in the use case.
+        steps (List[str]): How the use case works, step by step.
+        reason (str): The circumstances under which an external system would execute the use case.
+    """
+
+    description: str
+    inputs: List[str]
+    outputs: List[str]
+    actors: List[str]
+    steps: List[str]
+    reason: str
+
+
+class ReverseUseCaseDetails(BaseModel):
+    """
+    The use-case analysis recovered for one piece of source code.
+
+    Attributes:
+        description (str): What the analysed source code as a whole is meant to do.
+        use_cases (List[ReverseUseCase]): Every use case identified in the source code.
+        relationship (List[str]): Descriptions of the relationships among the use cases.
+    """
+
+    description: str
+    use_cases: List[ReverseUseCase]
+    relationship: List[str]


 class RebuildSequenceView(Action):
-    async def run(self, with_messages=None, format=config.prompt_schema):
-        graph_repo_pathname = self.context.git_repo.workdir / GRAPH_REPO_FILE_REPO / self.context.git_repo.workdir.name
-        graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json")))
-        entries = await RebuildSequenceView._search_main_entry(graph_db)
-        for entry in entries:
-            await self._rebuild_sequence_view(entry, graph_db)
-        await graph_db.save()
+    """
+    Represents an action to reconstruct sequence view through reverse engineering.

-    @staticmethod
-    async def _search_main_entry(graph_db) -> List:
-        rows = await graph_db.select(predicate=GraphKeyword.HAS_PAGE_INFO)
+    Attributes:
+        graph_db (Optional[GraphRepository]): An optional instance of GraphRepository for graph database operations.
+    """
+
+    graph_db: Optional[GraphRepository] = None
+
+    async def run(self, with_messages=None, format=config.prompt_schema):
+        """
+        Rebuild the sequence view of every entry point and persist the graph database.
+
+        The graph database is loaded from the JSON file named after the git working directory. When
+        `i_context` is set it becomes the only entry; otherwise the entries come from
+        `_search_main_entry`. For each entry the main sequence view is rebuilt and then merged with
+        further participants until `_merge_sequence_view` reports that nothing is left to merge.
+
+        Args:
+            with_messages: Not used by this implementation.
+            format: Not used by this implementation.
+        """
+        workdir = self.context.git_repo.workdir
+        graph_file = (workdir / GRAPH_REPO_FILE_REPO / workdir.name).with_suffix(".json")
+        self.graph_db = await DiGraphRepository.load_from(str(graph_file))
+
+        if self.i_context:
+            entries = [SPO(subject=self.i_context, predicate="", object_="")]
+        else:
+            entries = await self._search_main_entry()
+
+        for entry in entries:
+            await self._rebuild_main_sequence_view(entry)
+            merged = True
+            while merged:
+                merged = await self._merge_sequence_view(entry)
+        await self.graph_db.save()
+
+    @retry(
+        wait=wait_random_exponential(min=1, max=20),
+        stop=stop_after_attempt(6),
+        after=general_after_log(logger),
+    )
+    async def _rebuild_main_sequence_view(self, entry: SPO):
+        """
+        Reconstruct the sequence diagram for the __main__ entry of the source code through reverse engineering.
+
+        The prompt sent to the LLM is assembled from the use cases, participants, Mermaid class views and
+        source code of the classes whose subject contains the entry's file name. The resulting diagram
+        replaces any sequence view already stored for the entry, is recorded as a timestamped version,
+        and is saved as a file. Every class used for the prompt is recorded as a participant of the entry.
+
+        Args:
+            entry (SPO): The SPO (Subject, Predicate, Object) object in the graph database that is related to the
+                subject `__name__:__main__`.
+        """
+        filename = entry.subject.split(":", 1)[0]
+        rows = await self.graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS)
+        prefix = filename + ":"
+        classes = []
+        for r in rows:
+            if prefix in r.subject:
+                classes.append(r)
+                await self._rebuild_use_case(r.subject)
+        participants = await self._search_participants(split_namespace(entry.subject)[0])
+        class_views = []
+        for c in classes:
+            # Classes without stored detail contribute neither a class view nor participants.
+            if not await self._get_class_detail(c.subject):
+                continue
+            view = await self._get_uml_class_view(c.subject)
+            if view:
+                class_views.append(view)
+            participants.update(await self._get_participants(c.subject))
+
+        use_case_blocks = [await self._get_class_use_cases(c.subject) for c in classes]
+        source_code = await self._get_source_code(filename)
+        prompt_blocks = [
+            "## Use Cases\n" + "\n".join(use_case_blocks),
+            "## Participants\n" + "".join(f"- {p}\n" for p in participants),
+            "## Mermaid Class Views\n```mermaid\n" + "\n\n".join(c.get_mermaid() for c in class_views) + "\n```\n",
+            "## Source Code\n```python\n" + source_code + "\n```\n",
+        ]
+        prompt = "\n---\n".join(prompt_blocks)
+
+        rsp = await self.llm.aask(
+            msg=prompt,
+            system_msgs=[
+                "You are a python code to Mermaid Sequence Diagram translator in function detail.",
+                "Translate the given markdown text to a Mermaid Sequence Diagram.",
+                "Return the merged Mermaid sequence diagram in a markdown code block format.",
+            ],
+            stream=False,
+        )
+        # Strip surrounding whitespace first; otherwise a trailing newline keeps the closing fence in place.
+        sequence_view = rsp.strip().removeprefix("```mermaid").removesuffix("```")
+        # Replace whatever sequence view is currently stored for this entry.
+        rows = await self.graph_db.select(subject=entry.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW)
+        for r in rows:
+            await self.graph_db.delete(subject=r.subject, predicate=r.predicate, object_=r.object_)
+        await self.graph_db.insert(
+            subject=entry.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW, object_=sequence_view
+        )
+        await self.graph_db.insert(
+            subject=entry.subject,
+            predicate=GraphKeyword.HAS_SEQUENCE_VIEW_VER,
+            object_=concat_namespace(datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3], add_affix(sequence_view)),
+        )
+        for c in classes:
+            await self.graph_db.insert(
+                subject=entry.subject, predicate=GraphKeyword.HAS_PARTICIPANT, object_=auto_namespace(c.subject)
+            )
+        await self._save_sequence_view(subject=entry.subject, content=sequence_view)
+
+    async def _merge_sequence_view(self, entry: SPO) -> bool:
+        """
+        Merge one not-yet-merged participant into the sequence diagram of `entry`.
+
+        Args:
+            entry (SPO): The SPO object whose sequence diagram is being augmented.
+
+        Returns:
+            bool: True if a participant was found and handed to `_merge_participant`, False if
+                `_search_new_participant` found none.
+        """
+        candidate = await self._search_new_participant(entry)
+        if candidate:
+            await self._merge_participant(entry, candidate)
+            return True
+        return False
+
+    async def _search_main_entry(self) -> List:
+        """
+        Asynchronously searches for the SPO object that is related to `__name__:__main__`.
+
+        Returns:
+            List: A list containing information about the main entry in the sequence diagram.
+        """
+        rows = await self.graph_db.select(predicate=GraphKeyword.HAS_PAGE_INFO)
        tag = "__name__:__main__"
        entries = []
        for r in rows:
@ -39,24 +219,395 @@ class RebuildSequenceView(Action):
                entries.append(r)
        return entries

-    async def _rebuild_sequence_view(self, entry, graph_db):
-        filename = entry.subject.split(":", 1)[0]
-        src_filename = RebuildSequenceView._get_full_filename(root=self.i_context, pathname=filename)
-        if not src_filename:
+    @retry(
+        wait=wait_random_exponential(min=1, max=20),
+        stop=stop_after_attempt(6),
+        after=general_after_log(logger),
+    )
+    async def _rebuild_use_case(self, ns_class_name: str):
+        """
+        Asynchronously reconstructs the use case for the provided namespace-prefixed class name.
+
+        Nothing is done when use cases are already stored for the class or when the class has no stored
+        detail. Otherwise the LLM is prompted with the class's participants, Mermaid class view and source
+        code, and every JSON block of its reply is validated and stored as a use case of the class.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name for which the use case is to be reconstructed.
+        """
+        rows = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_CLASS_USE_CASE)
+        if rows:
            return
-        content = await aread(filename=src_filename, encoding="utf-8")
-        content = f"```python\n{content}\n```\n\n---\nTranslate the code above into Mermaid Sequence Diagram."
-        data = await self.llm.aask(
-            msg=content, system_msgs=["You are a python code to Mermaid Sequence Diagram translator in function detail"]
+
+        detail = await self._get_class_detail(ns_class_name)
+        if not detail:
+            return
+        participants = set()
+        participants.update(set(detail.compositions))
+        participants.update(set(detail.aggregations))
+        class_view = await self._get_uml_class_view(ns_class_name)
+        source_code = await self._get_source_code(ns_class_name)
+
+        # A class may have detail but no stored class view; send an empty Mermaid block in that case
+        # instead of failing on `None.get_mermaid()`.
+        mermaid = class_view.get_mermaid() if class_view else ""
+        prompt_blocks = [
+            "## Participants\n" + "".join(f"- {p}\n" for p in participants),
+            "## Mermaid Class Views\n```mermaid\n" + mermaid + "\n```\n",
+            "## Source Code\n```python\n" + source_code + "\n```\n",
+        ]
+        prompt = "\n---\n".join(prompt_blocks)
+
+        rsp = await self.llm.aask(
+            msg=prompt,
+            system_msgs=[
+                "You are a python code to UML 2.0 Use Case translator.",
+                'The generated UML 2.0 Use Case must include the roles or entities listed in "Participants".',
+                "The functional descriptions of Actors and Use Cases in the generated UML 2.0 Use Case must not "
+                'conflict with the information in "Mermaid Class Views".',
+                'The section under `if __name__ == "__main__":` of "Source Code" contains information about external '
+                "system interactions with the internal system.",
+                "Return a markdown JSON object with:\n"
+                '- a "description" key to explain what the whole source code want to do;\n'
+                '- a "use_cases" key list all use cases, each use case in the list should including a `description` '
+                "key describes about what the use case to do, a `inputs` key lists the input names of the use case "
+                "from external sources, a `outputs` key lists the output names of the use case to external sources, "
+                "a `actors` key lists the participant actors of the use case, a `steps` key lists the steps about how "
+                "the use case works step by step, a `reason` key explaining under what circumstances would the "
+                "external system execute this use case.\n"
+                '- a "relationship" key lists all the descriptions of relationship among these use cases.\n',
+            ],
+            stream=False,
+        )
+
+        # Validate each returned JSON block before storing it; a validation error triggers a retry.
+        for json_block in parse_json_code_block(rsp):
+            use_case_details = ReverseUseCaseDetails.model_validate_json(json_block)
+            await self.graph_db.insert(
+                subject=ns_class_name,
+                predicate=GraphKeyword.HAS_CLASS_USE_CASE,
+                object_=use_case_details.model_dump_json(),
+            )
+
+    @retry(
+        wait=wait_random_exponential(min=1, max=20),
+        stop=stop_after_attempt(6),
+        after=general_after_log(logger),
+    )
+    async def _rebuild_sequence_view(self, ns_class_name: str):
+        """
+        Asynchronously reconstructs the sequence diagram for the provided namespace-prefixed class name.
+
+        The class's use cases are rebuilt first. When no use case content exists for the class, it is
+        treated as external and an empty sequence view is stored. Otherwise the LLM is prompted with
+        the use cases, participants, Mermaid class view and source code, and its reply is stored as the
+        class's sequence view.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name for which the sequence diagram is to be reconstructed.
+        """
+        await self._rebuild_use_case(ns_class_name)
+
+        use_case_markdown = await self._get_class_use_cases(ns_class_name)
+        # `_get_class_use_cases` always ends its result with a newline, so a plain truthiness test would
+        # never detect the "no use cases" case; check for real content instead.
+        if not use_case_markdown.strip():  # external class
+            await self.graph_db.insert(subject=ns_class_name, predicate=GraphKeyword.HAS_SEQUENCE_VIEW, object_="")
+            return
+
+        participants = await self._get_participants(ns_class_name)
+        view = await self._get_uml_class_view(ns_class_name)
+        # The class view may be missing; send an empty Mermaid block rather than fail on `None`.
+        mermaid = view.get_mermaid() if view else ""
+        source_code = await self._get_source_code(ns_class_name)
+        prompts_blocks = [
+            f"## Use Cases\n{use_case_markdown}",
+            "## Participants\n" + "\n".join([f"- {s}" for s in participants]),
+            "## Mermaid Class Views\n```mermaid\n" + mermaid + "\n```\n",
+            "## Source Code\n```python\n" + source_code + "\n```\n",
+        ]
+        prompt = "\n---\n".join(prompts_blocks)
+
+        rsp = await self.llm.aask(
+            prompt,
+            system_msgs=[
+                "You are a Mermaid Sequence Diagram translator in function detail.",
+                "Translate the markdown text to a Mermaid Sequence Diagram.",
+                "Return a markdown mermaid code block.",
+            ],
+            stream=False,
+        )
+
+        # Strip surrounding whitespace first; otherwise a trailing newline keeps the closing fence in place.
+        sequence_view = rsp.strip().removeprefix("```mermaid").removesuffix("```")
+        await self.graph_db.insert(
+            subject=ns_class_name, predicate=GraphKeyword.HAS_SEQUENCE_VIEW, object_=sequence_view
+        )
+
+    async def _get_participants(self, ns_class_name: str) -> List[str]:
+        """
+        Collect the participants of the sequence diagram of a namespace-prefixed class.
+
+        The participants are the distinct compositions and aggregations listed in the class detail.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name whose participants are wanted.
+
+        Returns:
+            List[str]: The distinct participant names in no particular order, or an empty list when
+                the class has no stored detail.
+        """
+        detail = await self._get_class_detail(ns_class_name)
+        if not detail:
+            return []
+        members = set()
+        for group in (detail.compositions, detail.aggregations):
+            members.update(set(group))
+        return list(members)
+
+    async def _get_class_use_cases(self, ns_class_name: str) -> str:
+        """
+        Render the stored use cases of a namespace-prefixed class as markdown.
+
+        Each stored use case record becomes a numbered section holding its use cases (inputs, outputs,
+        actors and steps as bullet lists) followed by the relationships among them.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name whose use cases are rendered.
+
+        Returns:
+            str: The markdown text, always terminated by a newline (a lone newline when nothing is stored).
+        """
+
+        def bullets(items) -> str:
+            return "\n".join([f"- {item}" for item in items])
+
+        rows = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_CLASS_USE_CASE)
+        parts = []
+        for row_no, row in enumerate(rows, start=1):
+            detail = ReverseUseCaseDetails.model_validate_json(row.object_)
+            parts.append(f"\n### {row_no}. {detail.description}")
+            for case_no, use_case in enumerate(detail.use_cases, start=1):
+                parts.append(f"\n#### {row_no}.{case_no}. {use_case.description}\n")
+                parts.append("\n##### Inputs\n" + bullets(use_case.inputs))
+                parts.append("\n##### Outputs\n" + bullets(use_case.outputs))
+                parts.append("\n##### Actors\n" + bullets(use_case.actors))
+                parts.append("\n##### Steps\n" + bullets(use_case.steps))
+            parts.append("\n#### Use Case Relationship\n" + bullets(detail.relationship))
+        return "".join(parts) + "\n"
+
+    async def _get_class_detail(self, ns_class_name: str) -> DotClassInfo | None:
+        """
+        Load the dot-format class detail stored for a namespace-prefixed class.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name to look up.
+
+        Returns:
+            Union[DotClassInfo, None]: The detail parsed from the first matching row, or None when the
+                graph database holds no detail for the class.
+        """
+        found = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_DETAIL)
+        return DotClassInfo.model_validate_json(found[0].object_) if found else None
+
+    async def _get_uml_class_view(self, ns_class_name: str) -> UMLClassView | None:
+        """
+        Load the UML 2.0 class view stored for a namespace-prefixed class.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name to look up.
+
+        Returns:
+            Union[UMLClassView, None]: The class view parsed from the first matching row, or None when
+                the graph database holds no class view for the class.
+        """
+        found = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_CLASS_VIEW)
+        return UMLClassView.model_validate_json(found[0].object_) if found else None
+
+    async def _get_source_code(self, ns_class_name: str) -> str:
+        """
+        Read the source code that belongs to a namespace-prefixed name.
+
+        When page info is stored for the name, only the recorded line range of its file is read.
+        Otherwise the file part of the name is resolved against `i_context` and the whole file is read.
+
+        Args:
+            ns_class_name (str): The namespace-prefixed class name whose source code is wanted.
+
+        Returns:
+            str: The source code, or an empty string when the file cannot be located.
+        """
+        page_rows = await self.graph_db.select(subject=ns_class_name, predicate=GraphKeyword.HAS_PAGE_INFO)
+        filename = split_namespace(ns_class_name=ns_class_name)[0]
+        if not page_rows:
+            located = RebuildSequenceView._get_full_filename(root=self.i_context, pathname=filename)
+            return await aread(filename=located, encoding="utf-8") if located else ""
+        span = CodeBlockInfo.model_validate_json(page_rows[0].object_)
+        return await read_file_block(
+            filename=filename, lineno=span.lineno, end_lineno=span.end_lineno
        )
-        await graph_db.insert(subject=filename, predicate=GraphKeyword.HAS_SEQUENCE_VIEW, object_=data)
-        logger.info(data)

    @staticmethod
    def _get_full_filename(root: str | Path, pathname: str | Path) -> Path | None:
+        """
+        Convert package name to the full path of the module.
+
+        A `pathname` that starts with "/" is returned unchanged. Otherwise the files under `root` are
+        searched for one whose path ends with "/" + `pathname`.
+
+        Args:
+            root (Union[str, Path]): The root path or string representing the package.
+            pathname (Union[str, Path]): The pathname or string representing the module.
+
+        Returns:
+            Union[Path, None]: The full path of the module, or None if the path cannot be determined.
+
+        Examples:
+            If `root`(workdir) is "/User/xxx/github/MetaGPT/metagpt", and the `pathname` is
+            "metagpt/management/skill_manager.py", then the returned value will be
+            "/User/xxx/github/MetaGPT/metagpt/management/skill_manager.py"
+        """
+        # `re.match` only accepts strings, while `pathname` may be a `Path`.
+        if re.match(r"^/.+", str(pathname)):
+            return pathname
        files = list_files(root=root)
        postfix = "/" + str(pathname)
        for i in files:
            if str(i).endswith(postfix):
                return i
        return None
+
+    @staticmethod
+    def parse_participant(mermaid_sequence_diagram: str) -> List[str]:
+        """
+        Extract the participant names declared in a Mermaid sequence diagram.
+
+        A name is the run of letters, digits, dots and underscores that follows the keyword
+        `participant` and a single space.
+
+        Args:
+            mermaid_sequence_diagram (str): The Mermaid sequence diagram text to scan.
+
+        Returns:
+            List[str]: The declared names in order of appearance, duplicates included.
+        """
+        declaration = re.compile(r"participant ([a-zA-Z\.0-9_]+)")
+        names = []
+        for found in declaration.finditer(mermaid_sequence_diagram):
+            names.append(re.sub(r"[\\/'\"]+", "", found.group(1)))
+        return names
+
+    async def _search_new_participant(self, entry: SPO) -> str | None:
+        """
+        Find a participant of the entry's sequence diagram that has not been merged yet.
+
+        The participants declared in the stored sequence view are compared with the names already
+        recorded as participants of the entry.
+
+        Args:
+            entry (SPO): The SPO object whose sequence diagram is inspected.
+
+        Returns:
+            Union[str, None]: The first declared participant that is not recorded yet, or None when
+                there is no stored sequence view or every participant is already recorded.
+        """
+        views = await self.graph_db.select(subject=entry.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW)
+        if not views:
+            return None
+        diagram = views[0].object_
+        recorded = await self.graph_db.select(subject=entry.subject, predicate=GraphKeyword.HAS_PARTICIPANT)
+        merged_names = [split_namespace(row.object_)[-1] for row in recorded]
+        declared = self.parse_participant(diagram)
+        return next((name for name in declared if name not in merged_names), None)
+
+    @retry(
+        wait=wait_random_exponential(min=1, max=20),
+        stop=stop_after_attempt(6),
+        after=general_after_log(logger),
+    )
+    async def _merge_participant(self, entry: SPO, class_name: str):
+        """
+        Augments the sequence diagram of `class_name` to the sequence diagram of `entry`.
+
+        On every path the participant is recorded under the entry, so that
+        `_search_new_participant` does not return it again:
+        - no class in the graph has this name: recorded under the "?" namespace, nothing is merged;
+        - several classes share the name: all of them are recorded, nothing is merged;
+        - the class yields no usable sequence view: recorded, nothing is merged;
+        - otherwise the LLM merges both diagrams and the result replaces the entry's sequence view,
+          is recorded as a timestamped version and is saved as a file.
+
+        Args:
+            entry (SPO): The SPO object representing the base sequence diagram.
+            class_name (str): The class name whose sequence diagram is to be augmented.
+        """
+        rows = await self.graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS)
+        participants = [r for r in rows if split_namespace(r.subject)[-1] == class_name]
+        if not participants:  # external participants
+            await self.graph_db.insert(
+                subject=entry.subject, predicate=GraphKeyword.HAS_PARTICIPANT, object_=concat_namespace("?", class_name)
+            )
+            return
+        if len(participants) > 1:
+            for r in participants:
+                await self.graph_db.insert(
+                    subject=entry.subject, predicate=GraphKeyword.HAS_PARTICIPANT, object_=auto_namespace(r.subject)
+                )
+            return
+
+        participant = participants[0]
+        await self._rebuild_sequence_view(participant.subject)
+        sequence_views = await self.graph_db.select(
+            subject=participant.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW
+        )
+        if not sequence_views or not sequence_views[0].object_.strip():  # external class
+            # Still record the participant: `run` keeps merging for as long as
+            # `_search_new_participant` finds an unrecorded one, so skipping this would loop forever.
+            await self.graph_db.insert(
+                subject=entry.subject,
+                predicate=GraphKeyword.HAS_PARTICIPANT,
+                object_=auto_namespace(participant.subject),
+            )
+            return
+        rows = await self.graph_db.select(subject=entry.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW)
+        prompt = f"```mermaid\n{sequence_views[0].object_}\n```\n---\n```mermaid\n{rows[0].object_}\n```"
+
+        rsp = await self.llm.aask(
+            prompt,
+            system_msgs=[
+                "You are a tool to merge sequence diagrams into one.",
+                "Participants with the same name are considered identical.",
+                "Return the merged Mermaid sequence diagram in a markdown code block format.",
+            ],
+            stream=False,
+        )
+
+        # Strip surrounding whitespace first; otherwise a trailing newline keeps the closing fence in place.
+        sequence_view = rsp.strip().removeprefix("```mermaid").removesuffix("```")
+        # Replace the entry's stored sequence view with the merged one.
+        rows = await self.graph_db.select(subject=entry.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW)
+        for r in rows:
+            await self.graph_db.delete(subject=r.subject, predicate=r.predicate, object_=r.object_)
+        await self.graph_db.insert(
+            subject=entry.subject, predicate=GraphKeyword.HAS_SEQUENCE_VIEW, object_=sequence_view
+        )
+        await self.graph_db.insert(
+            subject=entry.subject,
+            predicate=GraphKeyword.HAS_SEQUENCE_VIEW_VER,
+            object_=concat_namespace(datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3], add_affix(sequence_view)),
+        )
+        await self.graph_db.insert(
+            subject=entry.subject, predicate=GraphKeyword.HAS_PARTICIPANT, object_=auto_namespace(participant.subject)
+        )
+        await self._save_sequence_view(subject=entry.subject, content=sequence_view)
+
+    async def _save_sequence_view(self, subject: str, content: str):
+        """
+        Save a sequence diagram as a `.sequence_diagram.mmd` file in the data API design resources.
+
+        The file name is derived from `subject` by replacing every character that is not an ASCII
+        letter or digit with an underscore.
+
+        Args:
+            subject (str): The subject the sequence diagram belongs to.
+            content (str): The sequence diagram text to save.
+        """
+        safe_name = re.sub(re.compile(r"[^a-zA-Z0-9]"), "_", subject)
+        target = Path(safe_name).with_suffix(".sequence_diagram.mmd")
+        await self.context.repo.resources.data_api_design.save(filename=str(target), content=content)
+
+    async def _search_participants(self, filename: str) -> Set:
+        """
+        Ask the LLM which class names are used in a source file.
+
+        Args:
+            filename (str): The name passed to `_get_source_code` to obtain the source text.
+
+        Returns:
+            Set: The distinct class names reported by the LLM.
+
+        Raises:
+            ValueError: If the LLM response contains no JSON code block.
+        """
+        content = await self._get_source_code(filename)
+
+        rsp = await self.llm.aask(
+            msg=content,
+            system_msgs=[
+                "You are a tool for listing all class names used in a source file.",
+                "Return a markdown JSON object with: "
+                '- a "class_names" key containing the list of class names used in the file; '
+                '- a "reasons" key lists all reason objects, each object containing a "class_name" key for class name, a "reference" key explaining the line where the class has been used.',
+            ],
+            stream=False,
+        )
+
+        class _Data(BaseModel):
+            class_names: List[str]
+            reasons: List
+
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            # Fail with a descriptive error instead of an IndexError on `json_blocks[0]`.
+            raise ValueError("No JSON code block found in the LLM response listing class names.")
+        data = _Data.model_validate_json(json_blocks[0])
+        return set(data.class_names)
--- a/metagpt/actions/rebuild_sequence_view_an.py
+++ b/metagpt/actions/rebuild_sequence_view_an.py
@ -1,16 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-@Time    : 2024/1/4
-@Author  : mashenquan
-@File    : rebuild_sequence_view_an.py
-"""
-from metagpt.actions.action_node import ActionNode
-from metagpt.utils.mermaid import MMC2
-
-CODE_2_MERMAID_SEQUENCE_DIAGRAM = ActionNode(
-    key="Program call flow",
-    expected_type=str,
-    instruction='Translate the "context" content into "format example" format.',
-    example=MMC2,
-)
--- a/metagpt/actions/skill_action.py
+++ b/metagpt/actions/skill_action.py
@ -50,6 +50,7 @@ class ArgumentsParingAction(Action):
        rsp = await self.llm.aask(
            msg=prompt,
            system_msgs=["You are a function parser.", "You can convert spoken words into function parameters."],
+            stream=False,
        )
        logger.debug(f"SKILL:{prompt}\n, RESULT:{rsp}")
        self.args = ArgumentsParingAction.parse_arguments(skill_name=self.skill.name, txt=rsp)
--- a/metagpt/actions/talk_action.py
+++ b/metagpt/actions/talk_action.py
@ -92,7 +92,7 @@ class TalkAction(Action):

    async def run(self, with_message=None, **kwargs) -> Message:
        msg, format_msgs, system_msgs = self.aask_args
-        rsp = await self.llm.aask(msg=msg, format_msgs=format_msgs, system_msgs=system_msgs)
+        rsp = await self.llm.aask(msg=msg, format_msgs=format_msgs, system_msgs=system_msgs, stream=False)
        self.rsp = Message(content=rsp, role="assistant", cause_by=self)
        return self.rsp

--- a/metagpt/actions/write_code_plan_and_change_an.py
+++ b/metagpt/actions/write_code_plan_and_change_an.py
@ -6,30 +6,44 @@
@File    : write_code_plan_and_change_an.py
 """
 import os
+from typing import List

 from pydantic import Field

 from metagpt.actions.action import Action
 from metagpt.actions.action_node import ActionNode
+from metagpt.logs import logger
 from metagpt.schema import CodePlanAndChangeContext

-CODE_PLAN_AND_CHANGE = ActionNode(
-    key="Code Plan And Change",
-    expected_type=str,
-    instruction="Developing comprehensive and step-by-step incremental development plan, and write Incremental "
-    "Change by making a code draft that how to implement incremental development including detailed steps based on the "
-    "context. Note: Track incremental changes using mark of '+' or '-' for add/modify/delete code, and conforms to the "
-    "output format of git diff",
-    example="""
-1. Plan for calculator.py: Enhance the functionality of `calculator.py` by extending it to incorporate methods for subtraction, multiplication, and division. Additionally, implement robust error handling for the division operation to mitigate potential issues related to division by zero. 
-```python
+# Output node "Development Plan": a list of strings describing the incremental
+# development steps, ordered by the 'Task List' as the instruction requests.
+# The example shows one entry per affected file.
+DEVELOPMENT_PLAN = ActionNode(
+    key="Development Plan",
+    expected_type=List[str],
+    instruction="Develop a comprehensive and step-by-step incremental development plan, providing the detail "
+    "changes to be implemented at each step based on the order of 'Task List'",
+    example=[
+        "Enhance the functionality of `calculator.py` by extending it to incorporate methods for subtraction, ...",
+        "Update the existing codebase in main.py to incorporate new API endpoints for subtraction, ...",
+    ],
+)
+
+INCREMENTAL_CHANGE = ActionNode(
+    key="Incremental Change",
+    expected_type=List[str],
+    instruction="Write Incremental Change by making a code draft that how to implement incremental development "
+    "including detailed steps based on the context. Note: Track incremental changes using the marks `+` and `-` to "
+    "indicate additions and deletions, and ensure compliance with the output format of `git diff`",
+    example=[
+        '''```diff
+--- Old/calculator.py
+++ New/calculator.py
+
 class Calculator:
         self.result = number1 + number2
         return self.result

 -    def sub(self, number1, number2) -> float:
 +    def subtract(self, number1: float, number2: float) -> float:
-+        '''
+        """
 +        Subtracts the second number from the first and returns the result.
 +
 +        Args:
@ -38,13 +52,13 @@ class Calculator:
 +
 +        Returns:
 +            float: The difference of number1 and number2.
-+        '''
+        """
 +        self.result = number1 - number2
 +        return self.result
 +
    def multiply(self, number1: float, number2: float) -> float:
 -        pass
-+        '''
+        """
 +        Multiplies two numbers and returns the result.
 +
 +        Args:
@ -53,15 +67,15 @@ class Calculator:
 +
 +        Returns:
 +            float: The product of number1 and number2.
-+        '''
+        """
 +        self.result = number1 * number2
 +        return self.result
 +
    def divide(self, number1: float, number2: float) -> float:
 -        pass
-+        '''
+        """
 +            ValueError: If the second number is zero.
-+        '''
+        """
 +        if number2 == 0:
 +            raise ValueError('Cannot divide by zero')
 +        self.result = number1 / number2
@ -75,10 +89,11 @@ class Calculator:
 +            print("Result is already zero, no need to clear.")
 +
         self.result = 0.0
-```
+```''',
+        """```diff
+--- Old/main.py
+++ New/main.py

-2. Plan for main.py: Integrate new API endpoints for subtraction, multiplication, and division into the existing codebase of `main.py`. Then, ensure seamless integration with the overall application architecture and maintain consistency with coding standards.
-```python
 def add_numbers():
     result = calculator.add_numbers(num1, num2)
     return jsonify({'result': result}), 200
@ -106,6 +121,7 @@ def add_numbers():
 if __name__ == '__main__':
     app.run()
 ```""",
+    ],
 )

 CODE_PLAN_AND_CHANGE_CONTEXT = """
@ -172,14 +188,16 @@ Role: You are a professional engineer; The main goal is to complete incremental
 2. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.
 3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. AVOID circular import.
 4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.
-5. Follow Code Plan And Change: If there is any Incremental Change that is marked by the git diff format using '+' and '-' for add/modify/delete code, or Legacy Code files contain "{filename} to be rewritten", you must merge it into the code file according to the plan. 
+5. Follow Code Plan And Change: If there is any "Incremental Change" that is marked by the git diff format with '+' and '-' symbols, or Legacy Code files contain "{filename} to be rewritten", you must merge it into the code file according to the "Development Plan". 
 6. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.
 7. Before using a external variable/module, make sure you import it first.
 8. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.
 9. Attention: Retain details that are not related to incremental development but are important for maintaining the consistency and clarity of the old code.
 """

-WRITE_CODE_PLAN_AND_CHANGE_NODE = ActionNode.from_children("WriteCodePlanAndChange", [CODE_PLAN_AND_CHANGE])
+CODE_PLAN_AND_CHANGE = [DEVELOPMENT_PLAN, INCREMENTAL_CHANGE]
+
+WRITE_CODE_PLAN_AND_CHANGE_NODE = ActionNode.from_children("WriteCodePlanAndChange", CODE_PLAN_AND_CHANGE)


 class WriteCodePlanAndChange(Action):
@ -192,14 +210,14 @@ class WriteCodePlanAndChange(Action):
        prd_doc = await self.repo.docs.prd.get(filename=self.i_context.prd_filename)
        design_doc = await self.repo.docs.system_design.get(filename=self.i_context.design_filename)
        task_doc = await self.repo.docs.task.get(filename=self.i_context.task_filename)
-        code_text = await self.get_old_codes()
        context = CODE_PLAN_AND_CHANGE_CONTEXT.format(
            requirement=self.i_context.requirement,
            prd=prd_doc.content,
            design=design_doc.content,
            task=task_doc.content,
-            code=code_text,
+            code=await self.get_old_codes(),
        )
+        logger.info("Writing code plan and change..")
        return await WRITE_CODE_PLAN_AND_CHANGE_NODE.fill(context=context, llm=self.llm, schema="json")

    async def get_old_codes(self) -> str:
--- a/metagpt/actions/write_prd_an.py
+++ b/metagpt/actions/write_prd_an.py
@ -56,7 +56,7 @@ REFINED_PRODUCT_GOALS = ActionNode(
    key="Refined Product Goals",
    expected_type=List[str],
    instruction="Update and expand the original product goals to reflect the evolving needs due to incremental "
-    "development.Ensure that the refined goals align with the current project direction and contribute to its success.",
+    "development. Ensure that the refined goals align with the current project direction and contribute to its success.",
    example=[
        "Enhance user engagement through new features",
        "Optimize performance for scalability",
--- a/metagpt/configs/llm_config.py
+++ b/metagpt/configs/llm_config.py
@ -16,6 +16,7 @@ from metagpt.utils.yaml_model import YamlModel
 class LLMType(Enum):
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
+    CLAUDE = "claude"  # alias name of anthropic
    SPARK = "spark"
    ZHIPUAI = "zhipuai"
    FIREWORKS = "fireworks"
@ -46,6 +47,7 @@ class LLMConfig(YamlModel):
    api_version: Optional[str] = None

    model: Optional[str] = None  # also stands for DEPLOYMENT_NAME
+    pricing_plan: Optional[str] = None  # Cost Settlement Plan Parameters.

    # For Cloud Service Provider like Baidu/ Alibaba
    access_key: Optional[str] = None
--- a/metagpt/const.py
+++ b/metagpt/const.py
@ -104,6 +104,7 @@ CODE_SUMMARIES_PDF_FILE_REPO = "resources/code_summary"
 RESOURCES_FILE_REPO = "resources"
 SD_OUTPUT_FILE_REPO = "resources/sd_output"
 GRAPH_REPO_FILE_REPO = "docs/graph_repo"
+VISUAL_GRAPH_REPO_FILE_REPO = "resources/graph_db"
 CLASS_VIEW_FILE_REPO = "docs/class_view"

 YAPI_URL = "http://yapi.deepwisdomai.com/"
--- a/metagpt/document.py
+++ b/metagpt/document.py
@ -11,15 +11,16 @@ from pathlib import Path
 from typing import Optional, Union

 import pandas as pd
-from langchain.document_loaders import (
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import (
    TextLoader,
    UnstructuredPDFLoader,
    UnstructuredWordDocumentLoader,
 )
-from langchain.text_splitter import CharacterTextSplitter
 from pydantic import BaseModel, ConfigDict, Field
 from tqdm import tqdm

+from metagpt.logs import logger
 from metagpt.repo_parser import RepoParser


@ -130,9 +131,12 @@ class IndexableDocument(Document):
        if isinstance(data, pd.DataFrame):
            validate_cols(content_col, data)
            return cls(data=data, content=str(data), content_col=content_col, meta_col=meta_col)
-        else:
+        try:
            content = data_path.read_text()
-            return cls(data=data, content=content, content_col=content_col, meta_col=meta_col)
+        except Exception as e:
+            logger.debug(f"Load {str(data_path)} error: {e}")
+            content = ""
+        return cls(data=data, content=content, content_col=content_col, meta_col=meta_col)

    def _get_docs_and_metadatas_by_df(self) -> (list, list):
        df = self.data
--- a/metagpt/memory/brain_memory.py
+++ b/metagpt/memory/brain_memory.py
@ -186,7 +186,7 @@ class BrainMemory(BaseModel):
        summaries = [summary, command]
        msg = "\n".join(summaries)
        logger.debug(f"title ask:{msg}")
-        response = await llm.aask(msg=msg, system_msgs=[])
+        response = await llm.aask(msg=msg, system_msgs=[], stream=False)
        logger.debug(f"title rsp: {response}")
        return response

@ -201,11 +201,15 @@ class BrainMemory(BaseModel):

    @staticmethod
    async def _openai_is_related(text1, text2, llm, **kwargs):
-        command = (
-            f"{text2}\n\nIs there any sentence above related to the following sentence: {text1}.\nIf is there "
-            "any relevance, return [TRUE] brief and clear. Otherwise, return [FALSE] brief and clear."
+        context = f"## Paragraph 1\n{text2}\n---\n## Paragraph 2\n{text1}\n"
+        rsp = await llm.aask(
+            msg=context,
+            system_msgs=[
+                "You are a tool capable of determining whether two paragraphs are semantically related. "
+                'Return "TRUE" if "Paragraph 1" is semantically relevant to "Paragraph 2", otherwise return "FALSE".'
+            ],
+            stream=False,
        )
-        rsp = await llm.aask(msg=command, system_msgs=[])
        result = True if "TRUE" in rsp else False
        p2 = text2.replace("\n", "")
        p1 = text1.replace("\n", "")
@ -223,12 +227,17 @@ class BrainMemory(BaseModel):

    @staticmethod
    async def _openai_rewrite(sentence: str, context: str, llm):
+        """Ask the LLM to augment ``sentence`` with information from ``context``.
+
+        The user message is a two-section prompt ("## Context" / "## Sentence");
+        the instructions are passed separately as system messages and the
+        request is sent with ``stream=False``.
+
+        Args:
+            sentence: Text to be augmented.
+            context: Text the LLM is told to draw information from.
+            llm: Object exposing an awaitable ``aask(msg=..., system_msgs=..., stream=...)``.
+
+        Returns:
+            The LLM reply as returned by ``llm.aask``; it is also logged at INFO level.
+        """
-        command = (
-            f"{context}\n\nExtract relevant information from every preceding sentence and use it to succinctly "
-            f"supplement or rewrite the following text in brief and clear:\n{sentence}"
+        prompt = f"## Context\n{context}\n---\n## Sentence\n{sentence}\n"
+        rsp = await llm.aask(
+            msg=prompt,
+            system_msgs=[
+                'You are a tool augmenting the "Sentence" with information from the "Context".',
+                "Do not supplement the context with information that is not present, especially regarding the subject and object.",
+                "Return the augmented sentence.",
+            ],
+            stream=False,
        )
-        rsp = await llm.aask(msg=command, system_msgs=[])
-        logger.info(f"REWRITE:\nCommand: {command}\nRESULT: {rsp}\n")
+        logger.info(f"REWRITE:\nCommand: {prompt}\nRESULT: {rsp}\n")
        return rsp

    @staticmethod
@ -293,14 +302,14 @@ class BrainMemory(BaseModel):
        """Generate text summary"""
        if len(text) < max_words:
            return text
+        system_msgs = [
+            "You are a tool for summarizing and abstracting text.",
+            f"Return the summarized text to less than {max_words} words.",
+        ]
        if keep_language:
-            command = f".Translate the above content into a summary of less than {max_words} words in language of the content strictly."
-        else:
-            command = f"Translate the above content into a summary of less than {max_words} words."
-        msg = text + "\n\n" + command
-        logger.debug(f"summary ask:{msg}")
-        response = await self.llm.aask(msg=msg, system_msgs=[])
-        logger.debug(f"summary rsp: {response}")
+            system_msgs.append("The generated summary should be in the same language as the original text.")
+        response = await self.llm.aask(msg=text, system_msgs=system_msgs, stream=False)
+        logger.debug(f"{text}\nsummary rsp: {response}")
        return response

    @staticmethod
--- a/metagpt/prompts/di/init.py
+++ b/metagpt/prompts/di/init.py
--- a/metagpt/prompts/di/write_analysis_code.py
+++ b/metagpt/prompts/di/write_analysis_code.py
@ -0,0 +1,112 @@
+# System message for the notebook-based data-science assistant; it tells the
+# model to use `await` rather than `asyncio.run` inside the notebook.
+INTERPRETER_SYSTEM_MSG = """As a data scientist, you need to help user to achieve their goal step by step in a continuous Jupyter notebook. Since it is a notebook environment, don't use asyncio.run. Instead, use await if you need to call an async function."""
+
+# Code-writing prompt template. Placeholders: {user_requirement}, {plan_status},
+# {tool_info}. It asks for exactly one ```python code block in the reply.
+STRUCTUAL_PROMPT = """
+# User Requirement
+{user_requirement}
+
+# Plan Status
+{plan_status}
+
+# Tool Info
+{tool_info}
+
+# Constraints
+- Take on Current Task if it is in Plan Status, otherwise, tackle User Requirement directly.
+- Ensure the output new code is executable in the same Jupyter notebook as the previous executed code.
+- Always prioritize using pre-defined tools for the same functionality.
+
+# Output
+While some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:
+```python
+your code
+```
+"""
+
+# System message for the debugging/reflection step.
+REFLECTION_SYSTEM_MSG = """You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation."""
+
+# Worked example of a reflection round (buggy `add`, failing tests, fix).
+# Single-quoted triple string because the example itself contains """ docstrings.
+# NOTE(review): the second sample assertion expects add(1, 2) == 4, which a
+# correct `add` cannot satisfy -- confirm whether that is intentional.
+DEBUG_REFLECTION_EXAMPLE = '''
+[previous impl]:
+assistant:
+```python
+def add(a: int, b: int) -> int:
+   """
+   Given integers a and b, return the total value of a and b.
+   """
+   return a - b
+```
+
+user:
+Tests failed:
+assert add(1, 2) == 3 # output: -1
+assert add(1, 2) == 4 # output: -1
+
+[reflection on previous impl]:
+The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
+
+[improved impl]:
+def add(a: int, b: int) -> int:
+   """
+   Given integers a and b, return the total value of a and b.
+   """
+   return a + b
+'''
+
+# Reflection prompt template. Placeholders: {debug_example}, {context},
+# {previous_impl}. The JSON braces are doubled, presumably so str.format emits
+# literal braces -- confirm against the caller.
+REFLECTION_PROMPT = """
+[example]
+Here is an example of debugging with reflection.
+{debug_example}
+[/example]
+
+[context]
+{context}
+
+[previous impl]:
+{previous_impl}
+
+[instruction]
+Analyze your previous code and error in [context] step by step, provide me with improved method and code. Remember to follow [context] requirement. Don't forget to write code for steps behind the error step.
+Output a json following the format:
+```json
+{{
+    "reflection": str = "Reflection on previous implementation",
+    "improved_impl": str = "Refined code after reflection.",
+}}
+```
+"""
+
+CHECK_DATA_PROMPT = """
+# Background
+Check latest data info to guide subsequent tasks.
+
+## Finished Tasks
+```python
+{code_written}
+```end
+
+# Task
+Check code in finished tasks, print key variables to guide your following actions.
+Specifically, if it is a data analysis or machine learning task, print the latest column information using the following code, with DataFrame variable from 'Finished Tasks' in place of df:
+```python
+from metagpt.tools.libs.data_preprocess import get_column_info
+
+column_info = get_column_info(df)
+print("column_info")
+print(column_info)
+```end
+Otherwise, print out any key variables you see fit. Return an empty string if you think there is no important data to check.
+
+# Constraints:
+- Your code is to be added to a new cell in jupyter.
+
+# Instruction
+Output code following the format:
+```python
+your code
+```
+"""
+
+# Template for the "Latest Data Info" prompt section; {info} is the only placeholder.
+DATA_INFO = """
+# Latest Data Info
+Latest data info after previous tasks:
+{info}
+"""
--- a/metagpt/prompts/mi/ml_action.py
+++ b/metagpt/prompts/mi/ml_action.py
@ -1,128 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/24 15:43
-# @Author  : lidanyang
-# @File    : ml_action
-# @Desc    :
-UPDATE_DATA_COLUMNS = """
-# Background
-Keep dataset column information updated before model train.
-## Done Tasks
-```python
-{history_code}
-```end
-
-# Task
-Update and print the dataset's column information only if the train or test data has changed. Use the following code:
-```python
-from metagpt.tools.libs.data_preprocess import get_column_info
-
-column_info = get_column_info(df)
-print("column_info")
-print(column_info)
-```end
-
-# Constraints:
- Use the DataFrame variable from 'Done Tasks' in place of df.
- Import `get_column_info` only if it's not already imported.
-"""
-
-PRINT_DATA_COLUMNS = {
-    "name": "print_column_info",
-    "description": "Print the latest column information after 'Done Tasks' code if first read or data changed.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "code": {
-                "type": "string",
-                "description": "The code to be added to a new cell in jupyter.",
-            },
-        },
-        "required": ["code"],
-    },
-}
-
-ML_COMMON_PROMPT = """
-# Background
-As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
-
-## Done Tasks
-```python
-{history_code}
-```end
-
-## Current Task
-{current_task}
-
-# Latest Data Info
-Latest data info after previous tasks:
-{column_info}
-
-# Task
-Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
-Specifically, {tool_type_usage_prompt}
-"""
-
-USE_NO_TOOLS_EXAMPLE = """
-# Output Example:
-when current task is "train a lightgbm model on training data", the code can be like:
-```python
-# Step 1: check data type and convert to numeric
-obj_cols = train.select_dtypes(include='object').columns.tolist()
-
-for col in obj_cols:
-    encoder = LabelEncoder()
-    train[col] = encoder.fit_transform(train[col].unique().tolist() + ['unknown'])
-    test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')
-    test[col] = encoder.transform(test[col])
-
-# Step 2: train lightgbm model
-model = LGBMClassifier()
-model.fit(train, y_train)
-```end
-
-# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
-"""
-
-USE_TOOLS_EXAMPLE = """
-# Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
-
-# Available Tools:
-Each Class tool is described in JSON format. When you call a tool, import the tool from its path first.
-{tool_schemas}
-
-# Output Example:
-when current task is "do data preprocess, like fill missing value, handle outliers, etc.", the code can be like:
-```python
-# Step 1: fill missing value
-# Tools used: ['FillMissingValue']
-from metagpt.tools.libs.data_preprocess import FillMissingValue
-
-train_processed = train.copy()
-test_processed = test.copy()
-num_cols = train_processed.select_dtypes(include='number').columns.tolist()
-if 'label' in num_cols:
-    num_cols.remove('label')
-fill_missing_value = FillMissingValue(features=num_cols, strategy='mean')
-fill_missing_value.fit(train_processed)
-train_processed = fill_missing_value.transform(train_processed)
-test_processed = fill_missing_value.transform(test_processed)
-
-# Step 2: handle outliers
-for col in num_cols:
-    low, high = train_processed[col].quantile([0.01, 0.99])
-    train_processed[col] = train_processed[col].clip(low, high)
-    test_processed[col] = test_processed[col].clip(low, high)
-```end
-
-# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- Always prioritize using pre-defined tools for the same functionality.
- Always copy the DataFrame before processing it and use the copy to process.
-"""
-
-ML_GENERATE_CODE_PROMPT = ML_COMMON_PROMPT + USE_NO_TOOLS_EXAMPLE
-ML_TOOL_USAGE_PROMPT = ML_COMMON_PROMPT + USE_TOOLS_EXAMPLE
--- a/metagpt/prompts/mi/write_analysis_code.py
+++ b/metagpt/prompts/mi/write_analysis_code.py
@ -1,93 +0,0 @@
-ASSIGN_TASK_TYPE_PROMPT = """
-Please assign a task type to each task in the list below from the given categories:
-{task_info}
-
-## All Task Type:
-{task_type_desc}
-"""
-
-ASSIGN_TASK_TYPE_CONFIG = {
-    "name": "assign_task_type",
-    "description": "Assign task type to each task by order.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "task_type": {
-                "type": "array",
-                "description": "List of task type. The length should as long as task list",
-                "items": {
-                    "type": "string",
-                },
-            },
-        },
-        "required": ["task_type"],
-    },
-}
-
-TOOL_RECOMMENDATION_PROMPT = """
-## User Requirement:
-{current_task}
-
-## Task
-Recommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'. 
-
-## Available Tools:
-{available_tools}
-
-## Tool Selection and Instructions:
- Select tools most relevant to completing the 'User Requirement'.
- If you believe that no tools are suitable, indicate with an empty list.
- Only list the names of the tools, not the full schema of each tool.
- Ensure selected tools are listed in 'Available Tools'.
-"""
-
-SELECT_FUNCTION_TOOLS = {
-    "name": "select_function_tools",
-    "description": "For current task, select suitable tools for it.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "recommend_tools": {
-                "type": "array",
-                "description": "List of tool names. Empty list if no tool is suitable.",
-                "items": {
-                    "type": "string",
-                },
-            },
-        },
-        "required": ["recommend_tools"],
-    },
-}
-
-CODE_GENERATOR_WITH_TOOLS = {
-    "name": "add_subtask_code",
-    "description": "Add new code cell of current task to the end of an active Jupyter notebook.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "code": {
-                "type": "string",
-                "description": "The code to be added to a new cell in jupyter.",
-            },
-        },
-        "required": ["code"],
-    },
-}
-
-TOOL_USAGE_PROMPT = """
-# Instruction
-Write complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.
-Specifically, {tool_type_usage_prompt}
-
-# Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
-
-# Available Tools (can be empty):
-Each Class tool is described in JSON format. When you call a tool, import the tool first.
-{tool_schemas}
-
-# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- Always prioritize using pre-defined tools for the same functionality.
-"""
--- a/metagpt/prompts/tool_types.py
+++ b/metagpt/prompts/tool_types.py
@ -1,11 +1,11 @@
-# Prompt for using tools of "eda" type
+# Prompt for taking on "eda" tasks
 EDA_PROMPT = """
 The current task is about exploratory data analysis, please note the following:
 - Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.
 - Remember to `import numpy as np` before using Numpy functions.
 """

-# Prompt for using tools of "data_preprocess" type
+# Prompt for taking on "data_preprocess" tasks
 DATA_PREPROCESS_PROMPT = """
 The current task is about data preprocessing, please note the following:
 - Monitor data types per column, applying appropriate methods.
@ -15,9 +15,10 @@ The current task is about data preprocessing, please note the following:
 - Prefer alternatives to one-hot encoding for categorical data.
 - Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
 - Each step do data preprocessing to train, must do same for test separately at the same time.
+- Always copy the DataFrame before processing it and use the copy to process.
 """

-# Prompt for using tools of "feature_engineering" type
+# Prompt for taking on "feature_engineering" tasks
 FEATURE_ENGINEERING_PROMPT = """
 The current task is about feature engineering. when performing it, please adhere to the following principles:
 - Generate as diverse features as possible to improve the model's performance step-by-step. 
@ -27,9 +28,10 @@ The current task is about feature engineering. when performing it, please adhere
 - Each feature engineering operation performed on the train set must also applies to the test separately at the same time.
 - Avoid using the label column to create features, except for cat encoding.
 - Use the data from previous task result if exist, do not mock or reload data yourself.
+- Always copy the DataFrame before processing it and use the copy to process.
 """

-# Prompt for using tools of "model_train" type
+# Prompt for taking on "model_train" tasks
 MODEL_TRAIN_PROMPT = """
 The current task is about training a model, please ensure high performance:
 - Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
@ -38,14 +40,14 @@ The current task is about training a model, please ensure high performance:
 - Set suitable hyperparameters for the model, make metrics as high as possible.
 """

-# Prompt for using tools of "model_evaluate" type
+# Prompt for taking on "model_evaluate" tasks
 MODEL_EVALUATE_PROMPT = """
 The current task is about evaluating a model, please note the following:
 - Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
 - Use trained model from previous task result directly, do not mock or reload model yourself.
 """

-# Prompt for using tools of "vision" type
+# Prompt for taking on "image2webpage" tasks
 IMAGE2WEBPAGE_PROMPT = """
 The current task is about converting image into webpage code. please note the following:
 - Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
--- a/metagpt/provider/init.py
+++ b/metagpt/provider/init.py
@ -16,6 +16,7 @@ from metagpt.provider.human_provider import HumanProvider
 from metagpt.provider.spark_api import SparkLLM
 from metagpt.provider.qianfan_api import QianFanLLM
 from metagpt.provider.dashscope_api import DashScopeLLM
+from metagpt.provider.anthropic_api import AnthropicLLM

 __all__ = [
    "GeminiLLM",
@ -28,4 +29,5 @@ __all__ = [
    "SparkLLM",
    "QianFanLLM",
    "DashScopeLLM",
+    "AnthropicLLM",
 ]
--- a/metagpt/provider/anthropic_api.py
+++ b/metagpt/provider/anthropic_api.py
@ -1,37 +1,71 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-"""
-@Time    : 2023/7/21 11:15
-@Author  : Leo Xiao
-@File    : anthropic_api.py
-"""

-import anthropic
-from anthropic import Anthropic, AsyncAnthropic
+from anthropic import AsyncAnthropic
+from anthropic.types import Message, Usage

-from metagpt.configs.llm_config import LLMConfig
+from metagpt.configs.llm_config import LLMConfig, LLMType
+from metagpt.logs import log_llm_stream
+from metagpt.provider.base_llm import BaseLLM
+from metagpt.provider.llm_provider_registry import register_provider


-class Claude2:
+@register_provider([LLMType.ANTHROPIC, LLMType.CLAUDE])
+class AnthropicLLM(BaseLLM):
+    """LLM provider that talks to Anthropic through ``AsyncAnthropic.messages``.
+
+    Registered for both ``LLMType.ANTHROPIC`` and its alias ``LLMType.CLAUDE``.
+    """
+
    def __init__(self, config: LLMConfig):
        self.config = config
+        self.__init_anthropic()

-    def ask(self, prompt: str) -> str:
-        client = Anthropic(api_key=self.config.api_key)
+    def __init_anthropic(self):
+        """Read the model name from the config and create the async client."""
+        self.model = self.config.model
+        self.aclient: AsyncAnthropic = AsyncAnthropic(api_key=self.config.api_key, base_url=self.config.base_url)

-        res = client.completions.create(
-            model="claude-2",
-            prompt=f"{anthropic.HUMAN_PROMPT} {prompt} {anthropic.AI_PROMPT}",
-            max_tokens_to_sample=1000,
-        )
-        return res.completion
+    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
+        """Build the keyword arguments for ``self.aclient.messages.create``.
+
+        When ``self.use_system_prompt`` (not defined in this class) is truthy and
+        the first message has role ``"system"``, that message is removed from
+        ``messages`` and its content is passed as the ``system`` argument.
+        NOTE(review): ``messages[0]`` is read without an emptiness check --
+        callers are presumably expected to pass at least one message.
+        """
+        kwargs = {
+            "model": self.model,
+            "messages": messages,
+            "max_tokens": self.config.max_token,
+            "stream": stream,
+        }
+        if self.use_system_prompt:
+            # if the model support system prompt, extract and pass it
+            if messages[0]["role"] == "system":
+                kwargs["messages"] = messages[1:]
+                kwargs["system"] = messages[0]["content"]  # set system prompt here
+        return kwargs

-    async def aask(self, prompt: str) -> str:
-        aclient = AsyncAnthropic(api_key=self.config.api_key)
+    def _update_costs(self, usage: Usage, model: str = None, local_calc_usage: bool = True):
+        """Convert an Anthropic ``Usage`` into a prompt/completion token dict and delegate to the base class.
+
+        ``local_calc_usage`` is accepted but not forwarded to ``super()._update_costs``.
+        """
+        usage = {"prompt_tokens": usage.input_tokens, "completion_tokens": usage.output_tokens}
+        super()._update_costs(usage, model)

-        res = await aclient.completions.create(
-            model="claude-2",
-            prompt=f"{anthropic.HUMAN_PROMPT} {prompt} {anthropic.AI_PROMPT}",
-            max_tokens_to_sample=1000,
-        )
-        return res.completion
+    def get_choice_text(self, resp: Message) -> str:
+        """Return the text of the first content block of ``resp``."""
+        return resp.content[0].text
+
+    async def _achat_completion(self, messages: list[dict], timeout: int = 3) -> Message:
+        """Send a non-streaming request and record its token usage.
+
+        ``timeout`` is accepted but not passed to the client call here.
+        """
+        resp: Message = await self.aclient.messages.create(**self._const_kwargs(messages))
+        self._update_costs(resp.usage, self.model)
+        return resp
+
+    async def acompletion(self, messages: list[dict], timeout: int = 3) -> Message:
+        """Thin wrapper that forwards to ``_achat_completion``."""
+        return await self._achat_completion(messages, timeout=timeout)
+
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
+        """Send a streaming request, log each text delta, and return the joined text.
+
+        Token usage is assembled from the stream events: ``message_start`` gives
+        the initial input/output counts and ``message_delta`` overwrites the
+        output count. Costs are updated once after the stream ends (no model
+        argument is passed). ``timeout`` is not used in this body.
+        """
+        stream = await self.aclient.messages.create(**self._const_kwargs(messages, stream=True))
+        collected_content = []
+        usage = Usage(input_tokens=0, output_tokens=0)
+        async for event in stream:
+            event_type = event.type
+            if event_type == "message_start":
+                usage.input_tokens = event.message.usage.input_tokens
+                usage.output_tokens = event.message.usage.output_tokens
+            elif event_type == "content_block_delta":
+                content = event.delta.text
+                log_llm_stream(content)
+                collected_content.append(content)
+            elif event_type == "message_delta":
+                usage.output_tokens = event.usage.output_tokens  # update final output_tokens
+
+        log_llm_stream("\n")
+        self._update_costs(usage)
+        full_content = "".join(collected_content)
+        return full_content
--- a/metagpt/provider/azure_openai_api.py
+++ b/metagpt/provider/azure_openai_api.py
@ -6,8 +6,6 @@
@Modified By: mashenquan, 2023/11/21. Fix bug: ReadTimeout.
@Modified By: mashenquan, 2023/12/1. Fix bug: Unclosed connection caused by openai 0.x.
 """
-
-
 from openai import AsyncAzureOpenAI
 from openai._base_client import AsyncHttpxClientWrapper

@ -27,6 +25,7 @@ class AzureOpenAILLM(OpenAILLM):
        # https://learn.microsoft.com/zh-cn/azure/ai-services/openai/how-to/migration?tabs=python-new%2Cdalle-fix
        self.aclient = AsyncAzureOpenAI(**kwargs)
        self.model = self.config.model  # Used in _calc_usage & _cons_kwargs
+        self.pricing_plan = self.config.pricing_plan or self.model

    def _make_client_kwargs(self) -> dict:
        kwargs = dict(
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@ -6,17 +6,29 @@
@File    : base_llm.py
@Desc    : mashenquan, 2023/8/22. + try catch
 """
+from __future__ import annotations
+
 import json
 from abc import ABC, abstractmethod
-from typing import Optional, Union
+from typing import Dict, Optional, Union

 from openai import AsyncOpenAI
+from openai.types import CompletionUsage
 from pydantic import BaseModel
+from tenacity import (
+    after_log,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_random_exponential,
+)

 from metagpt.configs.llm_config import LLMConfig
 from metagpt.logs import logger
 from metagpt.schema import Message
+from metagpt.utils.common import log_and_reraise
 from metagpt.utils.cost_manager import CostManager, Costs
+from metagpt.utils.exceptions import handle_exception


 class BaseLLM(ABC):
@ -30,6 +42,7 @@ class BaseLLM(ABC):
    aclient: Optional[Union[AsyncOpenAI]] = None
    cost_manager: Optional[CostManager] = None
    model: Optional[str] = None
+    pricing_plan: Optional[str] = None

    @abstractmethod
    def __init__(self, config: LLMConfig):
@ -92,7 +105,7 @@ class BaseLLM(ABC):

    async def aask(
        self,
-        msg: str,
+        msg: Union[str, list[dict[str, str]]],
        system_msgs: Optional[list[str]] = None,
        format_msgs: Optional[list[dict[str, str]]] = None,
        images: Optional[Union[str, list[str]]] = None,
@ -107,7 +120,10 @@ class BaseLLM(ABC):
            message = []
        if format_msgs:
            message.extend(format_msgs)
-        message.append(self._user_msg(msg, images=images))
+        if isinstance(msg, str):
+            message.append(self._user_msg(msg, images=images))
+        else:
+            message.extend(msg)
        logger.debug(message)
        rsp = await self.acompletion_text(message, stream=stream, timeout=timeout)
        return rsp
@ -125,10 +141,13 @@ class BaseLLM(ABC):
            context.append(self._assistant_msg(rsp_text))
        return self._extract_assistant_rsp(context)

-    async def aask_code(self, messages: Union[str, Message, list[dict]], timeout=3) -> dict:
-        """FIXME: No code segment filtering has been done here, and all results are actually displayed"""
+    async def aask_code(self, messages: Union[str, Message, list[dict]], timeout=3, **kwargs) -> dict:
        raise NotImplementedError

+    @abstractmethod
+    async def _achat_completion(self, messages: list[dict], timeout=3):
+        """Perform one non-streaming chat completion; implemented by each subclass.
+
+        ``acompletion_text`` awaits this method when ``stream`` is false and passes
+        the result to ``get_choice_text``.
+        """
+
    @abstractmethod
    async def acompletion(self, messages: list[dict], timeout=3):
        """Asynchronous version of completion
@ -141,8 +160,22 @@ class BaseLLM(ABC):
        """

    @abstractmethod
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
+        """Perform one streaming chat completion and return the text; implemented by each subclass.
+
+        ``acompletion_text`` awaits this method when ``stream`` is true and returns
+        its result unchanged.
+        """
+
+    # Retry policy (tenacity): only ConnectionError triggers a retry, at most 3 attempts are
+    # made, the wait between attempts is a randomized exponential backoff bounded to 1-60
+    # seconds, attempts are logged at WARNING level, and log_and_reraise runs once the
+    # attempts are exhausted.
+    # NOTE(review): no ConnectionError import is visible in this module's import block, so this
+    # appears to be the builtin ConnectionError; some provider modules previously retried on
+    # requests.ConnectionError instead - confirm the builtin is the intended type.
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_random_exponential(min=1, max=60),
+        after=after_log(logger, logger.level("WARNING").name),
+        retry=retry_if_exception_type(ConnectionError),
+        retry_error_callback=log_and_reraise,
+    )
+    async def acompletion_text(self, messages: list[dict], stream: bool = False, timeout: int = 3) -> str:
        """Asynchronous version of completion. Return str. Support stream-print"""
+        # Streaming path: the subclass hook returns the text itself.
+        if stream:
+            return await self._achat_completion_stream(messages, timeout=timeout)
+        # Non-streaming path: the subclass hook returns a response object and
+        # get_choice_text extracts the text from it.
+        resp = await self._achat_completion(messages, timeout=timeout)
+        return self.get_choice_text(resp)

    def get_choice_text(self, rsp: dict) -> str:
        """Required to provide the first text of choice"""
@ -192,6 +225,20 @@ class BaseLLM(ABC):
        """
        return json.loads(self.get_choice_function(rsp)["arguments"], strict=False)

+    @handle_exception
+    def _update_costs(self, usage: CompletionUsage | Dict, model: Optional[str] = None):
+        """
+        Updates the costs based on the provided usage information.
+
+        Nothing is recorded unless ``self.config.calc_usage`` is enabled, ``usage`` is
+        truthy and ``self.cost_manager`` is set.
+
+        Args:
+            usage: Either a dict with ``prompt_tokens`` / ``completion_tokens`` keys
+                (missing keys count as 0) or an object exposing attributes of the
+                same names.
+            model: Optional pricing key for this call. An override elsewhere in this
+                change forwards a model name via ``super()._update_costs(usage, model)``,
+                so the base method has to accept it. When omitted or empty,
+                ``self.pricing_plan`` is used, which keeps single-argument callers
+                unchanged.
+        """
+        if not (self.config.calc_usage and usage and self.cost_manager):
+            return
+        if isinstance(usage, dict):
+            prompt_tokens = int(usage.get("prompt_tokens", 0))
+            completion_tokens = int(usage.get("completion_tokens", 0))
+        else:
+            prompt_tokens = usage.prompt_tokens
+            completion_tokens = usage.completion_tokens
+        self.cost_manager.update_cost(prompt_tokens, completion_tokens, model or self.pricing_plan)
+
    def messages_to_prompt(self, messages: list[dict]):
        """[{"role": "user", "content": msg}] to user: <msg> etc."""
        return "\n".join([f"{i['role']}: {i['content']}" for i in messages])
--- a/metagpt/provider/constant.py
+++ b/metagpt/provider/constant.py
@ -25,6 +25,7 @@ GENERAL_FUNCTION_SCHEMA = {
    },
 }

+
 # tool_choice value for general_function_schema
 # https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
 GENERAL_TOOL_CHOICE = {"type": "function", "function": {"name": "execute"}}
--- a/metagpt/provider/dashscope_api.py
+++ b/metagpt/provider/dashscope_api.py
@ -24,18 +24,10 @@ from dashscope.common.error import (
    ModelRequired,
    UnsupportedApiProtocol,
 )
-from tenacity import (
-    after_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)

-from metagpt.logs import log_llm_stream, logger
+from metagpt.logs import log_llm_stream
 from metagpt.provider.base_llm import BaseLLM, LLMConfig
 from metagpt.provider.llm_provider_registry import LLMType, register_provider
-from metagpt.provider.openai_api import log_and_reraise
 from metagpt.utils.cost_manager import CostManager
 from metagpt.utils.token_counter import DASHSCOPE_TOKEN_COSTS

@ -210,16 +202,16 @@ class DashScopeLLM(BaseLLM):
        self._update_costs(dict(resp.usage))
        return resp.output

-    async def _achat_completion(self, messages: list[dict]) -> GenerationOutput:
+    async def _achat_completion(self, messages: list[dict], timeout: int = 3) -> GenerationOutput:
        resp: GenerationResponse = await self.aclient.acall(**self._const_kwargs(messages, stream=False))
        self._check_response(resp)
        self._update_costs(dict(resp.usage))
        return resp.output

    async def acompletion(self, messages: list[dict], timeout=3) -> GenerationOutput:
-        return await self._achat_completion(messages)
+        return await self._achat_completion(messages, timeout=timeout)

-    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
        resp = await self.aclient.acall(**self._const_kwargs(messages, stream=True))
        collected_content = []
        usage = {}
@ -233,16 +225,3 @@ class DashScopeLLM(BaseLLM):
        self._update_costs(usage)
        full_content = "".join(collected_content)
        return full_content
-
-    @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_random_exponential(min=1, max=60),
-        after=after_log(logger, logger.level("WARNING").name),
-        retry=retry_if_exception_type(ConnectionError),
-        retry_error_callback=log_and_reraise,
-    )
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
-        if stream:
-            return await self._achat_completion_stream(messages)
-        resp = await self._achat_completion(messages)
-        return self.get_choice_text(resp)
--- a/metagpt/provider/google_gemini_api.py
+++ b/metagpt/provider/google_gemini_api.py
@ -13,19 +13,11 @@ from google.generativeai.types.generation_types import (
    GenerateContentResponse,
    GenerationConfig,
 )
-from tenacity import (
-    after_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)

 from metagpt.configs.llm_config import LLMConfig, LLMType
-from metagpt.logs import log_llm_stream, logger
+from metagpt.logs import log_llm_stream
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.provider.openai_api import log_and_reraise


 class GeminiGenerativeModel(GenerativeModel):
@ -55,6 +47,7 @@ class GeminiLLM(BaseLLM):
        self.__init_gemini(config)
        self.config = config
        self.model = "gemini-pro"  # so far only one model
+        self.pricing_plan = self.config.pricing_plan or self.model
        self.llm = GeminiGenerativeModel(model_name=self.model)

    def __init_gemini(self, config: LLMConfig):
@ -95,16 +88,16 @@ class GeminiLLM(BaseLLM):
        self._update_costs(usage)
        return resp

-    async def _achat_completion(self, messages: list[dict]) -> "AsyncGenerateContentResponse":
+    async def _achat_completion(self, messages: list[dict], timeout: int = 3) -> "AsyncGenerateContentResponse":
        resp: AsyncGenerateContentResponse = await self.llm.generate_content_async(**self._const_kwargs(messages))
        usage = await self.aget_usage(messages, resp.text)
        self._update_costs(usage)
        return resp

    async def acompletion(self, messages: list[dict], timeout=3) -> dict:
-        return await self._achat_completion(messages)
+        return await self._achat_completion(messages, timeout=timeout)

-    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
        resp: AsyncGenerateContentResponse = await self.llm.generate_content_async(
            **self._const_kwargs(messages, stream=True)
        )
@ -119,17 +112,3 @@ class GeminiLLM(BaseLLM):
        usage = await self.aget_usage(messages, full_content)
        self._update_costs(usage)
        return full_content
-
-    @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_random_exponential(min=1, max=60),
-        after=after_log(logger, logger.level("WARNING").name),
-        retry=retry_if_exception_type(ConnectionError),
-        retry_error_callback=log_and_reraise,
-    )
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
-        """response in async with stream or non-stream mode"""
-        if stream:
-            return await self._achat_completion_stream(messages)
-        resp = await self._achat_completion(messages)
-        return self.get_choice_text(resp)
--- a/metagpt/provider/human_provider.py
+++ b/metagpt/provider/human_provider.py
@ -35,10 +35,16 @@ class HumanProvider(BaseLLM):
    ) -> str:
        return self.ask(msg, timeout=timeout)

+    async def _achat_completion(self, messages: list[dict], timeout=3):
+        """dummy implementation of abstract method in base"""
+        pass
+
    async def acompletion(self, messages: list[dict], timeout=3):
        """dummy implementation of abstract method in base"""
        return []

+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
+        """dummy implementation of abstract method in base"""
+        pass
+
    async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
        """dummy implementation of abstract method in base"""
        return ""
--- a/metagpt/provider/metagpt_api.py
+++ b/metagpt/provider/metagpt_api.py
@ -5,6 +5,8 @@
@File    : metagpt_api.py
@Desc    : MetaGPT LLM provider.
 """
+from openai.types import CompletionUsage
+
 from metagpt.configs.llm_config import LLMType
 from metagpt.provider import OpenAILLM
 from metagpt.provider.llm_provider_registry import register_provider
@ -12,4 +14,7 @@ from metagpt.provider.llm_provider_registry import register_provider

@register_provider(LLMType.METAGPT)
 class MetaGPTLLM(OpenAILLM):
-    pass
+    def _calc_usage(self, messages: list[dict], rsp: str) -> CompletionUsage:
+        """Return an all-zero usage record; ``messages`` and ``rsp`` are not inspected."""
+        # The current billing is based on usage frequency. If there is a future billing logic based on the
+        # number of tokens, please refine the logic here accordingly.
+        return CompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
--- a/metagpt/provider/ollama_api.py
+++ b/metagpt/provider/ollama_api.py
@ -4,22 +4,12 @@

 import json

-from requests import ConnectionError
-from tenacity import (
-    after_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-
 from metagpt.configs.llm_config import LLMConfig, LLMType
 from metagpt.const import LLM_API_TIMEOUT
-from metagpt.logs import log_llm_stream, logger
+from metagpt.logs import log_llm_stream
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.general_api_requestor import GeneralAPIRequestor
 from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.provider.openai_api import log_and_reraise
 from metagpt.utils.cost_manager import TokenCostManager


@ -36,11 +26,12 @@ class OllamaLLM(BaseLLM):
        self.suffix_url = "/chat"
        self.http_method = "post"
        self.use_system_prompt = False
-        self._cost_manager = TokenCostManager()
+        self.cost_manager = TokenCostManager()

    def __init_ollama(self, config: LLMConfig):
        assert config.base_url, "ollama base url is required!"
        self.model = config.model
+        self.pricing_plan = self.model

    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
        kwargs = {"model": self.model, "messages": messages, "options": {"temperature": 0.3}, "stream": stream}
@ -59,7 +50,7 @@ class OllamaLLM(BaseLLM):
        chunk = chunk.decode(encoding)
        return json.loads(chunk)

-    async def _achat_completion(self, messages: list[dict]) -> dict:
+    async def _achat_completion(self, messages: list[dict], timeout: int = 3) -> dict:
        resp, _, _ = await self.client.arequest(
            method=self.http_method,
            url=self.suffix_url,
@ -72,9 +63,9 @@ class OllamaLLM(BaseLLM):
        return resp

    async def acompletion(self, messages: list[dict], timeout=3) -> dict:
-        return await self._achat_completion(messages)
+        return await self._achat_completion(messages, timeout=timeout)

-    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
        stream_resp, _, _ = await self.client.arequest(
            method=self.http_method,
            url=self.suffix_url,
@ -100,17 +91,3 @@ class OllamaLLM(BaseLLM):
        self._update_costs(usage)
        full_content = "".join(collected_content)
        return full_content
-
-    @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_random_exponential(min=1, max=60),
-        after=after_log(logger, logger.level("WARNING").name),
-        retry=retry_if_exception_type(ConnectionError),
-        retry_error_callback=log_and_reraise,
-    )
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
-        """response in async with stream or non-stream mode"""
-        if stream:
-            return await self._achat_completion_stream(messages)
-        resp = await self._achat_completion(messages)
-        return self.get_choice_text(resp)
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -6,10 +6,11 @@
@Modified By: mashenquan, 2023/11/21. Fix bug: ReadTimeout.
@Modified By: mashenquan, 2023/12/1. Fix bug: Unclosed connection caused by openai 0.x.
 """
+from __future__ import annotations

 import json
 import re
-from typing import AsyncIterator, Optional, Union
+from typing import Optional, Union

 from openai import APIConnectionError, AsyncOpenAI, AsyncStream
 from openai._base_client import AsyncHttpxClientWrapper
@ -28,9 +29,13 @@ from metagpt.logs import log_llm_stream, logger
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA
 from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.schema import Message
-from metagpt.utils.common import CodeParser, decode_image
-from metagpt.utils.cost_manager import CostManager, Costs, TokenCostManager
+from metagpt.utils.common import (
+    CodeParser,
+    decode_image,
+    log_and_reraise,
+    process_message,
+)
+from metagpt.utils.cost_manager import CostManager
 from metagpt.utils.exceptions import handle_exception
 from metagpt.utils.token_counter import (
    count_message_tokens,
@ -39,17 +44,6 @@ from metagpt.utils.token_counter import (
 )


-def log_and_reraise(retry_state):
-    logger.error(f"Retry attempts exhausted. Last exception: {retry_state.outcome.exception()}")
-    logger.warning(
-        """
-Recommend going to https://deepwisdom.feishu.cn/wiki/MsGnwQBjiif9c3koSJNcYaoSnu4#part-XdatdVlhEojeAfxaaEZcMV3ZniQ
-See FAQ 5.8
-"""
-    )
-    raise retry_state.outcome.exception()
-
-
@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL])
 class OpenAILLM(BaseLLM):
    """Check https://platform.openai.com/examples for examples"""
@ -63,6 +57,7 @@ class OpenAILLM(BaseLLM):
    def _init_client(self):
        """https://github.com/openai/openai-python#async-usage"""
        self.model = self.config.model  # Used in _calc_usage & _cons_kwargs
+        self.pricing_plan = self.config.pricing_plan or self.model
        kwargs = self._make_client_kwargs()
        self.aclient = AsyncOpenAI(**kwargs)

@ -92,7 +87,9 @@ class OpenAILLM(BaseLLM):
        collected_messages = []
        async for chunk in response:
            chunk_message = chunk.choices[0].delta.content or "" if chunk.choices else ""  # extract the message
-            finish_reason = chunk.choices[0].finish_reason if hasattr(chunk.choices[0], "finish_reason") else None
+            finish_reason = (
+                chunk.choices[0].finish_reason if chunk.choices and hasattr(chunk.choices[0], "finish_reason") else None
+            )
            log_llm_stream(chunk_message)
            collected_messages.append(chunk_message)
            if finish_reason:
@ -150,44 +147,16 @@ class OpenAILLM(BaseLLM):
        rsp = await self._achat_completion(messages, timeout=timeout)
        return self.get_choice_text(rsp)

-    def _func_configs(self, messages: list[dict], timeout=3, **kwargs) -> dict:
-        """Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create"""
-        if "tools" not in kwargs:
-            configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]}
-            kwargs.update(configs)
-
-        return self._cons_kwargs(messages=messages, timeout=timeout, **kwargs)
-
-    def _process_message(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
-        """convert messages to list[dict]."""
-        # 全部转成list
-        if not isinstance(messages, list):
-            messages = [messages]
-
-        # 转成list[dict]
-        processed_messages = []
-        for msg in messages:
-            if isinstance(msg, str):
-                processed_messages.append({"role": "user", "content": msg})
-            elif isinstance(msg, dict):
-                assert set(msg.keys()) == set(["role", "content"])
-                processed_messages.append(msg)
-            elif isinstance(msg, Message):
-                processed_messages.append(msg.to_dict())
-            else:
-                raise ValueError(
-                    f"Only support message type are: str, Message, dict, but got {type(messages).__name__}!"
-                )
-        return processed_messages
-
-    async def _achat_completion_function(self, messages: list[dict], timeout=3, **chat_configs) -> ChatCompletion:
-        messages = self._process_message(messages)
-        kwargs = self._func_configs(messages=messages, timeout=timeout, **chat_configs)
+    async def _achat_completion_function(
+        self, messages: list[dict], timeout: int = 3, **chat_configs
+    ) -> ChatCompletion:
+        """Send one chat completion request built from ``messages`` and extra options.
+
+        Args:
+            messages: Passed through ``process_message`` before the request is built.
+            timeout: Forwarded to ``self._cons_kwargs``.
+            **chat_configs: Extra keyword arguments forwarded to ``self._cons_kwargs``
+                (for example the ``tools`` entry that ``aask_code`` supplies).
+
+        Returns:
+            The ``ChatCompletion`` returned by the client; its ``usage`` is passed to
+            ``self._update_costs`` before returning.
+        """
+        messages = process_message(messages)
+        kwargs = self._cons_kwargs(messages=messages, timeout=timeout, **chat_configs)
        rsp: ChatCompletion = await self.aclient.chat.completions.create(**kwargs)
        self._update_costs(rsp.usage)
        return rsp

-    async def aask_code(self, messages: list[dict], **kwargs) -> dict:
+    async def aask_code(self, messages: list[dict], timeout: int = 3, **kwargs) -> dict:
        """Use function of tools to ask a code.
        Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create

@ -197,12 +166,15 @@ class OpenAILLM(BaseLLM):
        >>> rsp = await llm.aask_code(msg)
        # -> {'language': 'python', 'code': "print('Hello, World!')"}
        """
+        if "tools" not in kwargs:
+            configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]}
+            kwargs.update(configs)
        rsp = await self._achat_completion_function(messages, **kwargs)
        return self.get_choice_function_arguments(rsp)

    def _parse_arguments(self, arguments: str) -> dict:
        """parse arguments in openai function call"""
-        if "langugae" not in arguments and "code" not in arguments:
+        if "language" not in arguments and "code" not in arguments:
            logger.warning(f"Not found `code`, `language`, We assume it is pure code:\n {arguments}\n. ")
            return {"language": "python", "code": arguments}

@ -269,10 +241,9 @@ class OpenAILLM(BaseLLM):
        if not self.config.calc_usage:
            return usage

-        model = self.model if not isinstance(self.cost_manager, TokenCostManager) else "open-llm-model"
        try:
-            usage.prompt_tokens = count_message_tokens(messages, model)
-            usage.completion_tokens = count_string_tokens(rsp, model)
+            usage.prompt_tokens = count_message_tokens(messages, self.pricing_plan)
+            usage.completion_tokens = count_string_tokens(rsp, self.pricing_plan)
        except Exception as e:
            logger.warning(f"usage calculation failed: {e}")

--- a/metagpt/provider/qianfan_api.py
+++ b/metagpt/provider/qianfan_api.py
@ -7,19 +7,11 @@ import os
 import qianfan
 from qianfan import ChatCompletion
 from qianfan.resources.typing import JsonBody
-from tenacity import (
-    after_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)

 from metagpt.configs.llm_config import LLMConfig, LLMType
-from metagpt.logs import log_llm_stream, logger
+from metagpt.logs import log_llm_stream
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.provider.openai_api import log_and_reraise
 from metagpt.utils.cost_manager import CostManager
 from metagpt.utils.token_counter import (
    QIANFAN_ENDPOINT_TOKEN_COSTS,
@ -115,15 +107,15 @@ class QianFanLLM(BaseLLM):
        self._update_costs(resp.body.get("usage", {}))
        return resp.body

-    async def _achat_completion(self, messages: list[dict]) -> JsonBody:
+    async def _achat_completion(self, messages: list[dict], timeout: int = 3) -> JsonBody:
        resp = await self.aclient.ado(**self._const_kwargs(messages=messages, stream=False))
        self._update_costs(resp.body.get("usage", {}))
        return resp.body

-    async def acompletion(self, messages: list[dict], timeout=3) -> JsonBody:
-        return await self._achat_completion(messages)
+    async def acompletion(self, messages: list[dict], timeout: int = 3) -> JsonBody:
+        return await self._achat_completion(messages, timeout=timeout)

-    async def _achat_completion_stream(self, messages: list[dict]) -> str:
+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
        resp = await self.aclient.ado(**self._const_kwargs(messages=messages, stream=True))
        collected_content = []
        usage = {}
@ -137,16 +129,3 @@ class QianFanLLM(BaseLLM):
        self._update_costs(usage)
        full_content = "".join(collected_content)
        return full_content
-
-    @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_random_exponential(min=1, max=60),
-        after=after_log(logger, logger.level("WARNING").name),
-        retry=retry_if_exception_type(ConnectionError),
-        retry_error_callback=log_and_reraise,
-    )
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
-        if stream:
-            return await self._achat_completion_stream(messages)
-        resp = await self._achat_completion(messages)
-        return self.get_choice_text(resp)
--- a/metagpt/provider/spark_api.py
+++ b/metagpt/provider/spark_api.py
@ -31,12 +31,18 @@ class SparkLLM(BaseLLM):
    def get_choice_text(self, rsp: dict) -> str:
        return rsp["payload"]["choices"]["text"][-1]["content"]

+    async def _achat_completion_stream(self, messages: list[dict], timeout: int = 3) -> str:
+        """Placeholder for the abstract base method; this class's ``acompletion_text`` does not call it."""
+        pass
+
    async def acompletion_text(self, messages: list[dict], stream=False, timeout: int = 3) -> str:
        # 不支持
        # logger.warning("当前方法无法支持异步运行。当你使用acompletion时，并不能并行访问。")
        w = GetMessageFromWeb(messages, self.config)
        return w.run()

+    async def _achat_completion(self, messages: list[dict], timeout=3):
+        """Placeholder for the abstract base method; this class's ``acompletion`` does not call it."""
+        pass
+
    async def acompletion(self, messages: list[dict], timeout=3):
        # 不支持异步
        w = GetMessageFromWeb(messages, self.config)
--- a/metagpt/provider/zhipuai_api.py
+++ b/metagpt/provider/zhipuai_api.py
@ -5,21 +5,12 @@
 from enum import Enum
 from typing import Optional

-from requests import ConnectionError
-from tenacity import (
-    after_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_random_exponential,
-)
 from zhipuai.types.chat.chat_completion import Completion

 from metagpt.configs.llm_config import LLMConfig, LLMType
-from metagpt.logs import log_llm_stream, logger
+from metagpt.logs import log_llm_stream
 from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.llm_provider_registry import register_provider
-from metagpt.provider.openai_api import log_and_reraise
 from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI
 from metagpt.utils.cost_manager import CostManager

@ -47,6 +38,7 @@ class ZhiPuAILLM(BaseLLM):
        assert self.config.api_key
        self.api_key = self.config.api_key
        self.model = self.config.model  # so far, it support glm-3-turbo、glm-4
+        self.pricing_plan = self.config.pricing_plan or self.model
        self.llm = ZhiPuModelAPI(api_key=self.api_key)

    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
@ -86,17 +78,3 @@ class ZhiPuAILLM(BaseLLM):
        self._update_costs(usage)
        full_content = "".join(collected_content)
        return full_content
-
-    @retry(
-        stop=stop_after_attempt(3),
-        wait=wait_random_exponential(min=1, max=60),
-        after=after_log(logger, logger.level("WARNING").name),
-        retry=retry_if_exception_type(ConnectionError),
-        retry_error_callback=log_and_reraise,
-    )
-    async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
-        """response in async with stream or non-stream mode"""
-        if stream:
-            return await self._achat_completion_stream(messages)
-        resp = await self._achat_completion(messages)
-        return self.get_choice_text(resp)
--- a/metagpt/repo_parser.py
+++ b/metagpt/repo_parser.py
@ -1,6 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
+Build a symbols repository from source code.
+
+This script is designed to create a symbols repository from the provided source code.
+
@Time    : 2023/11/17 17:58
@Author  : alexanderwu
@File    : repo_parser.py
@ -15,15 +19,26 @@ from pathlib import Path
 from typing import Dict, List, Optional

 import pandas as pd
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator

 from metagpt.const import AGGREGATION, COMPOSITION, GENERALIZATION
 from metagpt.logs import logger
-from metagpt.utils.common import any_to_str, aread
+from metagpt.utils.common import any_to_str, aread, remove_white_spaces
 from metagpt.utils.exceptions import handle_exception


 class RepoFileInfo(BaseModel):
+    """
+    Repository data element that represents information about a file.
+
+    Attributes:
+        file (str): The name or path of the file.
+        classes (List): A list of class names present in the file.
+        functions (List): A list of function names present in the file.
+        globals (List): A list of global variable names present in the file.
+        page_info (List): A list of page-related information associated with the file.
+    """
+
    file: str
    classes: List = Field(default_factory=list)
    functions: List = Field(default_factory=list)
@ -32,6 +47,17 @@ class RepoFileInfo(BaseModel):


 class CodeBlockInfo(BaseModel):
+    """
+    Repository data element representing information about a code block.
+
+    Attributes:
+        lineno (int): The starting line number of the code block.
+        end_lineno (int): The ending line number of the code block.
+        type_name (str): The type or category of the code block.
+        tokens (List): A list of tokens present in the code block.
+        properties (Dict): A dictionary containing additional properties associated with the code block.
+    """
+
    lineno: int
    end_lineno: int
    type_name: str
@ -39,31 +65,395 @@ class CodeBlockInfo(BaseModel):
    properties: Dict = Field(default_factory=dict)


-class ClassInfo(BaseModel):
+class DotClassAttribute(BaseModel):
+    """
+    Repository data element representing a class attribute in dot format.
+
+    Attributes:
+        name (str): The name of the class attribute.
+        type_ (str): The type of the class attribute, with white space removed.
+        default_ (str): The default value of the class attribute ("" when absent or "None").
+        description (str): The original, unparsed dot format text.
+        compositions (List[str]): Non-builtin type names referenced by `type_`; sorted on validation.
+    """
+
+    name: str = ""
+    type_: str = ""
+    default_: str = ""
+    description: str
+    compositions: List[str] = Field(default_factory=list)
+
+    @classmethod
+    def parse(cls, v: str) -> "DotClassAttribute":
+        """
+        Parses dot format text and returns a DotClassAttribute object.
+
+        The text is first normalised to the shape `name:type=default` by inserting the
+        missing ":" and/or "=" separators, then split on the first ":" and the last "=".
+
+        Args:
+            v (str): Dot format text to be parsed.
+
+        Returns:
+            DotClassAttribute: An instance of the DotClassAttribute class representing the parsed data.
+        """
+        val = ""
+        meet_colon = False
+        meet_equals = False
+        for c in v:
+            if c == ":":
+                meet_colon = True
+            elif c == "=":
+                meet_equals = True
+                if not meet_colon:
+                    # `name = default` without a type: insert the empty type separator first.
+                    val += ":"
+                    meet_colon = True
+            val += c
+        if not meet_colon:
+            val += ":"
+        if not meet_equals:
+            val += "="
+
+        cix = val.find(":")
+        eix = val.rfind("=")
+        name = val[0:cix].strip()
+        type_ = val[cix + 1 : eix]
+        default_ = val[eix + 1 :].strip()
+
+        type_ = remove_white_spaces(type_)  # remove white space
+        if type_ == "NoneType":
+            type_ = ""
+        if "Literal[" in type_:
+            # Keep the literal values verbatim in `type_`, but hide them from the
+            # composition scan so quoted literal values are not mistaken for type names.
+            pre_l, literal, post_l = cls._split_literal(type_)
+            composition_val = pre_l + "Literal" + post_l  # replace Literal[...] with Literal
+            type_ = pre_l + literal + post_l
+        else:
+            type_ = re.sub(r"['\"]+", "", type_)  # remove '"
+            composition_val = type_
+
+        if default_ == "None":
+            default_ = ""
+        compositions = cls.parse_compositions(composition_val)
+        return cls(name=name, type_=type_, default_=default_, description=v, compositions=compositions)
+
+    @staticmethod
+    def parse_compositions(types_part) -> List[str]:
+        """
+        Parses the type definition code block of source code and returns a list of compositions.
+
+        The text is split on brackets, parentheses and commas; quotes are stripped and the
+        builtin/typing names listed in `filters` are discarded.
+
+        Args:
+            types_part: The type definition code block to be parsed.
+
+        Returns:
+            List[str]: The distinct composition names extracted from the type definition,
+                in sorted order so that the result is deterministic.
+        """
+        if not types_part:
+            return []
+        modified_string = re.sub(r"[\[\],\(\)]", "|", types_part)
+        types = modified_string.split("|")
+        filters = {
+            "str",
+            "frozenset",
+            "set",
+            "int",
+            "float",
+            "complex",
+            "bool",
+            "dict",
+            "list",
+            "Union",
+            "Dict",
+            "Set",
+            "Tuple",
+            "NoneType",
+            "None",
+            "Any",
+            "Optional",
+            "Iterator",
+            "Literal",
+            "List",
+        }
+        result = set()
+        for t in types:
+            t = re.sub(r"['\"]+", "", t.strip())
+            if t and t not in filters:
+                result.add(t)
+        # A set has no stable iteration order; sort so direct callers get a deterministic list.
+        return sorted(result)
+
+    @staticmethod
+    def _split_literal(v):
+        """
+        Parses the literal definition code block and returns three parts: pre-part, literal-part, and post-part.
+
+        Only the first `Literal[` occurrence is handled. Quotes are removed from the pre-part
+        and post-part; the literal-part is returned unchanged.
+
+        Args:
+            v: The literal definition code block to be parsed.
+
+        Returns:
+            Tuple[str, str, str]: A tuple containing the pre-part, literal-part, and post-part of the code block.
+        """
+        tag = "Literal["
+        bix = v.find(tag)
+        # Default to the last character: the scan below stops one short of it, so a
+        # closing bracket in the final position is covered by this default.
+        eix = len(v) - 1
+        counter = 1
+        for i in range(bix + len(tag), len(v) - 1):
+            c = v[i]
+            if c == "[":
+                counter += 1
+                continue
+            if c == "]":
+                counter -= 1
+                if counter > 0:
+                    continue
+                eix = i
+                break
+        pre_l = re.sub(r"['\"]", "", v[0:bix])  # remove '"
+        post_l = re.sub(r"['\"]", "", v[eix + 1 :])  # remove '"
+
+        return pre_l, v[bix : eix + 1], post_l
+
+    @field_validator("compositions", mode="after")
+    @classmethod
+    def sort(cls, lst: List) -> List:
+        """
+        Sorts the `compositions` list in place when the field is validated.
+
+        Args:
+            lst (List): The list attribute to be sorted.
+
+        Returns:
+            List: The same list object, sorted.
+        """
+        lst.sort()
+        return lst
+
+
+class DotClassInfo(BaseModel):
+    """
+    Repository data element representing information about a class in dot format.
+
+    Attributes:
+        name (str): The name of the class.
+        package (Optional[str]): The package to which the class belongs (optional).
+        attributes (Dict[str, DotClassAttribute]): A dictionary of attributes associated with the class.
+        methods (Dict[str, DotClassMethod]): A dictionary of methods associated with the class.
+        compositions (List[str]): A list of compositions associated with the class.
+        aggregations (List[str]): A list of aggregations associated with the class.
+    """
+
    name: str
    package: Optional[str] = None
-    attributes: Dict[str, str] = Field(default_factory=dict)
-    methods: Dict[str, str] = Field(default_factory=dict)
+    attributes: Dict[str, DotClassAttribute] = Field(default_factory=dict)
+    methods: Dict[str, DotClassMethod] = Field(default_factory=dict)
+    compositions: List[str] = Field(default_factory=list)
+    aggregations: List[str] = Field(default_factory=list)
+
+    @field_validator("compositions", "aggregations", mode="after")
+    @classmethod
+    def sort(cls, lst: List) -> List:
+        """
+        Auto-sorts a list attribute after making changes.
+
+        Args:
+            lst (List): The list attribute to be sorted.
+
+        Returns:
+            List: The sorted list.
+        """
+        lst.sort()
+        return lst


-class ClassRelationship(BaseModel):
+class DotClassRelationship(BaseModel):
+    """
+    Repository data element representing a relationship between two classes in dot format.
+
+    Attributes:
+        src (str): The source class of the relationship.
+        dest (str): The destination class of the relationship.
+        relationship (str): The type or nature of the relationship.
+        label (Optional[str]): An optional label associated with the relationship.
+    """
+
    src: str = ""
    dest: str = ""
    relationship: str = ""
    label: Optional[str] = None


+class DotReturn(BaseModel):
+    """
+    Repository data element representing a function or method return type in dot format.
+
+    Attributes:
+        type_ (str): The type of the return, with white space removed.
+        description (str): The original, unparsed return type text.
+        compositions (List[str]): A list of compositions associated with the return type; sorted on validation.
+    """
+
+    type_: str = ""
+    description: str
+    compositions: List[str] = Field(default_factory=list)
+
+    @classmethod
+    def parse(cls, v: str) -> "DotReturn":
+        """
+        Parses the return type part of dot format text and returns a DotReturn object.
+
+        Args:
+            v (str): The dot format text containing the return type part to be parsed.
+
+        Returns:
+            DotReturn: An instance representing the parsed return type. An empty `v` yields
+                an instance with an empty `type_` and no compositions; this method never
+                returns None.
+        """
+        if not v:
+            # Build through `cls` so subclasses get an instance of their own type.
+            return cls(description=v)
+        type_ = remove_white_spaces(v)
+        compositions = DotClassAttribute.parse_compositions(type_)
+        return cls(type_=type_, description=v, compositions=compositions)
+
+    @field_validator("compositions", mode="after")
+    @classmethod
+    def sort(cls, lst: List) -> List:
+        """
+        Sorts the `compositions` list in place when the field is validated.
+
+        Args:
+            lst (List): The list attribute to be sorted.
+
+        Returns:
+            List: The same list object, sorted.
+        """
+        lst.sort()
+        return lst
+
+
+class DotClassMethod(BaseModel):
+    """
+    Repository data element representing a class method in dot format.
+
+    Attributes:
+        name (str): The name of the method.
+        args (List[DotClassAttribute]): The parsed arguments of the method.
+        return_args (Optional[DotReturn]): The parsed return type of the method.
+        description (str): The original, unparsed dot format text.
+        aggregations (List[str]): Type names referenced by the arguments and the return type.
+    """
+
+    name: str
+    args: List[DotClassAttribute] = Field(default_factory=list)
+    return_args: Optional[DotReturn] = None
+    description: str
+    aggregations: List[str] = Field(default_factory=list)
+
+    @classmethod
+    def parse(cls, v: str) -> "DotClassMethod":
+        """
+        Parses a dot format method text and returns a DotClassMethod object.
+
+        Args:
+            v (str): The dot format text containing method information to be parsed.
+
+        Returns:
+            DotClassMethod: An instance of the DotClassMethod class representing the parsed method.
+        """
+        bix = v.find("(")
+        eix = v.rfind(")")
+        rix = v.rfind(":")
+        # A ":" located before the closing parenthesis belongs to an argument annotation,
+        # not to the return type; in that case (or when there is no ":") the return part
+        # is whatever follows the ")".
+        if rix < 0 or rix < eix:
+            rix = eix
+        name_part = v[0:bix].strip()
+        args_part = v[bix + 1 : eix].strip()
+        return_args_part = v[rix + 1 :].strip()
+
+        name = cls._parse_name(name_part)
+        args = cls._parse_args(args_part)
+        return_args = DotReturn.parse(return_args_part)
+        aggregations = set()
+        for i in args:
+            aggregations.update(i.compositions)
+        aggregations.update(return_args.compositions)
+
+        # This model has no sort validator, so sort here: a set has no stable order and
+        # the result would otherwise differ between runs.
+        return cls(name=name, args=args, description=v, return_args=return_args, aggregations=sorted(aggregations))
+
+    @staticmethod
+    def _parse_name(v: str) -> str:
+        """
+        Parses the dot format method name part and returns the method name.
+
+        When the text contains ">", the name is the text between the first ">" and the
+        last "</" (presumably markup wrapped around the name in the dot label -- confirm
+        against pyreverse output); otherwise the stripped text is returned as is.
+
+        Args:
+            v (str): The dot format text containing the method name part to be parsed.
+
+        Returns:
+            str: The parsed method name.
+        """
+        tags = [">", "</"]
+        if tags[0] in v:
+            bix = v.find(tags[0]) + len(tags[0])
+            eix = v.rfind(tags[1])
+            return v[bix:eix].strip()
+        return v.strip()
+
+    @staticmethod
+    def _parse_args(v: str) -> List[DotClassAttribute]:
+        """
+        Parses the dot format method arguments part and returns the parsed arguments.
+
+        The text is split on commas that are not enclosed in square brackets, so that
+        subscripted types such as `Dict[str, int]` stay in one piece.
+
+        Args:
+            v (str): The dot format text containing the arguments part to be parsed.
+
+        Returns:
+            List[DotClassAttribute]: The parsed method arguments; empty parts are skipped.
+        """
+        if not v:
+            return []
+        parts = []
+        bix = 0
+        counter = 0  # current nesting depth of square brackets
+        for i, c in enumerate(v):
+            if c == "[":
+                counter += 1
+            elif c == "]":
+                counter -= 1
+            elif c == "," and counter == 0:
+                parts.append(v[bix:i].strip())
+                bix = i + 1
+        parts.append(v[bix:].strip())
+
+        return [DotClassAttribute.parse(p) for p in parts if p]
+
+
 class RepoParser(BaseModel):
+    """
+    Tool to build a symbols repository from a project directory.
+
+    Attributes:
+        base_directory (Path): The base directory of the project.
+    """
+
    base_directory: Path = Field(default=None)

    @classmethod
    @handle_exception(exception_type=Exception, default_return=[])
    def _parse_file(cls, file_path: Path) -> list:
-        """Parse a Python file in the repository."""
+        """
+        Parses a Python file in the repository.
+
+        Args:
+            file_path (Path): The path to the Python file to be parsed.
+
+        Returns:
+            list: A list containing the parsed symbols from the file.
+        """
        return ast.parse(file_path.read_text()).body

    def extract_class_and_function_info(self, tree, file_path) -> RepoFileInfo:
-        """Extract class, function, and global variable information from the AST."""
+        """
+        Extracts class, function, and global variable information from the Abstract Syntax Tree (AST).
+
+        Args:
+            tree: The Abstract Syntax Tree (AST) of the Python file.
+            file_path: The path to the Python file.
+
+        Returns:
+            RepoFileInfo: A RepoFileInfo object containing the extracted information.
+        """
        file_info = RepoFileInfo(file=str(file_path.relative_to(self.base_directory)))
        for node in tree:
            info = RepoParser.node_to_str(node)
@ -81,11 +471,17 @@ class RepoParser(BaseModel):
        return file_info

    def generate_symbols(self) -> List[RepoFileInfo]:
+        """
+        Builds a symbol repository from '.py' files in the project directory.
+
+        Returns:
+            List[RepoFileInfo]: A list of RepoFileInfo objects containing the extracted information.
+        """
        files_classes = []
        directory = self.base_directory

        matching_files = []
-        extensions = ["*.py", "*.js"]
+        extensions = ["*.py"]
        for ext in extensions:
            matching_files += directory.rglob(ext)
        for path in matching_files:
@ -95,19 +491,38 @@ class RepoParser(BaseModel):

        return files_classes

-    def generate_json_structure(self, output_path):
-        """Generate a JSON file documenting the repository structure."""
+    def generate_json_structure(self, output_path: Path):
+        """
+        Generates a JSON file documenting the repository structure.
+
+        Args:
+            output_path (Path): The path to the JSON file to be generated.
+        """
        files_classes = [i.model_dump() for i in self.generate_symbols()]
        output_path.write_text(json.dumps(files_classes, indent=4))

-    def generate_dataframe_structure(self, output_path):
-        """Generate a DataFrame documenting the repository structure and save as CSV."""
+    def generate_dataframe_structure(self, output_path: Path):
+        """
+        Generates a DataFrame documenting the repository structure and saves it as a CSV file.
+
+        Args:
+            output_path (Path): The path to the CSV file to be generated.
+        """
        files_classes = [i.model_dump() for i in self.generate_symbols()]
        df = pd.DataFrame(files_classes)
        df.to_csv(output_path, index=False)

-    def generate_structure(self, output_path=None, mode="json") -> Path:
-        """Generate the structure of the repository as a specified format."""
+    def generate_structure(self, output_path: str | Path = None, mode="json") -> Path:
+        """
+        Generates the structure of the repository in a specified format.
+
+        Args:
+            output_path (str | Path): The path to the output file or directory. Default is None.
+            mode (str): The output format mode. Options: "json" (default), "csv", etc.
+
+        Returns:
+            Path: The path to the generated output file or directory.
+        """
        output_file = self.base_directory / f"{self.base_directory.name}-structure.{mode}"
        output_path = Path(output_path) if output_path else output_file

@ -119,6 +534,16 @@ class RepoParser(BaseModel):

    @staticmethod
    def node_to_str(node) -> CodeBlockInfo | None:
+        """
+        Parses and converts an Abstract Syntax Tree (AST) node to a CodeBlockInfo object.
+
+        Args:
+            node: The AST node to be converted.
+
+        Returns:
+            CodeBlockInfo | None: A CodeBlockInfo object representing the parsed AST node,
+                                  or None if the conversion fails.
+        """
        if isinstance(node, ast.Try):
            return None
        if any_to_str(node) == any_to_str(ast.Expr):
@ -159,9 +584,19 @@ class RepoParser(BaseModel):

    @staticmethod
    def _parse_expr(node) -> List:
+        """
+        Parses an expression Abstract Syntax Tree (AST) node.
+
+        Args:
+            node: The AST node representing an expression.
+
+        Returns:
+            List: A list containing the parsed information from the expression node.
+        """
        funcs = {
            any_to_str(ast.Constant): lambda x: [any_to_str(x.value), RepoParser._parse_variable(x.value)],
            any_to_str(ast.Call): lambda x: [any_to_str(x.value), RepoParser._parse_variable(x.value.func)],
+            any_to_str(ast.Tuple): lambda x: [any_to_str(x.value), RepoParser._parse_variable(x.value)],
        }
        func = funcs.get(any_to_str(node.value))
        if func:
@ -170,12 +605,30 @@ class RepoParser(BaseModel):

    @staticmethod
    def _parse_name(n):
+        """
+        Gets the 'name' value of an Abstract Syntax Tree (AST) node.
+
+        Args:
+            n: The AST node.
+
+        Returns:
+            The 'name' value of the AST node.
+        """
        if n.asname:
            return f"{n.name} as {n.asname}"
        return n.name

    @staticmethod
    def _parse_if(n):
+        """
+        Parses an 'if' statement Abstract Syntax Tree (AST) node.
+
+        Args:
+            n: The AST node representing an 'if' statement.
+
+        Returns:
+            None or Parsed information from the 'if' statement node.
+        """
        tokens = []
        try:
            if isinstance(n.test, ast.BoolOp):
@ -187,10 +640,14 @@ class RepoParser(BaseModel):
                v = RepoParser._parse_variable(n.test.left)
                if v:
                    tokens.append(v)
-            for item in n.test.comparators:
-                v = RepoParser._parse_variable(item)
-                if v:
-                    tokens.append(v)
+            if isinstance(n.test, ast.Name):
+                v = RepoParser._parse_variable(n.test)
+                tokens.append(v)
+            if hasattr(n.test, "comparators"):
+                for item in n.test.comparators:
+                    v = RepoParser._parse_variable(item)
+                    if v:
+                        tokens.append(v)
            return tokens
        except Exception as e:
            logger.warning(f"Unsupported if: {n}, err:{e}")
@ -198,6 +655,15 @@ class RepoParser(BaseModel):

    @staticmethod
    def _parse_if_compare(n):
+        """
+        Parses an 'if' condition Abstract Syntax Tree (AST) node.
+
+        Args:
+            n: The AST node representing an 'if' condition.
+
+        Returns:
+            None or Parsed information from the 'if' condition node.
+        """
        if hasattr(n, "left"):
            return RepoParser._parse_variable(n.left)
        else:
@ -205,6 +671,15 @@ class RepoParser(BaseModel):

    @staticmethod
    def _parse_variable(node):
+        """
+        Parses a variable Abstract Syntax Tree (AST) node.
+
+        Args:
+            node: The AST node representing a variable.
+
+        Returns:
+            None or Parsed information from the variable node.
+        """
        try:
            funcs = {
                any_to_str(ast.Constant): lambda x: x.value,
@ -213,7 +688,7 @@ class RepoParser(BaseModel):
                if hasattr(x.value, "id")
                else f"{x.attr}",
                any_to_str(ast.Call): lambda x: RepoParser._parse_variable(x.func),
-                any_to_str(ast.Tuple): lambda x: "",
+                any_to_str(ast.Tuple): lambda x: [d.value for d in x.dims],
            }
            func = funcs.get(any_to_str(node))
            if not func:
@ -224,22 +699,42 @@ class RepoParser(BaseModel):

    @staticmethod
    def _parse_assign(node):
+        """
+        Parses an assignment Abstract Syntax Tree (AST) node.
+
+        Args:
+            node: The AST node representing an assignment.
+
+        Returns:
+            None or Parsed information from the assignment node.
+        """
        return [RepoParser._parse_variable(t) for t in node.targets]

    async def rebuild_class_views(self, path: str | Path = None):
+        """
+        Executes `pyreverse` to reconstruct the dot format class view repository file.
+
+        Args:
+            path (str | Path): The path to the target directory or file. Default is None.
+        """
        if not path:
            path = self.base_directory
        path = Path(path)
        if not path.exists():
            return
+        init_file = path / "__init__.py"
+        if not init_file.exists():
+            raise ValueError("Failed to import module __init__ with error:No module named __init__.")
        command = f"pyreverse {str(path)} -o dot"
-        result = subprocess.run(command, shell=True, check=True, cwd=str(path))
+        output_dir = path / "__dot__"
+        output_dir.mkdir(parents=True, exist_ok=True)
+        result = subprocess.run(command, shell=True, check=True, cwd=str(output_dir))
        if result.returncode != 0:
            raise ValueError(f"{result}")
-        class_view_pathname = path / "classes.dot"
+        class_view_pathname = output_dir / "classes.dot"
        class_views = await self._parse_classes(class_view_pathname)
        relationship_views = await self._parse_class_relationships(class_view_pathname)
-        packages_pathname = path / "packages.dot"
+        packages_pathname = output_dir / "packages.dot"
        class_views, relationship_views, package_root = RepoParser._repair_namespaces(
            class_views=class_views, relationship_views=relationship_views, path=path
        )
@ -247,7 +742,17 @@ class RepoParser(BaseModel):
        packages_pathname.unlink(missing_ok=True)
        return class_views, relationship_views, package_root

-    async def _parse_classes(self, class_view_pathname):
+    @staticmethod
+    async def _parse_classes(class_view_pathname: Path) -> List[DotClassInfo]:
+        """
+        Parses a dot format class view repository file.
+
+        Args:
+            class_view_pathname (Path): The path to the dot format class view repository file.
+
+        Returns:
+            List[DotClassInfo]: A list of DotClassInfo objects representing the parsed classes.
+        """
        class_views = []
        if not class_view_pathname.exists():
            return class_views
@ -258,22 +763,38 @@ class RepoParser(BaseModel):
            if not package_name:
                continue
            class_name, members, functions = re.split(r"(?<!\\)\|", info)
-            class_info = ClassInfo(name=class_name)
+            class_info = DotClassInfo(name=class_name)
            class_info.package = package_name
            for m in members.split("\n"):
                if not m:
                    continue
-                member_name = m.split(":", 1)[0].strip() if ":" in m else m.strip()
-                class_info.attributes[member_name] = m
+                attr = DotClassAttribute.parse(m)
+                class_info.attributes[attr.name] = attr
+                for i in attr.compositions:
+                    if i not in class_info.compositions:
+                        class_info.compositions.append(i)
            for f in functions.split("\n"):
                if not f:
                    continue
-                function_name, _ = f.split("(", 1)
-                class_info.methods[function_name] = f
+                method = DotClassMethod.parse(f)
+                class_info.methods[method.name] = method
+                for i in method.aggregations:
+                    if i not in class_info.compositions and i not in class_info.aggregations:
+                        class_info.aggregations.append(i)
            class_views.append(class_info)
        return class_views

-    async def _parse_class_relationships(self, class_view_pathname) -> List[ClassRelationship]:
+    @staticmethod
+    async def _parse_class_relationships(class_view_pathname: Path) -> List[DotClassRelationship]:
+        """
+        Parses a dot format class view repository file.
+
+        Args:
+            class_view_pathname (Path): The path to the dot format class view repository file.
+
+        Returns:
+            List[DotClassRelationship]: A list of DotClassRelationship objects representing the parsed class relationships.
+        """
        relationship_views = []
        if not class_view_pathname.exists():
            return relationship_views
@ -287,7 +808,16 @@ class RepoParser(BaseModel):
        return relationship_views

    @staticmethod
-    def _split_class_line(line):
+    def _split_class_line(line: str) -> (str, str):
+        """
+        Parses a dot format line about class info and returns the class name part and class members part.
+
+        Args:
+            line (str): The dot format line containing class information.
+
+        Returns:
+            Tuple[str, str]: A tuple containing the class name part and class members part.
+        """
        part_splitor = '" ['
        if part_splitor not in line:
            return None, None
@ -305,14 +835,25 @@ class RepoParser(BaseModel):
        return class_name, info

    @staticmethod
-    def _split_relationship_line(line):
+    def _split_relationship_line(line: str) -> DotClassRelationship:
+        """
+        Parses a dot format line about the relationship of two classes and returns 'Generalize', 'Composite',
+        or 'Aggregate'.
+
+        Args:
+            line (str): The dot format line containing relationship information.
+
+        Returns:
+            DotClassRelationship: The object of relationship representing either 'Generalize', 'Composite',
+            or 'Aggregate' relationship.
+        """
        splitters = [" -> ", " [", "];"]
        idxs = []
        for tag in splitters:
            if tag not in line:
                return None
            idxs.append(line.find(tag))
-        ret = ClassRelationship()
+        ret = DotClassRelationship()
        ret.src = line[0 : idxs[0]].strip('"')
        ret.dest = line[idxs[0] + len(splitters[0]) : idxs[1]].strip('"')
        properties = line[idxs[1] + len(splitters[1]) : idxs[2]].strip(" ")
@ -330,7 +871,16 @@ class RepoParser(BaseModel):
        return ret

    @staticmethod
-    def _get_label(line):
+    def _get_label(line: str) -> str:
+        """
+        Parses a dot format line and returns the label information.
+
+        Args:
+            line (str): The dot format line containing label information.
+
+        Returns:
+            str: The label information parsed from the line.
+        """
        tag = 'label="'
        if tag not in line:
            return ""
@ -340,6 +890,15 @@ class RepoParser(BaseModel):

    @staticmethod
    def _create_path_mapping(path: str | Path) -> Dict[str, str]:
+        """
+        Creates a mapping table between source code files' paths and module names.
+
+        Args:
+            path (str | Path): The path to the source code files or directory.
+
+        Returns:
+            Dict[str, str]: A dictionary mapping source code file paths to their corresponding module names.
+        """
        mappings = {
            str(path).replace("/", "."): str(path),
        }
@ -363,8 +922,21 @@ class RepoParser(BaseModel):

    @staticmethod
    def _repair_namespaces(
-        class_views: List[ClassInfo], relationship_views: List[ClassRelationship], path: str | Path
-    ) -> (List[ClassInfo], List[ClassRelationship], str):
+        class_views: List[DotClassInfo], relationship_views: List[DotClassRelationship], path: str | Path
+    ) -> (List[DotClassInfo], List[DotClassRelationship], str):
+        """
+        Augments namespaces to the path-prefixed classes and relationships.
+
+        Args:
+            class_views (List[DotClassInfo]): List of DotClassInfo objects representing class views.
+            relationship_views (List[DotClassRelationship]): List of DotClassRelationship objects representing
+                relationships.
+            path (str | Path): The path to the source code files or directory.
+
+        Returns:
+            Tuple[List[DotClassInfo], List[DotClassRelationship], str]: A tuple containing the augmented class views,
+            relationships, and the root path of the package.
+        """
        if not class_views:
            return [], [], ""
        c = class_views[0]
@ -383,28 +955,49 @@ class RepoParser(BaseModel):

        for c in class_views:
            c.package = RepoParser._repair_ns(c.package, new_mappings)
-        for i in range(len(relationship_views)):
-            v = relationship_views[i]
+        for _, v in enumerate(relationship_views):
            v.src = RepoParser._repair_ns(v.src, new_mappings)
            v.dest = RepoParser._repair_ns(v.dest, new_mappings)
-            relationship_views[i] = v
-        return class_views, relationship_views, root_path
+        return class_views, relationship_views, str(path)[: len(root_path)]

    @staticmethod
-    def _repair_ns(package, mappings):
+    def _repair_ns(package: str, mappings: Dict[str, str]) -> str:
+        """
+        Replaces the package-prefix with the namespace-prefix.
+
+        Args:
+            package (str): The package to be repaired.
+            mappings (Dict[str, str]): A dictionary mapping source code file paths to their corresponding packages.
+
+        Returns:
+            str: The repaired namespace.
+        """
        file_ns = package
+        ix = 0
        while file_ns != "":
            if file_ns not in mappings:
                ix = file_ns.rfind(".")
                file_ns = file_ns[0:ix]
                continue
            break
+        if file_ns == "":
+            return ""
        internal_ns = package[ix + 1 :]
        ns = mappings[file_ns] + ":" + internal_ns.replace(".", ":")
        return ns

    @staticmethod
-    def _find_root(full_key, package) -> str:
+    def _find_root(full_key: str, package: str) -> str:
+        """
+        Returns the package root path based on the key, which is the full path, and the package information.
+
+        Args:
+            full_key (str): The full key representing the full path.
+            package (str): The package information.
+
+        Returns:
+            str: The package root path.
+        """
        left = full_key
        while left != "":
            if left in package:
@ -417,5 +1010,14 @@ class RepoParser(BaseModel):
        return "." + full_key[0:ix]


-def is_func(node):
+def is_func(node) -> bool:
+    """
+    Returns True if the given node represents a function.
+
+    Args:
+        node: The Abstract Syntax Tree (AST) node.
+
+    Returns:
+        bool: True if the node represents a function, False otherwise.
+    """
    return isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
--- a/metagpt/roles/assistant.py
+++ b/metagpt/roles/assistant.py
@ -65,7 +65,7 @@ class Assistant(Role):
            prompt += f"If the text explicitly want you to {desc}, return `[SKILL]: {name}` brief and clear. For instance: [SKILL]: {name}\n"
        prompt += 'Otherwise, return `[TALK]: {talk}` brief and clear. For instance: if {talk} is "xxxx" return [TALK]: xxxx\n\n'
        prompt += f"Now what specific action is explicitly mentioned in the text: {last_talk}\n"
-        rsp = await self.llm.aask(prompt, ["You are an action classifier"])
+        rsp = await self.llm.aask(prompt, ["You are an action classifier"], stream=False)
        logger.info(f"THINK: {prompt}\n, THINK RESULT: {rsp}\n")
        return await self._plan(rsp, last_talk=last_talk)

--- a/metagpt/roles/di/init.py
+++ b/metagpt/roles/di/init.py
--- a/metagpt/roles/di/data_interpreter.py
+++ b/metagpt/roles/di/data_interpreter.py
@ -0,0 +1,184 @@
+from __future__ import annotations
+
+import json
+from typing import Literal, Union
+
+from pydantic import Field, model_validator
+
+from metagpt.actions.di.ask_review import ReviewConst
+from metagpt.actions.di.execute_nb_code import ExecuteNbCode
+from metagpt.actions.di.write_analysis_code import CheckData, WriteAnalysisCode
+from metagpt.logs import logger
+from metagpt.prompts.di.write_analysis_code import DATA_INFO
+from metagpt.roles import Role
+from metagpt.schema import Message, Task, TaskResult
+from metagpt.strategy.task_type import TaskType
+from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
+from metagpt.utils.common import CodeParser
+
+REACT_THINK_PROMPT = """
+# User Requirement
+{user_requirement}
+# Context
+{context}
+
+Output a json following the format:
+```json
+{{
+    "thoughts": str = "Thoughts on current situation, reflect on how you should proceed to fulfill the user requirement",
+    "state": bool = "Decide whether you need to take more actions to complete the user requirement. Return true if you think so. Return false if you think the requirement has been completely fulfilled."
+}}
+```
+"""
+
+
+class DataInterpreter(Role):
+    """Role that writes analysis code with WriteAnalysisCode and runs it through ExecuteNbCode.
+
+    `react_mode` selects how the role works: "plan_and_act" goes through the planner's tasks,
+    while "react" asks the LLM after every step whether more actions are needed.
+    """
+
+    name: str = "David"
+    profile: str = "DataInterpreter"
+    auto_run: bool = True
+    use_plan: bool = True  # derived from react_mode in set_plan_and_tool, any passed-in value is overwritten
+    use_reflection: bool = False  # when True, reflection is requested on retries after the first trial
+    execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True)
+    tools: Union[str, list[str]] = []  # Use special symbol ["<all>"] to indicate use of all registered tools
+    tool_recommender: ToolRecommender = None  # built from `tools` in set_plan_and_tool when tools are given
+    react_mode: Literal["plan_and_act", "react"] = "plan_and_act"
+    max_react_loop: int = 10  # used for react mode
+
+    @model_validator(mode="after")
+    def set_plan_and_tool(self) -> "DataInterpreter":
+        """Set up the react mode, the `use_plan` flag, the tool recommender and the initial action.
+
+        Returns:
+            DataInterpreter: The validated instance itself.
+        """
+        self._set_react_mode(react_mode=self.react_mode, max_react_loop=self.max_react_loop, auto_run=self.auto_run)
+        # create a flag for convenience, overwrite any passed-in value
+        self.use_plan = self.react_mode == "plan_and_act"
+        if self.tools:
+            self.tool_recommender = BM25ToolRecommender(tools=self.tools)
+        self.set_actions([WriteAnalysisCode])
+        self._set_state(0)
+        return self
+
+    @property
+    def working_memory(self):
+        """Shortcut to the working memory held by the role context (`self.rc.working_memory`)."""
+        return self.rc.working_memory
+
+    async def _think(self) -> bool:
+        """Useful in 'react' mode. Use LLM to decide whether and what to do next.
+
+        Returns:
+            bool: The "state" value of the LLM answer, i.e. whether more actions are needed.
+                Always True on the first call, when the working memory is still empty.
+        """
+        user_requirement = self.get_memories()[0].content
+        context = self.working_memory.get()
+
+        if not context:
+            # just started the run, we need action certainly
+            self.working_memory.add(self.get_memories()[0])  # add user requirement to working memory
+            self._set_state(0)
+            return True
+
+        prompt = REACT_THINK_PROMPT.format(user_requirement=user_requirement, context=context)
+        rsp = await self.llm.aask(prompt)
+        rsp_dict = json.loads(CodeParser.parse_code(block=None, text=rsp))
+        self.working_memory.add(Message(content=rsp_dict["thoughts"], role="assistant"))
+        need_action = rsp_dict["state"]
+        # state 0 is the single registered action; -1 is passed when no further action is wanted
+        self._set_state(0 if need_action else -1)
+
+        return need_action
+
+    async def _act(self) -> Message:
+        """Useful in 'react' mode. Return a Message conforming to Role._act interface."""
+        code, _, _ = await self._write_and_exec_code()
+        return Message(content=code, role="assistant", cause_by=WriteAnalysisCode)
+
+    async def _plan_and_act(self) -> Message:
+        """Run the parent's plan-and-act procedure, then terminate the code executor."""
+        rsp = await super()._plan_and_act()
+        await self.execute_code.terminate()
+        return rsp
+
+    async def _act_on_task(self, current_task: Task) -> TaskResult:
+        """Useful in 'plan_and_act' mode. Wrap the output in a TaskResult for review and confirmation."""
+        code, result, is_success = await self._write_and_exec_code()
+        return TaskResult(code=code, result=result, is_success=is_success)
+
+    async def _write_and_exec_code(self, max_retry: int = 3):
+        """Write code and execute it, retrying until it succeeds or `max_retry` attempts are used up.
+
+        Every written code and every execution result is appended to the working memory. Once the
+        attempts are exhausted without success, a review is requested from the planner; if the
+        review contains the first of ReviewConst.CHANGE_WORDS, the attempt counter is reset.
+
+        Args:
+            max_retry (int): Maximum number of write/execute attempts per round. Defaults to 3.
+
+        Returns:
+            tuple: (code, result, success) of the last attempt; ("", "", False) if no attempt was made.
+        """
+        counter = 0
+        success = False
+        # bound up front so that the final return cannot fail when the loop body never runs (max_retry <= 0)
+        code, result = "", ""
+
+        # plan info
+        plan_status = self.planner.get_plan_status() if self.use_plan else ""
+
+        # tool info
+        if self.tools:
+            memories = self.working_memory.get()
+            context = memories[-1].content if memories else ""  # thoughts from _think stage in 'react' mode
+            plan = self.planner.plan if self.use_plan else None
+            tool_info = await self.tool_recommender.get_recommended_tool_info(context=context, plan=plan)
+        else:
+            tool_info = ""
+
+        # data info
+        await self._check_data()
+
+        while not success and counter < max_retry:
+            ### write code ###
+            code, cause_by = await self._write_code(counter, plan_status, tool_info)
+
+            self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by))
+
+            ### execute code ###
+            result, success = await self.execute_code.run(code)
+            print(result)
+
+            self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode))
+
+            ### process execution result ###
+            counter += 1
+
+            if not success and counter >= max_retry:
+                logger.info("coding failed!")
+                review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
+                if ReviewConst.CHANGE_WORDS[0] in review:
+                    counter = 0  # redo the task again with help of human suggestions
+
+        return code, result, success
+
+    async def _write_code(
+        self,
+        counter: int,
+        plan_status: str = "",
+        tool_info: str = "",
+    ):
+        """Run the current todo action to write code for the user requirement.
+
+        Args:
+            counter (int): Number of attempts already made; reflection is only considered when it is > 0.
+            plan_status (str): Plan status text passed on to the action.
+            tool_info (str): Recommended tool information passed on to the action.
+
+        Returns:
+            tuple: (code, todo), the action's output and the action that produced it.
+        """
+        todo = self.rc.todo  # todo is WriteAnalysisCode
+        logger.info(f"ready to {todo.name}")
+        use_reflection = counter > 0 and self.use_reflection  # only use reflection after the first trial
+
+        user_requirement = self.get_memories()[0].content
+
+        code = await todo.run(
+            user_requirement=user_requirement,
+            plan_status=plan_status,
+            tool_info=tool_info,
+            working_memory=self.working_memory.get(),
+            use_reflection=use_reflection,
+        )
+
+        return code, todo
+
+    async def _check_data(self):
+        """Run CheckData code and add its output to the working memory when the current task needs it.
+
+        Does nothing unless a plan is used, at least one task is finished, and the current task
+        is a data preprocessing, feature engineering or model training task.
+        """
+        # the order of these checks is kept: the plan is only inspected when planning is in use
+        if not self.use_plan or not self.planner.plan.get_finished_tasks():
+            return
+        if self.planner.plan.current_task.task_type not in [
+            TaskType.DATA_PREPROCESS.type_name,
+            TaskType.FEATURE_ENGINEERING.type_name,
+            TaskType.MODEL_TRAIN.type_name,
+        ]:
+            return
+        logger.info("Check updated data")
+        code = await CheckData().run(self.planner.plan)
+        if not code.strip():
+            return
+        result, success = await self.execute_code.run(code)
+        if success:
+            print(result)
+            data_info = DATA_INFO.format(info=result)
+            self.working_memory.add(Message(content=data_info, role="user", cause_by=CheckData))
--- a/metagpt/roles/engineer.py
+++ b/metagpt/roles/engineer.py
@ -204,7 +204,6 @@ class Engineer(Role):

    async def _act_code_plan_and_change(self):
        """Write code plan and change that guides subsequent WriteCode and WriteCodeReview"""
-        logger.info("Writing code plan and change..")
        node = await self.rc.todo.run()
        code_plan_and_change = node.instruct_content.model_dump_json()
        dependencies = {
@ -360,9 +359,17 @@ class Engineer(Role):
            summarizations[ctx].append(filename)
        for ctx, filenames in summarizations.items():
            ctx.codes_filenames = filenames
-            self.summarize_todos.append(SummarizeCode(i_context=ctx, context=self.context, llm=self.llm))
+            new_summarize = SummarizeCode(i_context=ctx, context=self.context, llm=self.llm)
+            for i, act in enumerate(self.summarize_todos):
+                if act.i_context.task_filename == new_summarize.i_context.task_filename:
+                    self.summarize_todos[i] = new_summarize
+                    new_summarize = None
+                    break
+            if new_summarize:
+                self.summarize_todos.append(new_summarize)
        if self.summarize_todos:
            self.set_todo(self.summarize_todos[0])
+            self.summarize_todos.pop(0)

    async def _new_code_plan_and_change_action(self):
        """Create a WriteCodePlanAndChange action for subsequent to-do actions."""
--- a/metagpt/roles/mi/interpreter.py
+++ b/metagpt/roles/mi/interpreter.py
@ -1,85 +0,0 @@
-from __future__ import annotations
-
-from pydantic import Field
-
-from metagpt.actions.mi.ask_review import ReviewConst
-from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
-from metagpt.actions.mi.write_analysis_code import (
-    WriteCodeWithoutTools,
-    WriteCodeWithTools,
-)
-from metagpt.logs import logger
-from metagpt.roles import Role
-from metagpt.schema import Message, Task, TaskResult
-
-
-class Interpreter(Role):
-    name: str = "Ivy"
-    profile: str = "Interpreter"
-    auto_run: bool = True
-    use_tools: bool = False
-    execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True)
-    tools: list[str] = []
-
-    def __init__(
-        self,
-        auto_run=True,
-        use_tools=False,
-        tools=[],
-        **kwargs,
-    ):
-        super().__init__(auto_run=auto_run, use_tools=use_tools, tools=tools, **kwargs)
-        self._set_react_mode(react_mode="plan_and_act", auto_run=auto_run, use_tools=use_tools)
-        if use_tools and tools:
-            from metagpt.tools.tool_registry import (
-                validate_tool_names,  # import upon use
-            )
-
-            self.tools = validate_tool_names(tools)
-            logger.info(f"will only use {self.tools} as tools")
-
-    @property
-    def working_memory(self):
-        return self.rc.working_memory
-
-    async def _act_on_task(self, current_task: Task) -> TaskResult:
-        code, result, is_success = await self._write_and_exec_code()
-        task_result = TaskResult(code=code, result=result, is_success=is_success)
-        return task_result
-
-    async def _write_and_exec_code(self, max_retry: int = 3):
-        counter = 0
-        success = False
-
-        while not success and counter < max_retry:
-            ### write code ###
-            code, cause_by = await self._write_code()
-
-            self.working_memory.add(Message(content=code["code"], role="assistant", cause_by=cause_by))
-
-            ### execute code ###
-            result, success = await self.execute_code.run(**code)
-            print(result)
-
-            self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode))
-
-            ### process execution result ###
-            counter += 1
-
-            if not success and counter >= max_retry:
-                logger.info("coding failed!")
-                review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
-                if ReviewConst.CHANGE_WORDS[0] in review:
-                    counter = 0  # redo the task again with help of human suggestions
-
-        return code["code"], result, success
-
-    async def _write_code(self):
-        todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
-        logger.info(f"ready to {todo.name}")
-
-        context = self.planner.get_useful_memories()
-        # print(*context, sep="\n***\n")
-        code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0)
-
-        return code, todo
--- a/metagpt/roles/mi/ml_engineer.py
+++ b/metagpt/roles/mi/ml_engineer.py
@ -1,64 +0,0 @@
-from metagpt.actions.mi.debug_code import DebugCode
-from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
-from metagpt.actions.mi.ml_action import UpdateDataColumns, WriteCodeWithToolsML
-from metagpt.logs import logger
-from metagpt.roles.mi.interpreter import Interpreter
-from metagpt.tools.tool_type import ToolType
-from metagpt.utils.common import any_to_str
-
-
-class MLEngineer(Interpreter):
-    name: str = "Mark"
-    profile: str = "MLEngineer"
-    debug_context: list = []
-    latest_code: str = ""
-
-    async def _write_code(self):
-        if not self.use_tools:
-            return await super()._write_code()
-
-        # In a trial and errors settings, check whether this is our first attempt to tackle the task. If there is no code execution before, then it is.
-        is_first_trial = any_to_str(ExecuteNbCode) not in [msg.cause_by for msg in self.working_memory.get()]
-
-        if is_first_trial:
-            # For the first trial, write task code from scratch
-            column_info = await self._update_data_columns()
-
-            logger.info("Write code with tools")
-            tool_context, code = await WriteCodeWithToolsML(selected_tools=self.tools).run(
-                context=[],  # context assembled inside the Action
-                plan=self.planner.plan,
-                column_info=column_info,
-            )
-            self.debug_context = tool_context
-            cause_by = WriteCodeWithToolsML
-
-        else:
-            # Previous trials resulted in error, debug and rewrite the code
-            logger.warning("We got a bug, now start to debug...")
-            code = await DebugCode().run(
-                code=self.latest_code,
-                runtime_result=self.working_memory.get(),
-                context=self.debug_context,
-            )
-            logger.info(f"new code \n{code}")
-            cause_by = DebugCode
-
-        self.latest_code = code["code"]
-
-        return code, cause_by
-
-    async def _update_data_columns(self):
-        current_task = self.planner.plan.current_task
-        if current_task.task_type not in [
-            ToolType.DATA_PREPROCESS.type_name,
-            ToolType.FEATURE_ENGINEERING.type_name,
-            ToolType.MODEL_TRAIN.type_name,
-        ]:
-            return ""
-        logger.info("Check columns in updated data")
-        code = await UpdateDataColumns().run(self.planner.plan)
-        success = False
-        result, success = await self.execute_code.run(**code)
-        print(result)
-        return result if success else ""
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@ -283,7 +283,7 @@ class Role(SerializationMixin, ContextMixin, BaseModel):
            self.actions.append(i)
            self.states.append(f"{len(self.actions) - 1}. {action}")

-    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1, auto_run: bool = True, use_tools: bool = False):
+    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1, auto_run: bool = True):
        """Set strategy of the Role reacting to observed Message. Variation lies in how
        this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.

@ -304,9 +304,7 @@ class Role(SerializationMixin, ContextMixin, BaseModel):
        if react_mode == RoleReactMode.REACT:
            self.rc.max_react_loop = max_react_loop
        elif react_mode == RoleReactMode.PLAN_AND_ACT:
-            self.planner = Planner(
-                goal=self.goal, working_memory=self.rc.working_memory, auto_run=auto_run, use_tools=use_tools
-            )
+            self.planner = Planner(goal=self.goal, working_memory=self.rc.working_memory, auto_run=auto_run)

    def _watch(self, actions: Iterable[Type[Action]] | Iterable[Action]):
        """Watch Actions of interest. Role will select Messages caused by these Actions from its personal message
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -46,6 +46,7 @@ from metagpt.const import (
    TASK_FILE_REPO,
 )
 from metagpt.logs import logger
+from metagpt.repo_parser import DotClassInfo
 from metagpt.utils.common import any_to_str, any_to_str_set, import_class
 from metagpt.utils.exceptions import handle_exception
 from metagpt.utils.serialize import (
@ -690,54 +691,64 @@ class CodePlanAndChangeContext(BaseModel):


 # mermaid class view
-class ClassMeta(BaseModel):
+class UMLClassMeta(BaseModel):
    name: str = ""
-    abstraction: bool = False
-    static: bool = False
    visibility: str = ""

+    @staticmethod
+    def name_to_visibility(name: str) -> str:
+        """Map an identifier to a visibility marker based on its leading underscores.
+
+        Args:
+            name (str): The class, attribute or method name.
+
+        Returns:
+            str: "-" for names starting with "__" (except "__init__"), "#" for names starting
+                with a single "_", and "+" for everything else, including "__init__".
+        """
+        if name != "__init__":
+            if name.startswith("__"):
+                return "-"
+            if name.startswith("_"):
+                return "#"
+        return "+"

-class ClassAttribute(ClassMeta):
+
+class UMLClassAttribute(UMLClassMeta):
    value_type: str = ""
    default_value: str = ""

    def get_mermaid(self, align=1) -> str:
        content = "".join(["\t" for i in range(align)]) + self.visibility
        if self.value_type:
-            content += self.value_type + " "
-        content += self.name
+            content += self.value_type.replace(" ", "") + " "
+        name = self.name.split(":", 1)[1] if ":" in self.name else self.name
+        content += name
        if self.default_value:
            content += "="
            if self.value_type not in ["str", "string", "String"]:
                content += self.default_value
            else:
                content += '"' + self.default_value.replace('"', "") + '"'
-        if self.abstraction:
-            content += "*"
-        if self.static:
-            content += "$"
+        # if self.abstraction:
+        #     content += "*"
+        # if self.static:
+        #     content += "$"
        return content


-class ClassMethod(ClassMeta):
-    args: List[ClassAttribute] = Field(default_factory=list)
+class UMLClassMethod(UMLClassMeta):
+    args: List[UMLClassAttribute] = Field(default_factory=list)
    return_type: str = ""

    def get_mermaid(self, align=1) -> str:
        content = "".join(["\t" for i in range(align)]) + self.visibility
-        content += self.name + "(" + ",".join([v.get_mermaid(align=0) for v in self.args]) + ")"
+        name = self.name.split(":", 1)[1] if ":" in self.name else self.name
+        content += name + "(" + ",".join([v.get_mermaid(align=0) for v in self.args]) + ")"
        if self.return_type:
-            content += ":" + self.return_type
-        if self.abstraction:
-            content += "*"
-        if self.static:
-            content += "$"
+            content += " " + self.return_type.replace(" ", "")
+        # if self.abstraction:
+        #     content += "*"
+        # if self.static:
+        #     content += "$"
        return content


-class ClassView(ClassMeta):
-    attributes: List[ClassAttribute] = Field(default_factory=list)
-    methods: List[ClassMethod] = Field(default_factory=list)
+class UMLClassView(UMLClassMeta):
+    attributes: List[UMLClassAttribute] = Field(default_factory=list)
+    methods: List[UMLClassMethod] = Field(default_factory=list)

    def get_mermaid(self, align=1) -> str:
        content = "".join(["\t" for i in range(align)]) + "class " + self.name + "{\n"
@ -747,3 +758,21 @@ class ClassView(ClassMeta):
            content += v.get_mermaid(align=align + 1) + "\n"
        content += "".join(["\t" for i in range(align)]) + "}\n"
        return content
+
+    @classmethod
+    def load_dot_class_info(cls, dot_class_info: DotClassInfo) -> UMLClassView:
+        """Build a class view from a DotClassInfo.
+
+        The visibility of the class, its attributes and its methods is derived from their names
+        with name_to_visibility; method arguments are created without a visibility.
+
+        Args:
+            dot_class_info (DotClassInfo): The parsed class information to convert.
+
+        Returns:
+            UMLClassView: A view holding the converted attributes and methods.
+        """
+        class_view = cls(name=dot_class_info.name, visibility=UMLClassView.name_to_visibility(dot_class_info.name))
+        for attr_info in dot_class_info.attributes.values():
+            class_view.attributes.append(
+                UMLClassAttribute(
+                    name=attr_info.name,
+                    visibility=UMLClassAttribute.name_to_visibility(attr_info.name),
+                    value_type=attr_info.type_,
+                    default_value=attr_info.default_,
+                )
+            )
+        for method_info in dot_class_info.methods.values():
+            # return_type is set once here; a second assignment of the same value is not needed
+            method = UMLClassMethod(
+                name=method_info.name,
+                visibility=UMLClassMethod.name_to_visibility(method_info.name),
+                return_type=method_info.return_args.type_,
+            )
+            method.args.extend(
+                UMLClassAttribute(name=arg.name, value_type=arg.type_, default_value=arg.default_)
+                for arg in method_info.args
+            )
+            class_view.methods.append(method)
+        return class_view
--- a/metagpt/startup.py
+++ b/metagpt/startup.py
@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2024/3/11 19:16
+@Author  : alexanderwu
+@File    : startup.py
+"""
+
+# DEPRECATED: This file is deprecated and will be removed in the future.
+# The startup.py implementation has been moved to software_company.py
--- a/metagpt/strategy/planner.py
+++ b/metagpt/strategy/planner.py
@ -4,8 +4,8 @@ import json

 from pydantic import BaseModel, Field

-from metagpt.actions.mi.ask_review import AskReview, ReviewConst
-from metagpt.actions.mi.write_plan import (
+from metagpt.actions.di.ask_review import AskReview, ReviewConst
+from metagpt.actions.di.write_plan import (
    WritePlan,
    precheck_update_plan_from_rsp,
    update_plan_from_rsp,
@ -13,6 +13,8 @@ from metagpt.actions.mi.write_plan import (
 from metagpt.logs import logger
 from metagpt.memory import Memory
 from metagpt.schema import Message, Plan, Task, TaskResult
+from metagpt.strategy.task_type import TaskType
+from metagpt.utils.common import remove_comments

 STRUCTURAL_CONTEXT = """
 ## User Requirement
@ -25,6 +27,24 @@ STRUCTURAL_CONTEXT = """
 {current_task}
 """

+PLAN_STATUS = """
+## Finished Tasks
+### code
+```python
+{code_written}
+```
+
+### execution result
+{task_results}
+
+## Current Task
+{current_task}
+
+## Task Guidance
+Write complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.
+Specifically, {guidance}
+"""
+

 class Planner(BaseModel):
    plan: Plan
@ -32,7 +52,6 @@ class Planner(BaseModel):
        default_factory=Memory
    )  # memory for working on each task, discarded each time a task is done
    auto_run: bool = False
-    use_tools: bool = False

    def __init__(self, goal: str = "", plan: Plan = None, **kwargs):
        plan = plan or Plan(goal=goal)
@ -53,7 +72,7 @@ class Planner(BaseModel):
        plan_confirmed = False
        while not plan_confirmed:
            context = self.get_useful_memories()
-            rsp = await WritePlan().run(context, max_tasks=max_tasks, use_tools=self.use_tools)
+            rsp = await WritePlan().run(context, max_tasks=max_tasks)
            self.working_memory.add(Message(content=rsp, role="assistant", cause_by=WritePlan))

            # precheck plan before asking reviews
@ -137,3 +156,23 @@ class Planner(BaseModel):
        context_msg = [Message(content=context, role="user")]

        return context_msg + self.working_memory.get()
+
+    def get_plan_status(self) -> str:
+        """Render the PLAN_STATUS prompt for the current state of the plan.
+
+        Returns:
+            str: PLAN_STATUS filled with the code (passed through remove_comments) and the results
+                of the finished tasks, the instruction of the current task, and the guidance of the
+                TaskType matching the current task's type ("" when no TaskType matches).
+        """
+        # prepare components of a plan status
+        finished_tasks = self.plan.get_finished_tasks()
+        code_written = "\n\n".join(remove_comments(task.code) for task in finished_tasks)
+        task_results = "\n\n".join(task.result for task in finished_tasks)
+
+        # Task types are expressed as TaskType.type_name values such as "data preprocessing", which
+        # never equal an enum member name such as DATA_PREPROCESS. Match on the type_name as well as
+        # on the member name (both case-insensitively) so multi-word task types get their guidance too.
+        task_type = self.current_task.task_type.lower()
+        guidance = ""
+        for member in TaskType:
+            if task_type in (member.name.lower(), member.type_name.lower()):
+                guidance = member.value.guidance
+                break
+
+        # combine components in a prompt
+        prompt = PLAN_STATUS.format(
+            code_written=code_written,
+            task_results=task_results,
+            current_task=self.current_task.instruction,
+            guidance=guidance,
+        )
+
+        return prompt
--- a/metagpt/strategy/solver.py
+++ b/metagpt/strategy/solver.py
@ -49,8 +49,8 @@ class TOTSolver(BaseSolver):
        raise NotImplementedError


-class InterpreterSolver(BaseSolver):
-    """InterpreterSolver: Write&Run code in the graph"""
+class DataInterpreterSolver(BaseSolver):
+    """DataInterpreterSolver: Write&Run code in the graph"""

    async def solve(self):
        raise NotImplementedError
--- a/metagpt/strategy/task_type.py
+++ b/metagpt/strategy/task_type.py
@ -0,0 +1,73 @@
+from enum import Enum
+
+from pydantic import BaseModel
+
+from metagpt.prompts.task_type import (
+    DATA_PREPROCESS_PROMPT,
+    EDA_PROMPT,
+    FEATURE_ENGINEERING_PROMPT,
+    IMAGE2WEBPAGE_PROMPT,
+    MODEL_EVALUATE_PROMPT,
+    MODEL_TRAIN_PROMPT,
+)
+
+
+class TaskTypeDef(BaseModel):
+    """Definition of a single task type, used as the value of each TaskType member."""
+
+    name: str  # task type name, returned by TaskType.type_name
+    desc: str = ""  # description of the tasks covered by this type
+    guidance: str = ""  # guidance prompt attached to this type, empty when none is given
+
+
+class TaskType(Enum):
+    """By identifying specific types of tasks, we can inject human priors (guidance) to help task solving"""
+
+    EDA = TaskTypeDef(
+        name="eda",
+        desc="For performing exploratory data analysis",
+        guidance=EDA_PROMPT,
+    )
+    DATA_PREPROCESS = TaskTypeDef(
+        name="data preprocessing",
+        # the trailing space keeps the two implicitly concatenated literals from running together
+        desc="For preprocessing dataset in a data analysis or machine learning task ONLY, "
+        "general data operation doesn't fall into this type",
+        guidance=DATA_PREPROCESS_PROMPT,
+    )
+    FEATURE_ENGINEERING = TaskTypeDef(
+        name="feature engineering",
+        desc="Only for creating new columns for input data.",
+        guidance=FEATURE_ENGINEERING_PROMPT,
+    )
+    MODEL_TRAIN = TaskTypeDef(
+        name="model train",
+        desc="Only for training model.",
+        guidance=MODEL_TRAIN_PROMPT,
+    )
+    MODEL_EVALUATE = TaskTypeDef(
+        name="model evaluate",
+        desc="Only for evaluating model.",
+        guidance=MODEL_EVALUATE_PROMPT,
+    )
+    IMAGE2WEBPAGE = TaskTypeDef(
+        name="image2webpage",
+        desc="For converting image into webpage code.",
+        guidance=IMAGE2WEBPAGE_PROMPT,
+    )
+    OTHER = TaskTypeDef(name="other", desc="Any tasks not in the defined categories")
+
+    # Legacy TaskType to support tool recommendation using type match. You don't need to define task types if you have no human priors to inject.
+    TEXT2IMAGE = TaskTypeDef(
+        name="text2image",
+        desc="Related to text2image, image2image using stable diffusion model.",
+    )
+    WEBSCRAPING = TaskTypeDef(
+        name="web scraping",
+        desc="For scraping data from web pages.",
+    )
+    EMAIL_LOGIN = TaskTypeDef(
+        name="email login",
+        desc="For logging to an email.",
+    )
+
+    @property
+    def type_name(self) -> str:
+        """Return the `name` of the TaskTypeDef held by this member (e.g. "data preprocessing")."""
+        return self.value.name
--- a/metagpt/tools/libs/data_preprocess.py
+++ b/metagpt/tools/libs/data_preprocess.py
@ -1,6 +1,7 @@
 from __future__ import annotations

 import json
+from typing import Literal

 import numpy as np
 import pandas as pd
@ -16,9 +17,8 @@ from sklearn.preprocessing import (
 )

 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.tool_type import ToolType

-TOOL_TYPE = ToolType.DATA_PREPROCESS.type_name
+TAGS = ["data preprocessing", "machine learning"]


 class MLProcess:
@ -85,20 +85,22 @@ class DataPreprocessTool(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class FillMissingValue(DataPreprocessTool):
    """
    Completing missing values with simple strategies.
    """

-    def __init__(self, features: list, strategy: str = "mean", fill_value=None):
+    def __init__(
+        self, features: list, strategy: Literal["mean", "median", "most_frequent", "constant"] = "mean", fill_value=None
+    ):
        """
        Initialize self.

        Args:
            features (list): Columns to be processed.
-            strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only
-                                      be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
+            strategy (Literal["mean", "median", "most_frequent", "constant"], optional): The imputation strategy, notice 'mean' and 'median' can only
+                                      be used for numeric features. Defaults to 'mean'.
            fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
                                        Defaults to None.
        """
@ -106,7 +108,7 @@ class FillMissingValue(DataPreprocessTool):
        self.model = SimpleImputer(strategy=strategy, fill_value=fill_value)


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class MinMaxScale(DataPreprocessTool):
    """
    Transform features by scaling each feature to a range, which is (0, 1).
@ -117,7 +119,7 @@ class MinMaxScale(DataPreprocessTool):
        self.model = MinMaxScaler()


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class StandardScale(DataPreprocessTool):
    """
    Standardize features by removing the mean and scaling to unit variance.
@ -128,7 +130,7 @@ class StandardScale(DataPreprocessTool):
        self.model = StandardScaler()


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class MaxAbsScale(DataPreprocessTool):
    """
    Scale each feature by its maximum absolute value.
@ -139,7 +141,7 @@ class MaxAbsScale(DataPreprocessTool):
        self.model = MaxAbsScaler()


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class RobustScale(DataPreprocessTool):
    """
    Apply the RobustScaler to scale features using statistics that are robust to outliers.
@ -150,7 +152,7 @@ class RobustScale(DataPreprocessTool):
        self.model = RobustScaler()


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class OrdinalEncode(DataPreprocessTool):
    """
    Encode categorical features as ordinal integers.
@ -161,7 +163,7 @@ class OrdinalEncode(DataPreprocessTool):
        self.model = OrdinalEncoder()


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class OneHotEncode(DataPreprocessTool):
    """
    Apply one-hot encoding to specified categorical columns, the original columns will be dropped.
@ -180,7 +182,7 @@ class OneHotEncode(DataPreprocessTool):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class LabelEncode(DataPreprocessTool):
    """
    Apply label encoding to specified categorical columns in-place.
--- a/metagpt/tools/libs/email_login.py
+++ b/metagpt/tools/libs/email_login.py
@ -1,7 +1,6 @@
 from imap_tools import MailBox

 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.tool_type import ToolType

 # Define a dictionary mapping email domains to their IMAP server addresses
 IMAP_SERVERS = {
@ -24,7 +23,7 @@ IMAP_SERVERS = {
 }


-@register_tool(tool_type=ToolType.EMAIL_LOGIN.type_name)
+@register_tool(tags=["email login"])
 def email_login_imap(email_address, email_password):
    """
    Use imap_tools package to log in to your email (the email that supports IMAP protocol) to verify and return the account object.
--- a/metagpt/tools/libs/feature_engineering.py
+++ b/metagpt/tools/libs/feature_engineering.py
@ -19,12 +19,11 @@ from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures

 from metagpt.tools.libs.data_preprocess import MLProcess
 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.tool_type import ToolType

-TOOL_TYPE = ToolType.FEATURE_ENGINEERING.type_name
+TAGS = ["feature engineering", "machine learning"]


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class PolynomialExpansion(MLProcess):
    """
    Add polynomial and interaction features from selected numeric columns to input DataFrame.
@ -67,7 +66,7 @@ class PolynomialExpansion(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class CatCount(MLProcess):
    """
    Add value counts of a categorical column as new feature.
@ -92,7 +91,7 @@ class CatCount(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class TargetMeanEncoder(MLProcess):
    """
    Encode a categorical column by the mean of the label column, and adds the result as a new feature.
@ -119,7 +118,7 @@ class TargetMeanEncoder(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class KFoldTargetMeanEncoder(MLProcess):
    """
    Add a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column.
@ -159,7 +158,7 @@ class KFoldTargetMeanEncoder(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class CatCross(MLProcess):
    """
    Add pairwise crossed features and convert them to numerical features.
@ -216,7 +215,7 @@ class CatCross(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class GroupStat(MLProcess):
    """
    Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'.
@ -248,7 +247,7 @@ class GroupStat(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class SplitBins(MLProcess):
    """
    Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly.
@ -276,7 +275,7 @@ class SplitBins(MLProcess):
        return new_df


-# @register_tool(tool_type=TOOL_TYPE)
+# @register_tool(tags=TAGS)
 class ExtractTimeComps(MLProcess):
    """
    Extract time components from a datetime column and add them as new features.
@ -316,7 +315,7 @@ class ExtractTimeComps(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class GeneralSelection(MLProcess):
    """
    Drop all nan feats and feats with only one unique value.
@ -349,7 +348,7 @@ class GeneralSelection(MLProcess):


 # skip for now because lgb is needed
-# @register_tool(tool_type=TOOL_TYPE)
+# @register_tool(tags=TAGS)
 class TreeBasedSelection(MLProcess):
    """
    Select features based on tree-based model and remove features with low importance.
@ -403,7 +402,7 @@ class TreeBasedSelection(MLProcess):
        return new_df


-@register_tool(tool_type=TOOL_TYPE)
+@register_tool(tags=TAGS)
 class VarianceBasedSelection(MLProcess):
    """
    Select features based on variance and remove features with low variance.
--- a/metagpt/tools/libs/gpt_v_generator.py
+++ b/metagpt/tools/libs/gpt_v_generator.py
@ -5,13 +5,13 @@
@Author  : mannaandpoem
@File    : gpt_v_generator.py
 """
-import os
+import re
 from pathlib import Path

 from metagpt.const import DEFAULT_WORKSPACE_ROOT
+from metagpt.logs import logger
 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.tool_type import ToolType
-from metagpt.utils.common import encode_image
+from metagpt.utils.common import CodeParser, encode_image

 ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX designer, please generate layout information for this image:

@ -28,11 +28,9 @@ As the design pays tribute to large companies, sometimes it is normal for some c
 Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""


-@register_tool(
-    tool_type=ToolType.IMAGE2WEBPAGE.type_name, include_functions=["__init__", "generate_webpages", "save_webpages"]
-)
+@register_tool(tags=["image2webpage"], include_functions=["__init__", "generate_webpages", "save_webpages"])
 class GPTvGenerator:
-    """Class for generating webpages at once.
+    """Class for generating webpage code from a given webpage screenshot.

    This class provides methods to generate webpages including all code (HTML, CSS, and JavaScript) based on an image.
    It utilizes a vision model to analyze the layout from an image and generate webpage codes accordingly.
@ -75,50 +73,34 @@ class GPTvGenerator:
        return await self.llm.aask(msg=prompt, images=[encode_image(image_path)])

    @staticmethod
-    def save_webpages(image_path: str, webpages: str) -> Path:
+    def save_webpages(webpages: str, save_folder_name: str = "example") -> Path:
        """Save webpages including all code (HTML, CSS, and JavaScript) at once.

        Args:
-            image_path (str): The path of the image file.
            webpages (str): The generated webpages content.
+            save_folder_name (str, optional): The name of the folder to save the webpages. Defaults to 'example'.

        Returns:
            Path: The path of the saved webpages.
        """
        # Create a folder called webpages in the workspace directory to store HTML, CSS, and JavaScript files
-        webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / Path(image_path).stem
-        os.makedirs(webpages_path, exist_ok=True)
+        webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / save_folder_name
+        logger.info(f"code will be saved at {webpages_path}")
+        webpages_path.mkdir(parents=True, exist_ok=True)

        index_path = webpages_path / "index.html"
-        try:
-            index = webpages.split("```html")[1].split("```")[0]
-            style_path = None
-            if "styles.css" in index:
-                style_path = webpages_path / "styles.css"
-            elif "style.css" in index:
-                style_path = webpages_path / "style.css"
-            style = webpages.split("```css")[1].split("```")[0] if style_path else ""
+        index_path.write_text(CodeParser.parse_code(block=None, text=webpages, lang="html"))

-            js_path = None
-            if "scripts.js" in index:
-                js_path = webpages_path / "scripts.js"
-            elif "script.js" in index:
-                js_path = webpages_path / "script.js"
+        extract_and_save_code(folder=webpages_path, text=webpages, pattern="styles?.css", language="css")

-            js = webpages.split("```javascript")[1].split("```")[0] if js_path else ""
-        except IndexError:
-            raise ValueError(f"No html or css or js code found in the result. \nWebpages: {webpages}")
-
-        try:
-            with open(index_path, "w", encoding="utf-8") as f:
-                f.write(index)
-            if style_path:
-                with open(style_path, "w", encoding="utf-8") as f:
-                    f.write(style)
-            if js_path:
-                with open(js_path, "w", encoding="utf-8") as f:
-                    f.write(js)
-        except FileNotFoundError as e:
-            raise FileNotFoundError(f"Cannot save the webpages to {str(webpages_path)}") from e
+        extract_and_save_code(folder=webpages_path, text=webpages, pattern="scripts?.js", language="javascript")

        return webpages_path
+
+
+def extract_and_save_code(folder, text, pattern, language):
+    """Save the `language` code block found in `text` into a file under `folder`.
+
+    The file name is the first substring of `text` matched by the regex `pattern`.
+    Nothing is written when `pattern` does not match anywhere in `text`.
+    """
+    match = re.search(pattern, text)
+    if not match:
+        return
+    target = folder / match.group(0)
+    target.write_text(CodeParser.parse_code(block=None, text=text, lang=language), encoding="utf-8")
--- a/metagpt/tools/libs/sd_engine.py
+++ b/metagpt/tools/libs/sd_engine.py
@ -14,11 +14,9 @@ import requests
 from aiohttp import ClientSession
 from PIL import Image, PngImagePlugin

-#
 from metagpt.const import SD_OUTPUT_FILE_REPO, SOURCE_ROOT
 from metagpt.logs import logger
 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.tool_type import ToolType

 payload = {
    "prompt": "",
@ -55,7 +53,7 @@ default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"


@register_tool(
-    tool_type=ToolType.STABLE_DIFFUSION.type_name,
+    tags=["text2image", "multimodal"],
    include_functions=["__init__", "simple_run_t2i", "run_t2i", "construct_payload", "save"],
 )
 class SDEngine:
--- a/metagpt/tools/libs/web_scraping.py
+++ b/metagpt/tools/libs/web_scraping.py
@ -1,9 +1,8 @@
 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.tool_type import ToolType
 from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper


-@register_tool(tool_type=ToolType.WEBSCRAPING.type_name)
+@register_tool(tags=["web scraping", "web"])
 async def scrape_web_playwright(url):
    """
    Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright.
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@ -2,14 +2,18 @@ import inspect

 from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces

+PARSER = GoogleDocstringParser

-def convert_code_to_tool_schema(obj, include: list[str] = []):
+
+def convert_code_to_tool_schema(obj, include: list[str] = None):
    docstring = inspect.getdoc(obj)
    assert docstring, "no docstring found for the objects, skip registering"

    if inspect.isclass(obj):
        schema = {"type": "class", "description": remove_spaces(docstring), "methods": {}}
        for name, method in inspect.getmembers(obj, inspect.isfunction):
+            if name.startswith("_") and name != "__init__":  # skip private methods
+                continue
            if include and name not in include:
                continue
            # method_doc = inspect.getdoc(method)
@ -23,54 +27,31 @@ def convert_code_to_tool_schema(obj, include: list[str] = []):
    return schema


-def function_docstring_to_schema(fn_obj, docstring):
+def function_docstring_to_schema(fn_obj, docstring) -> dict:
+    """
+    Converts a function's docstring into a schema dictionary.
+
+    Args:
+        fn_obj: The function object.
+        docstring: The docstring of the function.
+
+    Returns:
+        A dictionary representing the schema of the function's docstring.
+        The dictionary contains the following keys:
+        - 'type': The type of the function ('function' or 'async_function').
+        - 'description': The first section of the docstring describing the function overall. Provided to LLMs for both recommending and using the function.
+        - 'signature': The signature of the function, which helps LLMs understand how to call the function.
+        - 'parameters': Docstring section describing parameters including args and returns, served as extra details for LLM perception.
+    """
+    # The signature is read from the function object itself, not from the docstring.
+    signature = inspect.signature(fn_obj)
+
+    # Clean the docstring with remove_spaces before handing it to the parser.
+    docstring = remove_spaces(docstring)
+
+    # PARSER.parse yields two parts: the overall description and the parameter section.
+    overall_desc, param_desc = PARSER.parse(docstring)
+
+    # Coroutine functions are tagged "async_function"; everything else is tagged "function".
     function_type = "function" if not inspect.iscoroutinefunction(fn_obj) else "async_function"
-    return {"type": function_type, **docstring_to_schema(docstring)}

-
-def docstring_to_schema(docstring: str):
-    if docstring is None:
-        return {}
-
-    parser = GoogleDocstringParser(docstring=docstring)
-
-    # 匹配简介部分
-    description = parser.parse_desc()
-
-    # 匹配Args部分
-    params = parser.parse_params()
-    parameter_schema = {"properties": {}, "required": []}
-    for param in params:
-        param_name, param_type, param_desc = param
-        # check required or optional
-        is_optional, param_type = parser.check_and_parse_optional(param_type)
-        if not is_optional:
-            parameter_schema["required"].append(param_name)
-        # type and desc
-        param_dict = {"type": param_type, "description": remove_spaces(param_desc)}
-        # match Default for optional args
-        has_default_val, default_val = parser.check_and_parse_default_value(param_desc)
-        if has_default_val:
-            param_dict["default"] = default_val
-        # match Enum
-        has_enum, enum_vals = parser.check_and_parse_enum(param_desc)
-        if has_enum:
-            param_dict["enum"] = enum_vals
-        # add to parameter schema
-        parameter_schema["properties"].update({param_name: param_dict})
-
-    # 匹配Returns部分
-    returns = parser.parse_returns()
-
-    # 构建YAML字典
-    schema = {
-        "description": description,
-        "parameters": parameter_schema,
-    }
-    if returns:
-        schema["returns"] = [{"type": ret[0], "description": remove_spaces(ret[1])} for ret in returns]
-
-    return schema
+    return {"type": function_type, "description": overall_desc, "signature": str(signature), "parameters": param_desc}


 def get_class_method_docstring(cls, method_name):
--- a/metagpt/tools/tool_data_type.py
+++ b/metagpt/tools/tool_data_type.py
@ -1,12 +1,6 @@
 from pydantic import BaseModel


-class ToolTypeDef(BaseModel):
-    name: str
-    desc: str = ""
-    usage_prompt: str = ""
-
-
 class ToolSchema(BaseModel):
    description: str

@ -16,3 +10,4 @@ class Tool(BaseModel):
    path: str
    schemas: dict = {}
    code: str = ""
+    tags: list[str] = []
--- a/metagpt/tools/tool_recommend.py
+++ b/metagpt/tools/tool_recommend.py
@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import json
+from typing import Any
+
+import jieba
+import numpy as np
+from pydantic import BaseModel, field_validator
+from rank_bm25 import BM25Okapi
+
+from metagpt.llm import LLM
+from metagpt.logs import logger
+from metagpt.schema import Plan
+from metagpt.tools import TOOL_REGISTRY
+from metagpt.tools.tool_data_type import Tool
+from metagpt.tools.tool_registry import validate_tool_names
+from metagpt.utils.common import CodeParser
+
+# Prompt section that presents the recommended tools to the LLM.
+# `{tool_schemas}` is filled in by ToolRecommender.get_recommended_tool_info.
+TOOL_INFO_PROMPT = """
+## Capabilities
+- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python class or function.
+- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
+
+## Available Tools:
+Each tool is described in JSON format. When you call a tool, import the tool from its path first.
+{tool_schemas}
+"""
+
+
+# Prompt used by ToolRecommender.rank_tools to let the LLM pick tool names.
+# Placeholders: `{current_task}`, `{topk}` and `{available_tools}`; the reply is expected to be a
+# JSON list of tool names inside a json code fence.
+TOOL_RECOMMENDATION_PROMPT = """
+## User Requirement:
+{current_task}
+
+## Task
+Recommend up to {topk} tools from 'Available Tools' that can help solve the 'User Requirement'. 
+
+## Available Tools:
+{available_tools}
+
+## Tool Selection and Instructions:
+- Select tools most relevant to completing the 'User Requirement'.
+- If you believe that no tools are suitable, indicate with an empty list.
+- Only list the names of the tools, not the full schema of each tool.
+- Ensure selected tools are listed in 'Available Tools'.
+- Output a json list of tool names:
+```json
+["tool_name1", "tool_name2", ...]
+```
+"""
+
+
+class ToolRecommender(BaseModel):
+    """
+    The default ToolRecommender:
+    1. Recall: To be implemented in subclasses. Recall tools based on the given context and plan.
+    2. Rank: Use LLM to select final candidates from recalled set.
+    """
+
+    tools: dict[str, Tool] = {}
+    force: bool = False  # whether to forcedly recommend the specified tools
+
+    @field_validator("tools", mode="before")
+    @classmethod
+    def validate_tools(cls, v: list[str]) -> dict[str, Tool]:
+        """Turn the user-supplied list of tool names / tags into a {tool_name: Tool} mapping."""
+        # One can use special symbol ["<all>"] to indicate use of all registered tools
+        if v == ["<all>"]:
+            return TOOL_REGISTRY.get_all_tools()
+        else:
+            return validate_tool_names(v)
+
+    async def recommend_tools(
+        self, context: str = "", plan: Plan = None, recall_topk: int = 20, topk: int = 5
+    ) -> list[Tool]:
+        """
+        Recommends a list of tools based on the given context and plan. The recommendation process includes two stages: recall from a large pool and rank the recalled tools to select the final set.
+
+        Args:
+            context (str): The context for tool recommendation.
+            plan (Plan): The plan for tool recommendation.
+            recall_topk (int): The number of tools to recall in the initial step.
+            topk (int): The number of tools to return after rank as final recommendations.
+
+        Returns:
+            list[Tool]: A list of recommended tools.
+        """
+
+        if not self.tools:
+            return []
+
+        if self.force or (not context and not plan):
+            # directly use what users have specified as result for forced recommendation;
+            # directly use the whole set if there is no useful information
+            return list(self.tools.values())
+
+        recalled_tools = await self.recall_tools(context=context, plan=plan, topk=recall_topk)
+        if not recalled_tools:
+            return []
+
+        ranked_tools = await self.rank_tools(recalled_tools=recalled_tools, context=context, plan=plan, topk=topk)
+
+        logger.info(f"Recommended tools: \n{[tool.name for tool in ranked_tools]}")
+
+        return ranked_tools
+
+    async def get_recommended_tool_info(self, **kwargs) -> str:
+        """
+        Wrap recommended tools with their info in a string, which can be used directly in a prompt.
+
+        Keyword arguments are forwarded to `recommend_tools`. Returns an empty string when nothing is recommended.
+        """
+        recommended_tools = await self.recommend_tools(**kwargs)
+        if not recommended_tools:
+            return ""
+        tool_schemas = {tool.name: tool.schemas for tool in recommended_tools}
+        return TOOL_INFO_PROMPT.format(tool_schemas=tool_schemas)
+
+    async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
+        """
+        Retrieves a list of relevant tools from a large pool, based on the given context and plan.
+
+        Raises:
+            NotImplementedError: always; subclasses provide the recall strategy.
+        """
+        raise NotImplementedError
+
+    async def rank_tools(
+        self, recalled_tools: list[Tool], context: str = "", plan: Plan = None, topk: int = 5
+    ) -> list[Tool]:
+        """
+        Default rank methods for a ToolRecommender. Use LLM to rank the recalled tools based on the given context, plan, and topk value.
+
+        If the LLM reply cannot be parsed as a JSON list, the recalled tools are used in their recall order
+        instead of raising, so one badly formatted reply does not abort the whole recommendation.
+
+        Returns:
+            list[Tool]: At most `topk` tools that passed `validate_tool_names`.
+        """
+        current_task = plan.current_task.instruction if plan else context
+
+        available_tools = {tool.name: tool.schemas["description"] for tool in recalled_tools}
+        prompt = TOOL_RECOMMENDATION_PROMPT.format(
+            current_task=current_task,
+            available_tools=available_tools,
+            topk=topk,
+        )
+        rsp = await LLM().aask(prompt)
+        rsp = CodeParser.parse_code(block=None, text=rsp)
+        try:
+            ranked_tools = json.loads(rsp)
+        except json.JSONDecodeError:
+            ranked_tools = None
+
+        if not isinstance(ranked_tools, list):
+            # validate_tool_names only accepts a list; fall back to what was recalled
+            logger.warning(f"Invalid rank result: {rsp}, will use the recalled tools instead.")
+            ranked_tools = list(available_tools.keys())
+
+        # registry lookups need hashable string keys, so drop anything else the LLM may have emitted
+        ranked_tools = [name for name in ranked_tools if isinstance(name, str)]
+
+        valid_tools = validate_tool_names(ranked_tools)
+
+        return list(valid_tools.values())[:topk]
+
+
+class TypeMatchToolRecommender(ToolRecommender):
+    """
+    Legacy recommender whose recall stage matches the current task type against tool tags:
+    1. Recall: Keep the user-specified tools registered under a tag equal to the task type;
+    2. Rank: LLM rank, inherited unchanged from ToolRecommender.
+    """
+
+    async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
+        if plan:
+            # look up the tools whose tag is exactly the current task type
+            tagged = TOOL_REGISTRY.get_tools_by_tag(plan.current_task.task_type)
+            shared_names = set(self.tools.keys()) & tagged.keys()
+            recalled_tools = [tagged[name] for name in shared_names][:topk]
+
+            logger.info(f"Recalled tools: \n{[tool.name for tool in recalled_tools]}")
+
+            return recalled_tools
+
+        # without a plan there is no task type to match: hand back the first topk specified tools
+        return list(self.tools.values())[:topk]
+
+
+class BM25ToolRecommender(ToolRecommender):
+    """
+    A ToolRecommender using BM25 at the recall stage:
+    1. Recall: Query the tool corpus (name, tags and description) with the task instruction if a plan exists, otherwise with the context;
+    2. Rank: LLM rank, the same as the default ToolRecommender.
+    """
+
+    bm25: Any = None  # BM25Okapi index over the tools; stays None when there are no tools
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._init_corpus()
+
+    def _init_corpus(self):
+        """Build the BM25 index with one document per tool."""
+        if not self.tools:
+            # BM25Okapi averages over the corpus size, so an empty corpus cannot be indexed
+            return
+        corpus = [f"{tool.name} {tool.tags}: {tool.schemas['description']}" for tool in self.tools.values()]
+        tokenized_corpus = [self._tokenize(doc) for doc in corpus]
+        self.bm25 = BM25Okapi(tokenized_corpus)
+
+    def _tokenize(self, text):
+        return jieba.lcut(text)  # FIXME: needs more sophisticated tokenization
+
+    async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
+        """Return up to `topk` tools with the highest BM25 score for the query, best first."""
+        if self.bm25 is None:
+            return []
+
+        query = plan.current_task.instruction if plan else context
+
+        query_tokens = self._tokenize(query)
+        doc_scores = self.bm25.get_scores(query_tokens)
+        top_indexes = np.argsort(doc_scores)[::-1][:topk]
+        # corpus documents were built in the iteration order of self.tools, so indexes line up
+        tools = list(self.tools.values())
+        recalled_tools = [tools[index] for index in top_indexes]
+
+        logger.info(
+            f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[doc_scores[index] for index in top_indexes]}"
+        )
+
+        return recalled_tools
+
+
+class EmbeddingToolRecommender(ToolRecommender):
+    """
+    NOTE: Placeholder, the recall stage is not implemented yet.
+    Intended design of this ToolRecommender:
+    1. Recall: Use embeddings to calculate the similarity between query and tool info;
+    2. Rank: LLM rank, the same as the default ToolRecommender.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
+        # nothing is recalled yet; the coroutine simply yields None
+        return None
--- a/metagpt/tools/tool_registry.py
+++ b/metagpt/tools/tool_registry.py
@ -10,26 +10,20 @@ from __future__ import annotations
 import inspect
 import os
 from collections import defaultdict
+from typing import Union

 import yaml
-from pydantic import BaseModel, field_validator
+from pydantic import BaseModel

 from metagpt.const import TOOL_SCHEMA_PATH
 from metagpt.logs import logger
 from metagpt.tools.tool_convert import convert_code_to_tool_schema
-from metagpt.tools.tool_data_type import Tool, ToolSchema, ToolTypeDef
-from metagpt.tools.tool_type import ToolType
+from metagpt.tools.tool_data_type import Tool, ToolSchema


 class ToolRegistry(BaseModel):
    tools: dict = {}
-    tool_types: dict = {}
-    tools_by_types: dict = defaultdict(dict)  # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
-
-    @field_validator("tool_types", mode="before")
-    @classmethod
-    def init_tool_types(cls, tool_types: ToolType):
-        return {tool_type.type_name: tool_type.value for tool_type in tool_types}
+    tools_by_tags: dict = defaultdict(dict)  # two-layer k-v, {tag: {tool_name: {...}, ...}, ...}

    def register_tool(
        self,
@ -37,25 +31,15 @@ class ToolRegistry(BaseModel):
        tool_path,
        schema_path="",
        tool_code="",
-        tool_type="other",
+        tags=None,
        tool_source_object=None,
-        include_functions=[],
+        include_functions=None,
        verbose=False,
    ):
        if self.has_tool(tool_name):
            return

-        if tool_type not in self.tool_types:
-            # register new tool type on the fly
-            logger.warning(
-                f"{tool_type} not previously defined, will create a temporary tool type with just a name. This tool type is only effective during this runtime. You may consider add this tool type with more configs permanently at metagpt.tools.tool_type"
-            )
-            temp_tool_type_obj = ToolTypeDef(name=tool_type)
-            self.tool_types[tool_type] = temp_tool_type_obj
-            if verbose:
-                logger.info(f"tool type {tool_type} registered")
-
-        schema_path = schema_path or TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"
+        schema_path = schema_path or TOOL_SCHEMA_PATH / f"{tool_name}.yml"

        schemas = make_schema(tool_source_object, include_functions, schema_path)

@ -70,10 +54,11 @@ class ToolRegistry(BaseModel):
            # logger.warning(
            #     f"{tool_name} schema not conforms to required format, but will be used anyway. Mismatch: {e}"
            # )
-
-        tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code)
+        tags = tags or []
+        tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code, tags=tags)
        self.tools[tool_name] = tool
-        self.tools_by_types[tool_type][tool_name] = tool
+        for tag in tags:
+            self.tools_by_tags[tag].update({tool_name: tool})
        if verbose:
            logger.info(f"{tool_name} registered")
            logger.info(f"schema made at {str(schema_path)}, can be used for checking")
@ -84,24 +69,24 @@ class ToolRegistry(BaseModel):
    def get_tool(self, key) -> Tool:
        return self.tools.get(key)

-    def get_tools_by_type(self, key) -> dict[str, Tool]:
-        return self.tools_by_types.get(key, {})
+    def get_tools_by_tag(self, key) -> dict[str, Tool]:
+        return self.tools_by_tags.get(key, {})

-    def has_tool_type(self, key) -> bool:
-        return key in self.tool_types
+    def get_all_tools(self) -> dict[str, Tool]:
+        return self.tools

-    def get_tool_type(self, key) -> ToolType:
-        return self.tool_types.get(key)
+    def has_tool_tag(self, key) -> bool:
+        return key in self.tools_by_tags

-    def get_tool_types(self) -> dict[str, ToolType]:
-        return self.tool_types
+    def get_tool_tags(self) -> list[str]:
+        return list(self.tools_by_tags.keys())


 # Registry instance
-TOOL_REGISTRY = ToolRegistry(tool_types=ToolType)
+TOOL_REGISTRY = ToolRegistry()


-def register_tool(tool_type: str = "other", schema_path: str = "", **kwargs):
+def register_tool(tags: list[str] = None, schema_path: str = "", **kwargs):
    """register a tool to registry"""

    def decorator(cls):
@ -117,7 +102,7 @@ def register_tool(tool_type: str = "other", schema_path: str = "", **kwargs):
            tool_path=file_path,
            schema_path=schema_path,
            tool_code=source_code,
-            tool_type=tool_type,
+            tags=tags,
            tool_source_object=cls,
            **kwargs,
        )
@ -142,14 +127,15 @@ def make_schema(tool_source_object, include, path):
    return schema


-def validate_tool_names(tools: list[str], return_tool_object=False) -> list[str]:
-    valid_tools = []
-    for tool_name in tools:
-        if not TOOL_REGISTRY.has_tool(tool_name):
-            logger.warning(
-                f"Specified tool {tool_name} not found and was skipped. Check if you have registered it properly"
-            )
+def validate_tool_names(tools: Union[list[str], str]) -> dict[str, Tool]:
+    """Resolve tool names and tags into registered Tool objects.
+
+    Args:
+        tools: A list whose entries are either registered tool names or tags.
+
+    Returns:
+        A {tool_name: Tool} mapping holding every tool matched by name plus every tool
+        registered under a matched tag. Unknown entries are logged and skipped.
+
+    Raises:
+        AssertionError: If `tools` is not a list.
+    """
+    assert isinstance(tools, list), "tools must be a list of str"
+    valid_tools = {}
+    for key in tools:
+        # an entry may be a tool name or a tag; take the union to get the whole set
+        if TOOL_REGISTRY.has_tool(key):
+            valid_tools.update({key: TOOL_REGISTRY.get_tool(key)})
+        elif TOOL_REGISTRY.has_tool_tag(key):
+            valid_tools.update(TOOL_REGISTRY.get_tools_by_tag(key))
         else:
-            valid_tool = TOOL_REGISTRY.get_tool(tool_name) if return_tool_object else tool_name
-            valid_tools.append(valid_tool)
+            logger.warning(f"invalid tool name or tool type name: {key}, skipped")
     return valid_tools
--- a/metagpt/tools/tool_type.py
+++ b/metagpt/tools/tool_type.py
@ -1,64 +0,0 @@
-from enum import Enum
-
-from metagpt.prompts.tool_types import (
-    DATA_PREPROCESS_PROMPT,
-    EDA_PROMPT,
-    FEATURE_ENGINEERING_PROMPT,
-    IMAGE2WEBPAGE_PROMPT,
-    MODEL_EVALUATE_PROMPT,
-    MODEL_TRAIN_PROMPT,
-)
-from metagpt.tools.tool_data_type import ToolTypeDef
-
-
-class ToolType(Enum):
-    EDA = ToolTypeDef(
-        name="eda",
-        desc="For performing exploratory data analysis",
-        usage_prompt=EDA_PROMPT,
-    )
-    DATA_PREPROCESS = ToolTypeDef(
-        name="data_preprocess",
-        desc="Only for changing value inplace.",
-        usage_prompt=DATA_PREPROCESS_PROMPT,
-    )
-    EMAIL_LOGIN = ToolTypeDef(
-        name="email_login",
-        desc="For logging to an email.",
-    )
-    FEATURE_ENGINEERING = ToolTypeDef(
-        name="feature_engineering",
-        desc="Only for creating new columns for input data.",
-        usage_prompt=FEATURE_ENGINEERING_PROMPT,
-    )
-    MODEL_TRAIN = ToolTypeDef(
-        name="model_train",
-        desc="Only for training model.",
-        usage_prompt=MODEL_TRAIN_PROMPT,
-    )
-    MODEL_EVALUATE = ToolTypeDef(
-        name="model_evaluate",
-        desc="Only for evaluating model.",
-        usage_prompt=MODEL_EVALUATE_PROMPT,
-    )
-    STABLE_DIFFUSION = ToolTypeDef(
-        name="stable_diffusion",
-        desc="Related to text2image, image2image using stable diffusion model.",
-    )
-    IMAGE2WEBPAGE = ToolTypeDef(
-        name="image2webpage",
-        desc="For converting image into webpage code.",
-        usage_prompt=IMAGE2WEBPAGE_PROMPT,
-    )
-    WEBSCRAPING = ToolTypeDef(
-        name="web_scraping",
-        desc="For scraping data from web pages.",
-    )
-    OTHER = ToolTypeDef(name="other", desc="Any tools not in the defined categories")
-
-    def __missing__(self, key):
-        return self.OTHER
-
-    @property
-    def type_name(self):
-        return self.value.name
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@ -23,10 +23,10 @@ import platform
 import re
 import sys
 import traceback
-import typing
 from io import BytesIO
 from pathlib import Path
-from typing import Any, Callable, List, Tuple, Union
+from typing import Any, Callable, List, Literal, Tuple, Union
+from urllib.parse import quote, unquote

 import aiofiles
 import loguru
@ -361,16 +361,6 @@ def parse_recipient(text):
    return ""


-def create_func_call_config(func_schema: dict) -> dict:
-    """Create new function call config"""
-    tools = [{"type": "function", "function": func_schema}]
-    tool_choice = {"type": "function", "function": {"name": func_schema["name"]}}
-    return {
-        "tools": tools,
-        "tool_choice": tool_choice,
-    }
-
-
 def remove_comments(code_str: str) -> str:
    """Remove comments from code."""
    pattern = r"(\".*?\"|\'.*?\')|(\#.*?$)"
@ -433,23 +423,109 @@ def is_send_to(message: "Message", addresses: set):
 def any_to_name(val):
    """
    Convert a value to its name by extracting the last part of the dotted path.
-
-    :param val: The value to convert.
-
-    :return: The name of the value.
    """
    return any_to_str(val).split(".")[-1]


-def concat_namespace(*args) -> str:
-    return ":".join(str(value) for value in args)
+def concat_namespace(*args, delimiter: str = ":") -> str:
+    """Join the given fields into a single namespace-qualified string.
+
+    Each field is converted with `str` and the pieces are glued together with `delimiter`.
+
+    Example:
+        >>> concat_namespace('prefix', 'field1', 'field2', delimiter=":")
+        'prefix:field1:field2'
+    """
+    return delimiter.join(map(str, args))


-def split_namespace(ns_class_name: str) -> List[str]:
-    return ns_class_name.split(":")
+def split_namespace(ns_class_name: str, delimiter: str = ":", maxsplit: int = 1) -> List[str]:
+    """Break a namespace-prefixed name into its parts.
+
+    At most `maxsplit` splits are made on `delimiter`, so with the default only the
+    first delimiter separates the prefix from the rest of the name.
+
+    Example:
+        >>> split_namespace('prefix:classname')
+        ['prefix', 'classname']
+
+        >>> split_namespace('prefix:module:class', delimiter=":", maxsplit=2)
+        ['prefix', 'module', 'class']
+    """
+    parts = ns_class_name.split(delimiter, maxsplit)
+    return parts


-def general_after_log(i: "loguru.Logger", sec_format: str = "%0.3f") -> typing.Callable[["RetryCallState"], None]:
+def auto_namespace(name: str, delimiter: str = ":") -> str:
+    """Make sure a name carries a namespace prefix, using '?' as the placeholder.
+
+    An empty name becomes placeholder prefix plus placeholder name, a name without
+    the delimiter gets the placeholder prefix, and an already prefixed name is
+    returned as is.
+
+    Example:
+        >>> auto_namespace('classname')
+        '?:classname'
+
+        >>> auto_namespace('prefix:classname')
+        'prefix:classname'
+
+        >>> auto_namespace('')
+        '?:?'
+
+        >>> auto_namespace('?:custom')
+        '?:custom'
+    """
+    if name:
+        has_prefix = len(split_namespace(name, delimiter=delimiter)) >= 2
+        return name if has_prefix else f"?{delimiter}{name}"
+    return f"?{delimiter}?"
+
+
+def add_affix(text: str, affix: Literal["brace", "url", "none"] = "brace"):
+    """Wrap `text` according to the chosen affix type.
+
+    "brace" surrounds the text with curly braces, "url" does the same and then
+    URL-quotes the result, and any other value leaves the text untouched.
+
+    Example:
+        >>> add_affix("data", affix="brace")
+        '{data}'
+
+        >>> add_affix("example.com", affix="url")
+        '%7Bexample.com%7D'
+
+        >>> add_affix("text", affix="none")
+        'text'
+    """
+    if affix == "brace":
+        return "{" + text + "}"
+    if affix == "url":
+        return quote("{" + text + "}")
+    return text
+
+
+def remove_affix(text, affix: Literal["brace", "url", "none"] = "brace"):
+    """Undo the wrapping applied for the given affix type.
+
+    Args:
+        text (str): The wrapped text.
+        affix (str, optional): The affix type that was used. Defaults to "brace".
+            "brace" drops the first and last character; "url" URL-decodes the text and then
+            drops the first and last character; any other value returns the text unchanged.
+
+    Returns:
+        str: The text with the affix removed.
+
+    Example:
+        >>> remove_affix('{data}', affix="brace")
+        'data'
+
+        >>> remove_affix('%7Bexample.com%7D', affix="url")
+        'example.com'
+
+        >>> remove_affix('text', affix="none")
+        'text'
+    """
+    if affix == "brace":
+        return text[1:-1]
+    if affix == "url":
+        return unquote(text)[1:-1]
+    return text
+
+
+def general_after_log(i: "loguru.Logger", sec_format: str = "%0.3f") -> Callable[["RetryCallState"], None]:
    """
    Generates a logging function to be used after a call is retried.

@ -636,6 +712,54 @@ def list_files(root: str | Path) -> List[Path]:
    return files


+def parse_json_code_block(markdown_text: str) -> List[str]:
+    """Extract the body of every ```json fenced block found in ``markdown_text``.
+
+    Args:
+        markdown_text (str): Markdown text that may contain ```json ... ``` fences.
+
+    Returns:
+        List[str]: The text between each opening ```json and the next ```, stripped of surrounding whitespace,
+            in order of appearance. The strings are returned as-is; they are not parsed as JSON here.
+    """
+    # DOTALL lets "." cross line breaks so multi-line blocks are captured.
+    fence = re.compile(r"```json(.*?)```", re.DOTALL)
+    return [hit.group(1).strip() for hit in fence.finditer(markdown_text)]
+
+
+def remove_white_spaces(v: str) -> str:
+    """Return ``v`` with whitespace characters removed.
+
+    NOTE(review): the pattern has two alternatives, whitespace not preceded by a quote and whitespace preceded
+    by a quote. Together they match every whitespace character, so whitespace next to or inside quotes is
+    removed as well. Confirm this is the intended behaviour before relying on quote handling.
+
+    Args:
+        v (str): The input text.
+
+    Returns:
+        str: The text without the matched whitespace characters.
+    """
+    pattern = re.compile(r"(?<!['\"])\s|(?<=['\"])\s")
+    return pattern.sub("", v)
+
+
+async def aread_bin(filename: str | Path) -> bytes:
+    """Asynchronously read a whole file in binary mode.
+
+    Args:
+        filename (Union[str, Path]): The name or path of the file to be read.
+
+    Returns:
+        bytes: The full content of the file.
+
+    Example:
+        >>> await aread_bin('example.txt')
+        b'This is the content of the file.'
+
+        >>> await aread_bin(Path('example.txt'))
+        b'This is the content of the file.'
+    """
+    source = str(filename)
+    async with aiofiles.open(source, mode="rb") as stream:
+        payload = await stream.read()
+        return payload
+
+
+async def awrite_bin(filename: str | Path, data: bytes):
+    """Asynchronously write ``data`` to a file in binary mode.
+
+    Missing parent directories of the target are created before the file is opened.
+
+    Args:
+        filename (Union[str, Path]): The name or path of the file to be written.
+        data (bytes): The binary data to be written to the file.
+
+    Example:
+        >>> await awrite_bin('output.bin', b'This is binary data.')
+
+        >>> await awrite_bin(Path('output.bin'), b'Another set of binary data.')
+    """
+    target = Path(filename)
+    folder = target.parent
+    # exist_ok=True keeps this a no-op when the directory is already there.
+    folder.mkdir(parents=True, exist_ok=True)
+    async with aiofiles.open(str(target), mode="wb") as sink:
+        await sink.write(data)
+
+
 def is_coroutine_func(func: Callable) -> bool:
    return inspect.iscoroutinefunction(func)

@ -676,3 +800,37 @@ def decode_image(img_url_or_b64: str) -> Image:
        img_data = BytesIO(base64.b64decode(b64_data))
        img = Image.open(img_data)
    return img
+
+
+def process_message(messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
+    """Convert messages to list[dict].
+
+    Args:
+        messages: A single message or a list of messages. Each message may be a ``str`` (wrapped as a
+            ``{"role": "user", "content": ...}`` dict), a ``dict`` whose keys are exactly ``"role"`` and
+            ``"content"`` (kept as-is), or a ``Message`` (converted with ``Message.to_dict()``; presumably this
+            yields a role/content dict, confirm against ``metagpt.schema``).
+
+    Returns:
+        list[dict]: One dict per input message, in input order.
+
+    Raises:
+        AssertionError: If a dict message has keys other than exactly ``"role"`` and ``"content"``.
+        ValueError: If an element is not a ``str``, ``dict`` or ``Message``.
+    """
+    from metagpt.schema import Message
+
+    # Normalize a single message into a one-element list.
+    if not isinstance(messages, list):
+        messages = [messages]
+
+    # Convert every element to a dict.
+    processed_messages = []
+    for msg in messages:
+        if isinstance(msg, str):
+            processed_messages.append({"role": "user", "content": msg})
+        elif isinstance(msg, dict):
+            assert set(msg.keys()) == {"role", "content"}, f"Unexpected message keys: {sorted(msg.keys())}"
+            processed_messages.append(msg)
+        elif isinstance(msg, Message):
+            processed_messages.append(msg.to_dict())
+        else:
+            # Report the type of the offending element; `messages` itself is always a list at this point.
+            raise ValueError(f"Only support message type are: str, Message, dict, but got {type(msg).__name__}!")
+    return processed_messages
+
+
+def log_and_reraise(retry_state: RetryCallState):
+    """Log the final failure of a retried call, point to the FAQ, and re-raise the last exception.
+
+    Args:
+        retry_state (RetryCallState): The retry state whose ``outcome.exception()`` holds the last exception.
+
+    Raises:
+        The exception returned by ``retry_state.outcome.exception()``.
+    """
+    last_error = retry_state.outcome.exception()
+    logger.error(f"Retry attempts exhausted. Last exception: {last_error}")
+    faq_hint = """
+Recommend going to https://deepwisdom.feishu.cn/wiki/MsGnwQBjiif9c3koSJNcYaoSnu4#part-XdatdVlhEojeAfxaaEZcMV3ZniQ
+See FAQ 5.8
+"""
+    logger.warning(faq_hint)
+    raise last_error
--- a/metagpt/utils/cost_manager.py
+++ b/metagpt/utils/cost_manager.py
@ -41,6 +41,8 @@ class CostManager(BaseModel):
        completion_tokens (int): The number of tokens used in the completion.
        model (str): The model used for the API call.
        """
+        if prompt_tokens + completion_tokens == 0 or not model:
+            return
        self.total_prompt_tokens += prompt_tokens
        self.total_completion_tokens += completion_tokens
        if model not in self.token_costs:
--- a/metagpt/utils/di_graph_repository.py
+++ b/metagpt/utils/di_graph_repository.py
@ -4,7 +4,9 @@
@Time    : 2023/12/19
@Author  : mashenquan
@File    : di_graph_repository.py
-@Desc    : Graph repository based on DiGraph
+@Desc    : Graph repository based on DiGraph.
+    This script defines a graph repository class based on a directed graph (DiGraph), providing functionalities
+    specific to handling directed relationships between entities.
 """
 from __future__ import annotations

@ -19,20 +21,41 @@ from metagpt.utils.graph_repository import SPO, GraphRepository


 class DiGraphRepository(GraphRepository):
+    """Graph repository based on DiGraph."""
+
    def __init__(self, name: str, **kwargs):
        super().__init__(name=name, **kwargs)
        self._repo = networkx.DiGraph()

    async def insert(self, subject: str, predicate: str, object_: str):
+        """Insert a new triple into the directed graph repository.
+
+        Args:
+            subject (str): The subject of the triple.
+            predicate (str): The predicate describing the relationship.
+            object_ (str): The object of the triple.
+
+        Example:
+            await my_di_graph_repo.insert(subject="Node1", predicate="connects_to", object_="Node2")
+            # Adds a directed relationship: Node1 connects_to Node2
+        """
        self._repo.add_edge(subject, object_, predicate=predicate)

-    async def upsert(self, subject: str, predicate: str, object_: str):
-        pass
-
-    async def update(self, subject: str, predicate: str, object_: str):
-        pass
-
    async def select(self, subject: str = None, predicate: str = None, object_: str = None) -> List[SPO]:
+        """Retrieve triples from the directed graph repository based on specified criteria.
+
+        Args:
+            subject (str, optional): The subject of the triple to filter by.
+            predicate (str, optional): The predicate describing the relationship to filter by.
+            object_ (str, optional): The object of the triple to filter by.
+
+        Returns:
+            List[SPO]: A list of SPO objects representing the selected triples.
+
+        Example:
+            selected_triples = await my_di_graph_repo.select(subject="Node1", predicate="connects_to")
+            # Retrieves directed relationships where Node1 is the subject and the predicate is 'connects_to'.
+        """
        result = []
        for s, o, p in self._repo.edges(data="predicate"):
            if subject and subject != s:
@ -44,12 +67,41 @@ class DiGraphRepository(GraphRepository):
            result.append(SPO(subject=s, predicate=p, object_=o))
        return result

+    async def delete(self, subject: str = None, predicate: str = None, object_: str = None) -> int:
+        """Remove every triple matching the given filters from the directed graph.
+
+        The matching triples are found with ``select`` using the same filters, then the edge between each
+        triple's subject and object is removed from the underlying graph.
+
+        Args:
+            subject (str, optional): The subject of the triple to filter by.
+            predicate (str, optional): The predicate describing the relationship to filter by.
+            object_ (str, optional): The object of the triple to filter by.
+
+        Returns:
+            int: The number of triples deleted from the repository (0 when nothing matched).
+
+        Example:
+            deleted_count = await my_di_graph_repo.delete(subject="Node1", predicate="connects_to")
+            # Deletes directed relationships where Node1 is the subject and the predicate is 'connects_to'.
+        """
+        matched = await self.select(subject=subject, predicate=predicate, object_=object_)
+        removed = 0
+        for triple in matched or ():
+            self._repo.remove_edge(triple.subject, triple.object_)
+            removed += 1
+        return removed
+
    def json(self) -> str:
+        """Convert the directed graph repository to a JSON-formatted string."""
        m = networkx.node_link_data(self._repo)
        data = json.dumps(m)
        return data

    async def save(self, path: str | Path = None):
+        """Save the directed graph repository to a JSON file.
+
+        Args:
+            path (Union[str, Path], optional): The directory path where the JSON file will be saved.
+                If not provided, the default path is taken from the 'root' key in the keyword arguments.
+        """
        data = self.json()
        path = path or self._kwargs.get("root")
        if not path.exists():
@ -58,12 +110,21 @@ class DiGraphRepository(GraphRepository):
        await awrite(filename=pathname.with_suffix(".json"), data=data, encoding="utf-8")

    async def load(self, pathname: str | Path):
+        """Load a directed graph repository from a JSON file."""
        data = await aread(filename=pathname, encoding="utf-8")
        m = json.loads(data)
        self._repo = networkx.node_link_graph(m)

    @staticmethod
    async def load_from(pathname: str | Path) -> GraphRepository:
+        """Create and load a directed graph repository from a JSON file.
+
+        Args:
+            pathname (Union[str, Path]): The path to the JSON file to be loaded.
+
+        Returns:
+            GraphRepository: A new instance of the graph repository loaded from the specified JSON file.
+        """
        pathname = Path(pathname)
        name = pathname.with_suffix("").name
        root = pathname.parent
@ -74,9 +135,16 @@ class DiGraphRepository(GraphRepository):

    @property
    def root(self) -> str:
+        """Return the root directory path for the graph repository files."""
        return self._kwargs.get("root")

    @property
    def pathname(self) -> Path:
+        """Return the path and filename to the graph repository file."""
        p = Path(self.root) / self.name
        return p.with_suffix(".json")
+
+    @property
+    def repo(self):
+        """Get the underlying directed graph repository.
+
+        Returns the object currently stored in ``self._repo`` itself (not a copy): the ``networkx.DiGraph``
+        created in ``__init__``, or the graph assigned by ``load``.
+        """
+        return self._repo
--- a/metagpt/utils/graph_repository.py
+++ b/metagpt/utils/graph_repository.py
@ -4,21 +4,28 @@
@Time    : 2023/12/19
@Author  : mashenquan
@File    : graph_repository.py
-@Desc    : Superclass for graph repository.
+@Desc    : Superclass for graph repository. This script defines a superclass for a graph repository, providing a
+    foundation for specific implementations.
+
 """

 from abc import ABC, abstractmethod
+from collections import defaultdict
 from pathlib import Path
 from typing import List

 from pydantic import BaseModel

-from metagpt.logs import logger
-from metagpt.repo_parser import ClassInfo, ClassRelationship, RepoFileInfo
-from metagpt.utils.common import concat_namespace
+from metagpt.repo_parser import DotClassInfo, DotClassRelationship, RepoFileInfo
+from metagpt.utils.common import concat_namespace, split_namespace


 class GraphKeyword:
+    """Basic words for a Graph database.
+
+    This class defines a set of basic words commonly used in the context of a Graph database.
+    """
+
    IS = "is"
    OF = "Of"
    ON = "On"
@ -28,51 +35,149 @@ class GraphKeyword:
    SOURCE_CODE = "source_code"
    NULL = "<null>"
    GLOBAL_VARIABLE = "global_variable"
-    CLASS_FUNCTION = "class_function"
+    CLASS_METHOD = "class_method"
    CLASS_PROPERTY = "class_property"
-    HAS_CLASS_FUNCTION = "has_class_function"
+    HAS_CLASS_METHOD = "has_class_method"
    HAS_CLASS_PROPERTY = "has_class_property"
    HAS_CLASS = "has_class"
+    HAS_DETAIL = "has_detail"
    HAS_PAGE_INFO = "has_page_info"
    HAS_CLASS_VIEW = "has_class_view"
    HAS_SEQUENCE_VIEW = "has_sequence_view"
-    HAS_ARGS_DESC = "has_args_desc"
-    HAS_TYPE_DESC = "has_type_desc"
+    HAS_SEQUENCE_VIEW_VER = "has_sequence_view_ver"
+    HAS_CLASS_USE_CASE = "has_class_use_case"
+    IS_COMPOSITE_OF = "is_composite_of"
+    IS_AGGREGATE_OF = "is_aggregate_of"
+    HAS_PARTICIPANT = "has_participant"


 class SPO(BaseModel):
+    """Graph repository record type.
+
+    This class represents a record in a graph repository with three components:
+    - Subject: The subject of the triple.
+    - Predicate: The predicate describing the relationship between the subject and the object.
+    - Object: The object of the triple.
+
+    Attributes:
+        subject (str): The subject of the triple.
+        predicate (str): The predicate describing the relationship.
+        object_ (str): The object of the triple.
+
+    Example:
+        spo_record = SPO(subject="Node1", predicate="connects_to", object_="Node2")
+        # Represents a triple: Node1 connects_to Node2
+    """
+
    subject: str
    predicate: str
    object_: str


 class GraphRepository(ABC):
+    """Abstract base class for a Graph Repository.
+
+    This class defines the interface for a graph repository, providing methods for inserting, selecting,
+    deleting, and saving graph data. Concrete implementations of this class must provide functionality
+    for these operations.
+    """
+
    def __init__(self, name: str, **kwargs):
        self._repo_name = name
        self._kwargs = kwargs

    @abstractmethod
    async def insert(self, subject: str, predicate: str, object_: str):
-        pass
+        """Insert a new triple into the graph repository.

-    @abstractmethod
-    async def upsert(self, subject: str, predicate: str, object_: str):
-        pass
+        Args:
+            subject (str): The subject of the triple.
+            predicate (str): The predicate describing the relationship.
+            object_ (str): The object of the triple.

-    @abstractmethod
-    async def update(self, subject: str, predicate: str, object_: str):
+        Example:
+            await my_repository.insert(subject="Node1", predicate="connects_to", object_="Node2")
+            # Inserts a triple: Node1 connects_to Node2 into the graph repository.
+        """
        pass

    @abstractmethod
    async def select(self, subject: str = None, predicate: str = None, object_: str = None) -> List[SPO]:
+        """Retrieve triples from the graph repository based on specified criteria.
+
+        Args:
+            subject (str, optional): The subject of the triple to filter by.
+            predicate (str, optional): The predicate describing the relationship to filter by.
+            object_ (str, optional): The object of the triple to filter by.
+
+        Returns:
+            List[SPO]: A list of SPO objects representing the selected triples.
+
+        Example:
+            selected_triples = await my_repository.select(subject="Node1", predicate="connects_to")
+            # Retrieves triples where Node1 is the subject and the predicate is 'connects_to'.
+        """
+        pass
+
+    @abstractmethod
+    async def delete(self, subject: str = None, predicate: str = None, object_: str = None) -> int:
+        """Delete triples from the graph repository based on specified criteria.
+
+        Args:
+            subject (str, optional): The subject of the triple to filter by.
+            predicate (str, optional): The predicate describing the relationship to filter by.
+            object_ (str, optional): The object of the triple to filter by.
+
+        Returns:
+            int: The number of triples deleted from the repository.
+
+        Example:
+            deleted_count = await my_repository.delete(subject="Node1", predicate="connects_to")
+            # Deletes triples where Node1 is the subject and the predicate is 'connects_to'.
+        """
+        pass
+
+    @abstractmethod
+    async def save(self):
+        """Save any changes made to the graph repository.
+
+        Example:
+            await my_repository.save()
+            # Persists any changes made to the graph repository.
+        """
        pass

    @property
    def name(self) -> str:
+        """Get the name of the graph repository."""
        return self._repo_name

    @staticmethod
    async def update_graph_db_with_file_info(graph_db: "GraphRepository", file_info: RepoFileInfo):
+        """Insert information of RepoFileInfo into the specified graph repository.
+
+        This function updates the provided graph repository with information from the given RepoFileInfo object.
+        The function inserts triples related to various dimensions such as file type, class, class method, function,
+        global variable, and page info.
+
+        Triple Patterns:
+        - (?, is, [file type])
+        - (?, has class, ?)
+        - (?, is, [class])
+        - (?, has class method, ?)
+        - (?, has function, ?)
+        - (?, is, [function])
+        - (?, is, global variable)
+        - (?, has page info, ?)
+
+        Args:
+            graph_db (GraphRepository): The graph repository object to be updated.
+            file_info (RepoFileInfo): The RepoFileInfo object containing information to be inserted.
+
+        Example:
+            await update_graph_db_with_file_info(my_graph_repo, my_file_info)
+            # Updates 'my_graph_repo' with information from 'my_file_info'.
+        """
        await graph_db.insert(subject=file_info.file, predicate=GraphKeyword.IS, object_=GraphKeyword.SOURCE_CODE)
        file_types = {".py": "python", ".js": "javascript"}
        file_type = file_types.get(Path(file_info.file).suffix, GraphKeyword.NULL)
@ -95,13 +200,13 @@ class GraphRepository(ABC):
            for fn in methods:
                await graph_db.insert(
                    subject=concat_namespace(file_info.file, class_name),
-                    predicate=GraphKeyword.HAS_CLASS_FUNCTION,
+                    predicate=GraphKeyword.HAS_CLASS_METHOD,
                    object_=concat_namespace(file_info.file, class_name, fn),
                )
                await graph_db.insert(
                    subject=concat_namespace(file_info.file, class_name, fn),
                    predicate=GraphKeyword.IS,
-                    object_=GraphKeyword.CLASS_FUNCTION,
+                    object_=GraphKeyword.CLASS_METHOD,
                )
        for f in file_info.functions:
            # file -> function
@ -133,7 +238,34 @@ class GraphRepository(ABC):
                )

    @staticmethod
-    async def update_graph_db_with_class_views(graph_db: "GraphRepository", class_views: List[ClassInfo]):
+    async def update_graph_db_with_class_views(graph_db: "GraphRepository", class_views: List[DotClassInfo]):
+        """Insert dot format class information into the specified graph repository.
+
+        This function updates the provided graph repository with class information from the given list of DotClassInfo objects.
+        The function inserts triples related to various aspects of class views, including source code, file type, class,
+        class property, class detail, method, composition, and aggregation.
+
+        Triple Patterns:
+        - (?, is, source code)
+        - (?, is, file type)
+        - (?, has class, ?)
+        - (?, is, class)
+        - (?, has class property, ?)
+        - (?, is, class property)
+        - (?, has detail, ?)
+        - (?, has method, ?)
+        - (?, is composite of, ?)
+        - (?, is aggregate of, ?)
+
+        Args:
+            graph_db (GraphRepository): The graph repository object to be updated.
+            class_views (List[DotClassInfo]): List of DotClassInfo objects containing class information to be inserted.
+
+
+        Example:
+            await update_graph_db_with_class_views(my_graph_repo, [class_info1, class_info2])
+            # Updates 'my_graph_repo' with class information from the provided list of DotClassInfo objects.
+        """
        for c in class_views:
            filename, _ = c.package.split(":", 1)
            await graph_db.insert(subject=filename, predicate=GraphKeyword.IS, object_=GraphKeyword.SOURCE_CODE)
@ -146,6 +278,7 @@ class GraphRepository(ABC):
                predicate=GraphKeyword.IS,
                object_=GraphKeyword.CLASS,
            )
+            await graph_db.insert(subject=c.package, predicate=GraphKeyword.HAS_DETAIL, object_=c.model_dump_json())
            for vn, vt in c.attributes.items():
                # class -> property
                await graph_db.insert(
@ -160,33 +293,61 @@ class GraphRepository(ABC):
                    object_=GraphKeyword.CLASS_PROPERTY,
                )
                await graph_db.insert(
-                    subject=concat_namespace(c.package, vn), predicate=GraphKeyword.HAS_TYPE_DESC, object_=vt
+                    subject=concat_namespace(c.package, vn),
+                    predicate=GraphKeyword.HAS_DETAIL,
+                    object_=vt.model_dump_json(),
                )
-            for fn, desc in c.methods.items():
-                if "</I>" in desc and "<I>" not in desc:
-                    logger.error(desc)
+            for fn, ft in c.methods.items():
                # class -> function
                await graph_db.insert(
                    subject=c.package,
-                    predicate=GraphKeyword.HAS_CLASS_FUNCTION,
+                    predicate=GraphKeyword.HAS_CLASS_METHOD,
                    object_=concat_namespace(c.package, fn),
                )
                # function detail
                await graph_db.insert(
                    subject=concat_namespace(c.package, fn),
                    predicate=GraphKeyword.IS,
-                    object_=GraphKeyword.CLASS_FUNCTION,
+                    object_=GraphKeyword.CLASS_METHOD,
                )
                await graph_db.insert(
                    subject=concat_namespace(c.package, fn),
-                    predicate=GraphKeyword.HAS_ARGS_DESC,
-                    object_=desc,
+                    predicate=GraphKeyword.HAS_DETAIL,
+                    object_=ft.model_dump_json(),
+                )
+            for i in c.compositions:
+                await graph_db.insert(
+                    subject=c.package, predicate=GraphKeyword.IS_COMPOSITE_OF, object_=concat_namespace("?", i)
+                )
+            for i in c.aggregations:
+                await graph_db.insert(
+                    subject=c.package, predicate=GraphKeyword.IS_AGGREGATE_OF, object_=concat_namespace("?", i)
                )

    @staticmethod
    async def update_graph_db_with_class_relationship_views(
-        graph_db: "GraphRepository", relationship_views: List[ClassRelationship]
+        graph_db: "GraphRepository", relationship_views: List[DotClassRelationship]
    ):
+        """Insert class relationships and labels into the specified graph repository.
+
+        This function updates the provided graph repository with class relationship information from the given list
+        of DotClassRelationship objects. The function inserts triples representing relationships and labels between
+        classes.
+
+        Triple Patterns:
+        - (?, is relationship of, ?)
+        - (?, is relationship on, ?)
+
+        Args:
+            graph_db (GraphRepository): The graph repository object to be updated.
+            relationship_views (List[DotClassRelationship]): List of DotClassRelationship objects containing
+            class relationship information to be inserted.
+
+        Example:
+            await update_graph_db_with_class_relationship_views(my_graph_repo, [relationship1, relationship2])
+            # Updates 'my_graph_repo' with class relationship information from the provided list of DotClassRelationship objects.
+
+        """
        for r in relationship_views:
            await graph_db.insert(
                subject=r.src, predicate=GraphKeyword.IS + r.relationship + GraphKeyword.OF, object_=r.dest
@ -198,3 +359,32 @@ class GraphRepository(ABC):
                predicate=GraphKeyword.IS + r.relationship + GraphKeyword.ON,
                object_=concat_namespace(r.dest, r.label),
            )
+
+    @staticmethod
+    async def rebuild_composition_relationship(graph_db: "GraphRepository"):
+        """Replace the placeholder namespace "?" in composition triples with the real class namespace.
+
+        Every subject recorded as a class is indexed by the last segment of its namespaced name. Each
+        ``is_composite_of`` triple whose object namespace is "?" is then deleted and re-inserted with the fully
+        qualified class name as its object, but only when exactly one known class has that short name. Triples
+        whose class name is unknown or ambiguous are left untouched.
+
+        Args:
+            graph_db (GraphRepository): The graph repository object to be updated.
+        """
+        class_rows = await graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS)
+        candidates = defaultdict(list)
+        for row in class_rows:
+            short_name = split_namespace(row.subject)[-1]
+            candidates[short_name].append(row.subject)
+
+        composition_rows = await graph_db.select(predicate=GraphKeyword.IS_COMPOSITE_OF)
+        for row in composition_rows:
+            namespace, short_name = split_namespace(row.object_)
+            if namespace == "?":
+                matches = candidates[short_name]
+                # Only rewrite when the short name resolves to exactly one class.
+                if len(matches) == 1:
+                    await graph_db.delete(subject=row.subject, predicate=row.predicate, object_=row.object_)
+                    await graph_db.insert(subject=row.subject, predicate=row.predicate, object_=matches[0])
--- a/metagpt/utils/parse_docstring.py
+++ b/metagpt/utils/parse_docstring.py
@ -1,45 +1,23 @@
 import re
 from typing import Tuple

-from pydantic import BaseModel
-

 def remove_spaces(text):
    return re.sub(r"\s+", " ", text).strip()


-class DocstringParser(BaseModel):
-    docstring: str
+class DocstringParser:
+    @staticmethod
+    def parse(docstring: str) -> Tuple[str, str]:
+        """Parse the docstring and return the overall description and the parameter description.

-    def parse_desc(self) -> str:
-        """Parse and return the description from the docstring."""
-
-    def parse_params(self) -> list[Tuple[str, str, str]]:
-        """Parse and return the parameters from the docstring.
+        Args:
+            docstring (str): The docstring to be parsed.

        Returns:
-            list[Tuple[str, str, str]]: A list of input paramter info. Each info is a triple of (param name, param type, param description)
+            Tuple[str, str]: A tuple of (overall description, parameter description)
        """

-    def parse_returns(self) -> list[Tuple[str, str]]:
-        """Parse and return the output information from the docstring.
-
-        Returns:
-            list[Tuple[str, str]]: A list of output info. Each info is a tuple of (return type, return description)
-        """
-
-    @staticmethod
-    def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
-        """Check if a parameter is optional and return a processed param_type rid of the optionality info if so"""
-
-    @staticmethod
-    def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
-        """Check if a parameter has a default value and return the default value if so"""
-
-    @staticmethod
-    def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]:
-        """Check if a parameter description includes an enum and return enum values if so"""
-

 class reSTDocstringParser(DocstringParser):
    """A parser for reStructuredText (reST) docstring"""
@ -48,40 +26,18 @@ class reSTDocstringParser(DocstringParser):
 class GoogleDocstringParser(DocstringParser):
    """A parser for Google-stype docstring"""

-    docstring: str
-
-    def parse_desc(self) -> str:
-        description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", self.docstring, re.DOTALL)
-        description = remove_spaces(description_match.group(1)) if description_match else ""
-        return description
-
-    def parse_params(self) -> list[Tuple[str, str, str]]:
-        args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", self.docstring, re.DOTALL)
-        _args = args_match.group(1).strip() if args_match else ""
-        # variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)")
-        variable_pattern = re.compile(
-            r"(\w+)\s*\((.*?)\):\s*(.*?)(?=\n\s*\w+\s*\(|\Z)", re.DOTALL
-        )  # (?=\n\w+\s*\(|\Z) is to assert that what follows is either the start of the next parameter (indicated by a newline, some word characters, and an opening parenthesis) or the end of the string (\Z).
-        params = variable_pattern.findall(_args)
-        return params
-
-    def parse_returns(self) -> list[Tuple[str, str]]:
-        returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", self.docstring, re.DOTALL)
-        returns = returns_match.group(1).strip() if returns_match else ""
-        return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$")
-        returns = return_pattern.findall(returns)
-        return returns
-
    @staticmethod
-    def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
-        return "optional" in param_type, param_type.replace(", optional", "")
+    def parse(docstring: str) -> Tuple[str, str]:
+        """Split a Google-style docstring into its overall description and its parameter description.
+
+        Whitespace runs are collapsed with ``remove_spaces`` first. Everything before the first "Args:" is the
+        overall description; the parameter description is "Args:" plus everything after it (including any
+        "Returns:"/"Raises:" text that follows).
+
+        Args:
+            docstring (str): The docstring to be parsed. An empty or ``None`` value yields ``("", "")``.
+
+        Returns:
+            Tuple[str, str]: A tuple of (overall description, parameter description). The parameter
+                description is "" when the docstring has no "Args:" section.
+        """
+        if not docstring:
+            return "", ""

-    @staticmethod
-    def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
-        default_val = re.search(r"Defaults to (.+?)\.", param_desc)
-        return (True, default_val.group(1)) if default_val else (False, "")
+        docstring = remove_spaces(docstring)

-    @staticmethod
-    def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]:
-        enum_val = re.search(r"Enum: \[(.+?)\]", param_desc)
-        return (True, [e.strip() for e in enum_val.group(1).split(",")]) if enum_val else (False, [])
+        # partition() splits on the first "Args:" only, so a docstring that mentions "Args:" more than once
+        # no longer fails tuple unpacking. When "Args:" is absent, marker and rest are both "".
+        overall_desc, marker, rest = docstring.partition("Args:")
+        param_desc = marker + rest
+
+        return overall_desc, param_desc
--- a/metagpt/utils/project_repo.py
+++ b/metagpt/utils/project_repo.py
@ -33,6 +33,7 @@ from metagpt.const import (
    TASK_PDF_FILE_REPO,
    TEST_CODES_FILE_REPO,
    TEST_OUTPUTS_FILE_REPO,
+    VISUAL_GRAPH_REPO_FILE_REPO,
 )
 from metagpt.utils.file_repository import FileRepository
 from metagpt.utils.git_repository import GitRepository
@ -69,6 +70,7 @@ class ResourceFileRepositories(FileRepository):
    code_summary: FileRepository
    sd_output: FileRepository
    code_plan_and_change: FileRepository
+    graph_repo: FileRepository

    def __init__(self, git_repo):
        super().__init__(git_repo=git_repo, relative_path=RESOURCES_FILE_REPO)
@ -82,6 +84,7 @@ class ResourceFileRepositories(FileRepository):
        self.code_summary = git_repo.new_file_repository(relative_path=CODE_SUMMARIES_PDF_FILE_REPO)
        self.sd_output = git_repo.new_file_repository(relative_path=SD_OUTPUT_FILE_REPO)
        self.code_plan_and_change = git_repo.new_file_repository(relative_path=CODE_PLAN_AND_CHANGE_PDF_FILE_REPO)
+        self.graph_repo = git_repo.new_file_repository(relative_path=VISUAL_GRAPH_REPO_FILE_REPO)


 class ProjectRepo(FileRepository):
@ -133,6 +136,7 @@ class ProjectRepo(FileRepository):
        code_files = self.with_src_path(path=git_workdir / git_workdir.name).srcs.all_files
        if not code_files:
            return False
+        return bool(code_files)

    def with_src_path(self, path: str | Path) -> ProjectRepo:
        try:
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@ -43,6 +43,11 @@ TOKEN_COSTS = {
    "mistral-small-latest": {"prompt": 0.002, "completion": 0.006},
    "mistral-medium-latest": {"prompt": 0.0027, "completion": 0.0081},
    "mistral-large-latest": {"prompt": 0.008, "completion": 0.024},
+    "claude-instant-1.2": {"prompt": 0.0008, "completion": 0.0024},
+    "claude-2.0": {"prompt": 0.008, "completion": 0.024},
+    "claude-2.1": {"prompt": 0.008, "completion": 0.024},
+    "claude-3-sonnet-20240229": {"prompt": 0.003, "completion": 0.015},
+    "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075},
 }


@ -135,7 +140,6 @@ FIREWORKS_GRADE_TOKEN_COSTS = {
    "mixtral-8x7b": {"prompt": 0.4, "completion": 1.6},
 }

-
 TOKEN_MAX = {
    "gpt-3.5-turbo": 4096,
    "gpt-3.5-turbo-0301": 4096,
@ -167,6 +171,11 @@ TOKEN_MAX = {
    "mistral-small-latest": 32768,
    "mistral-medium-latest": 32768,
    "mistral-large-latest": 32768,
+    "claude-instant-1.2": 100000,
+    "claude-2.0": 100000,
+    "claude-2.1": 200000,
+    "claude-3-sonnet-20240229": 200000,
+    "claude-3-opus-20240229": 200000,
 }


--- a/metagpt/utils/visual_graph_repo.py
+++ b/metagpt/utils/visual_graph_repo.py
@ -0,0 +1,162 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/12/19
+@Author  : mashenquan
+@File    : visual_graph_repo.py
+@Desc    : Visualization tool to visualize the class diagrams or sequence diagrams of the graph repository.
+"""
+from __future__ import annotations
+
+import re
+from abc import ABC
+from pathlib import Path
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+from metagpt.const import AGGREGATION, COMPOSITION, GENERALIZATION
+from metagpt.schema import UMLClassView
+from metagpt.utils.common import split_namespace
+from metagpt.utils.di_graph_repository import DiGraphRepository
+from metagpt.utils.graph_repository import GraphKeyword, GraphRepository
+
+
+class _VisualClassView(BaseModel):
+    """Internal per-class record used by the VisualGraphRepo implementations.
+
+    Attributes:
+        package (str): Namespace-prefixed identifier of the class; `name` is derived from it.
+        uml (Optional[UMLClassView]): UML view of the class, or None when none has been set.
+        generalizations (List[str]): Names emitted on the left side of `<|--` links.
+        compositions (List[str]): Names emitted on the left side of `*--` links.
+        aggregations (List[str]): Names emitted on the left side of `o--` links.
+    """
+
+    package: str
+    uml: Optional[UMLClassView] = None
+    generalizations: List[str] = Field(default_factory=list)
+    compositions: List[str] = Field(default_factory=list)
+    aggregations: List[str] = Field(default_factory=list)
+
+    @property
+    def name(self) -> str:
+        """Last component returned by `split_namespace` for `package`."""
+        return split_namespace(self.package)[-1]
+
+    def get_mermaid(self, align: int = 1) -> str:
+        """Render this class and its relationships as Mermaid class-diagram text.
+
+        Args:
+            align (int): Number of tab characters placed before each relationship line;
+                also forwarded to `UMLClassView.get_mermaid`.
+
+        Returns:
+            str: The UML view's Mermaid text followed by one line per relationship,
+                or an empty string when no UML view is set.
+        """
+        if not self.uml:
+            return ""
+        indent = "\t" * align
+
+        # Emission order is fixed: generalizations, then compositions, then aggregations.
+        links = (
+            ("<|--", self.generalizations),
+            ("*--", self.compositions),
+            ("o--", self.aggregations),
+        )
+        chunks = [self.uml.get_mermaid(align=align)]
+        for arrow, related in links:
+            chunks.extend(f"{indent}{other} {arrow} {self.name}\n" for other in related)
+        return "".join(chunks)
+
+
+class VisualGraphRepo(ABC):
+    """Abstract base class for VisualGraphRepo.
+
+    Derives from ABC but declares no abstract methods here; it only stores the
+    graph repository handed to the constructor.
+    """
+
+    # Graph repository wrapped by this object; assigned in __init__.
+    graph_db: GraphRepository
+
+    def __init__(self, graph_db):
+        """Keep a reference to the given graph repository.
+
+        Args:
+            graph_db: Repository object stored as `self.graph_db` (annotated on the
+                class as a GraphRepository).
+        """
+        self.graph_db = graph_db
+
+
+class VisualDiGraphRepo(VisualGraphRepo):
+    """Implementation of VisualGraphRepo for DiGraph graph repository.
+
+    Queries the wrapped graph repository and turns the rows into Mermaid class-diagram
+    text or into (subject, object) pairs for sequence views.
+    """
+
+    @classmethod
+    async def load_from(cls, filename: str | Path):
+        """Load a VisualDiGraphRepo instance from a file.
+
+        Args:
+            filename: Path passed (as a string) to `DiGraphRepository.load_from`.
+
+        Returns:
+            A new instance of `cls` wrapping the loaded repository.
+        """
+        graph_db = await DiGraphRepository.load_from(str(filename))
+        return cls(graph_db=graph_db)
+
+    async def get_mermaid_class_view(self) -> str:
+        """Return Mermaid `classDiagram` text for every subject recorded as a class.
+
+        Returns:
+            str: The `classDiagram` header line followed by the Mermaid text of each
+                class view, in the order the rows are returned by the repository.
+        """
+        rows = await self.graph_db.select(predicate=GraphKeyword.IS, object_=GraphKeyword.CLASS)
+        # Collect the fragments and join once instead of growing a string in the loop.
+        chunks = ["classDiagram\n"]
+        for r in rows:
+            class_view = await self._get_class_view(ns_class_name=r.subject)
+            chunks.append(class_view.get_mermaid())
+        return "".join(chunks)
+
+    async def _get_class_view(self, ns_class_name: str) -> _VisualClassView:
+        """Build the `_VisualClassView` of one class from its rows in the graph repository.
+
+        Args:
+            ns_class_name: Subject key of the class, used both for the query and as the
+                view's `package`.
+
+        Returns:
+            _VisualClassView: View holding the parsed UML class view (if a row provides
+                one) and the refined names of related classes.
+        """
+        rows = await self.graph_db.select(subject=ns_class_name)
+        class_view = _VisualClassView(package=ns_class_name)
+        # The three relationship kinds are handled identically; only the destination list differs.
+        relation_targets = (
+            (GraphKeyword.IS + GENERALIZATION + GraphKeyword.OF, class_view.generalizations),
+            (GraphKeyword.IS + COMPOSITION + GraphKeyword.OF, class_view.compositions),
+            (GraphKeyword.IS + AGGREGATION + GraphKeyword.OF, class_view.aggregations),
+        )
+        for r in rows:
+            if r.predicate == GraphKeyword.HAS_CLASS_VIEW:
+                class_view.uml = UMLClassView.model_validate_json(r.object_)
+                continue
+            for predicate, targets in relation_targets:
+                if r.predicate != predicate:
+                    continue
+                name = self._refine_name(split_namespace(r.object_)[-1])
+                # An empty result marks a name that should not appear in the diagram.
+                if name:
+                    targets.append(name)
+                break
+        return class_view
+
+    async def _select_subject_object_pairs(self, predicate: str) -> List[tuple[str, str]]:
+        """Return `(subject, object_)` for every row matching the given predicate."""
+        rows = await self.graph_db.select(predicate=predicate)
+        return [(r.subject, r.object_) for r in rows]
+
+    async def get_mermaid_sequence_views(self) -> List[tuple[str, str]]:
+        """Returns all Markdown sequence diagrams with their corresponding graph repository keys.
+
+        Returns:
+            List[tuple[str, str]]: One `(subject, object_)` pair per row whose predicate is
+                `GraphKeyword.HAS_SEQUENCE_VIEW`.
+        """
+        return await self._select_subject_object_pairs(GraphKeyword.HAS_SEQUENCE_VIEW)
+
+    @staticmethod
+    def _refine_name(name: str) -> str:
+        """Removes impurity content from the given name.
+
+        Leading and trailing quotes, backslashes and parentheses are stripped. Builtin
+        type names (and "None") yield an empty string, and a dotted name is reduced to
+        its last component.
+
+        Example:
+            >>> VisualDiGraphRepo._refine_name("int")
+            ''
+
+            >>> VisualDiGraphRepo._refine_name('"Class1"')
+            'Class1'
+
+            >>> VisualDiGraphRepo._refine_name("pkg.Class1")
+            'Class1'
+        """
+        name = re.sub(r'^[\'"\\\(\)]+|[\'"\\\(\)]+$', "", name)
+        if name in ("int", "float", "bool", "str", "list", "tuple", "set", "dict", "None"):
+            return ""
+        if "." in name:
+            name = name.split(".")[-1]
+
+        return name
+
+    async def get_mermaid_sequence_view_versions(self) -> List[tuple[str, str]]:
+        """Returns all versioned Markdown sequence diagrams with their corresponding graph repository keys.
+
+        Returns:
+            List[tuple[str, str]]: One `(subject, object_)` pair per row whose predicate is
+                `GraphKeyword.HAS_SEQUENCE_VIEW_VER`.
+        """
+        return await self._select_subject_object_pairs(GraphKeyword.HAS_SEQUENCE_VIEW_VER)
--- a/requirements.txt
+++ b/requirements.txt
@ -12,6 +12,7 @@ typer==0.9.0
 # google_api_python_client==2.93.0  # Used by search_engine.py
 lancedb==0.4.0
 langchain==0.1.8
+sqlalchemy==2.0.0  # along with langchain
 loguru==0.6.0
 meilisearch==0.21.0
 numpy>=1.24.3,<1.25.0
@ -33,7 +34,7 @@ tqdm==4.65.0
 #unstructured[local-inference]
 # selenium>4
 # webdriver_manager<3.9
-anthropic==0.8.1
+anthropic==0.18.1
 typing-inspect==0.8.0
 libcst==1.0.1
 qdrant-client==1.7.0
@ -70,3 +71,5 @@ Pillow
 imap_tools==1.5.0  # Used by metagpt/tools/libs/email_login.py
 qianfan==0.3.2
 dashscope==1.14.1
+rank-bm25==0.2.2  # for tool recommendation
+jieba==0.42.1  # for tool recommendation
--- a/setup.py
+++ b/setup.py
@ -57,7 +57,7 @@ extras_require["dev"] = (["pylint~=3.0.3", "black~=23.3.0", "isort~=5.12.0", "pr

 setup(
    name="metagpt",
-    version="0.7.2",
+    version="0.7.4",
    description="The Multi-Agent Framework",
    long_description=long_description,
    long_description_content_type="text/markdown",
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -12,6 +12,7 @@ import logging
 import os
 import re
 import uuid
+from pathlib import Path
 from typing import Callable

 import aiohttp.web
@ -270,3 +271,11 @@ def mermaid_mocker(aiohttp_mocker, mermaid_rsp_cache):
    aiohttp_mocker.rsp_cache = mermaid_rsp_cache
    aiohttp_mocker.check_funcs = check_funcs
    yield check_funcs
+
+
+@pytest.fixture
+def git_dir():
+    """Create a uniquely named directory under this file's `unittest` folder and return its path.
+
+    The directory name is a fresh `uuid4` hex string; missing parent directories are created.
+    """
+    unique_dir = Path(__file__).parent / f"unittest/{uuid.uuid4().hex}"
+    unique_dir.mkdir(parents=True, exist_ok=True)
+    return unique_dir
--- a/tests/data/graph_db/networkx.class_view.json
+++ b/tests/data/graph_db/networkx.class_view.json
--- a/tests/data/graph_db/networkx.json
+++ b/tests/data/graph_db/networkx.json
--- a/Show more
+++ b/Show more