Merge remote-tracking branch 'origin/mgx_ops' into feat-intention-fs

2026-05-15 11:02:36 +02:00 · 2024-08-12 18:16:35 +08:00 · 2024-08-12 18:16:35 +08:00 · 502cb469a7
commit 502cb469a7
parent c4bd06dcb8 c3d7b57bc6
40 changed files with 832 additions and 168 deletions
--- a/.gitignore
+++ b/.gitignore
@ -189,3 +189,4 @@ cov.xml
 *-structure.json
 *.dot
 .python-version
+tests/data/requirements/*.jpg
--- a/2
+++ b/2
@ -3,7 +3,7 @@ FROM nikolaik/python-nodejs:python3.9-nodejs20-slim

 # Install Debian software needed by MetaGPT and clean up in one RUN command to reduce image size
 RUN apt update &&\
-    apt install -y libgomp1 git chromium fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 --no-install-recommends &&\
+    apt install -y libgomp1 git chromium fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 --no-install-recommends file &&\
    apt clean && rm -rf /var/lib/apt/lists/*

 # Install Mermaid CLI globally
--- a/config/config2.example.yaml
+++ b/config/config2.example.yaml
@ -75,6 +75,7 @@ s3:
  bucket: "test"

 exp_pool:
+  enabled: false
  enable_read: false 
  enable_write: false
  persist_path: .chroma_exp_data # The directory.
--- a/metagpt/actions/di/execute_nb_code.py
+++ b/metagpt/actions/di/execute_nb_code.py
@ -191,7 +191,7 @@ class ExecuteNbCode(Action):
                output_text = remove_log_and_warning_lines(output_text)
            # The useful information of the exception is at the end,
            # the useful information of normal output is at the begining.
-            if '<!DOCTYPE html>' not in output_text:
+            if "<!DOCTYPE html>" not in output_text:
                output_text = output_text[:keep_len] if is_success else output_text[-keep_len:]

            parsed_output.append(output_text)
@ -286,11 +286,7 @@ class ExecuteNbCode(Action):
 def remove_log_and_warning_lines(input_str: str) -> str:
    delete_lines = ["[warning]", "warning:", "[cv]", "[info]"]
    result = "\n".join(
-        [
-            line
-            for line in input_str.split("\n")
-            if not any(dl in line.lower() for dl in delete_lines)
-        ]
+        [line for line in input_str.split("\n") if not any(dl in line.lower() for dl in delete_lines)]
    ).strip()
    return result

--- a/metagpt/actions/research.py
+++ b/metagpt/actions/research.py
@ -180,7 +180,13 @@ class CollectLinks(Action):
            results = self.rank_func(results)
        return [i["link"] for i in results[:num_results]]

-    async def _search_urls(self, query: str, max_results: int) -> list[str]:
+    async def _search_urls(self, query: str, max_results: int) -> list[dict[str, str]]:
+        """Use search_engine to get urls.
+
+        Returns:
+            e.g. [{"title": "...", "link": "...", "snippet": "..."}]
+        """
+
        return await self.search_engine.run(query, max_results=max_results, as_string=False)


--- a/metagpt/actions/search_enhanced_qa.py
+++ b/metagpt/actions/search_enhanced_qa.py
@ -74,6 +74,14 @@ class SearchEnhancedQA(Action):
    java_script_enabled: bool = Field(
        default=False, description="Whether or not to enable JavaScript in the web browser context. Defaults to False."
    )
+    user_agent: str = Field(
+        default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.81",
+        description="Specific user agent to use in browser.",
+    )
+    extra_http_headers: dict = Field(
+        default={"sec-ch-ua": 'Chromium";v="125", "Not.A/Brand";v="24'},
+        description="An object containing additional HTTP headers to be sent with every request.",
+    )
    max_chars_per_webpage_summary: int = Field(
        default=4000, description="Maximum summary length for each web page content."
    )
@ -86,7 +94,11 @@ class SearchEnhancedQA(Action):
    def initialize(self):
        if self.web_browse_and_summarize_action is None:
            self.web_browser_engine = WebBrowserEngine.from_browser_config(
-                self.config.browser, proxy=self.config.proxy, java_script_enabled=self.java_script_enabled
+                self.config.browser,
+                proxy=self.config.proxy,
+                java_script_enabled=self.java_script_enabled,
+                extra_http_headers=self.extra_http_headers,
+                user_agent=self.user_agent,
            )

            self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=self.web_browser_engine)
--- a/metagpt/configs/exp_pool_config.py
+++ b/metagpt/configs/exp_pool_config.py
@ -4,6 +4,10 @@ from metagpt.utils.yaml_model import YamlModel


 class ExperiencePoolConfig(YamlModel):
+    enabled: bool = Field(
+        default=False,
+        description="Flag to enable or disable the experience pool. When disabled, both reading and writing are ineffective.",
+    )
    enable_read: bool = Field(default=False, description="Enable to read from experience pool.")
    enable_write: bool = Field(default=False, description="Enable to write to experience pool.")
    persist_path: str = Field(default=".chroma_exp_data", description="The persist path for experience pool.")
--- a/metagpt/configs/search_config.py
+++ b/metagpt/configs/search_config.py
@ -19,6 +19,7 @@ class SearchConfig(YamlModel):
    api_type: SearchEngineType = SearchEngineType.DUCK_DUCK_GO
    api_key: str = ""
    cse_id: str = ""  # for google
+    discovery_service_url: str = ""  # for google
    search_func: Optional[Callable] = None
    params: dict = Field(
        default_factory=lambda: {
--- a/metagpt/const.py
+++ b/metagpt/const.py
@ -152,4 +152,3 @@ AGENT = "agent"

 # SWE agent
 SWE_SETUP_PATH = get_metagpt_package_root() / "metagpt/tools/swe_agent_commands/setup_default.sh"
-SWE_CMD_WORK_DIR = DEFAULT_WORKSPACE_ROOT / "swe_agent_workdir"
--- a/metagpt/exp_pool/decorator.py
+++ b/metagpt/exp_pool/decorator.py
@ -50,7 +50,7 @@ def exp_cache(
    """

    def decorator(func: Callable[..., ReturnType]) -> Callable[..., ReturnType]:
-        if not config.exp_pool.enable_read:
+        if not config.exp_pool.enabled:
            return func

        @functools.wraps(func)
--- a/metagpt/exp_pool/manager.py
+++ b/metagpt/exp_pool/manager.py
@ -74,7 +74,7 @@ class ExperienceManager(BaseModel):
            exp (Experience): The experience to add.
        """

-        if not self.config.exp_pool.enable_write:
+        if not self.config.exp_pool.enabled or not self.config.exp_pool.enable_write:
            return

        self.storage.add_objs([exp])
@ -92,7 +92,7 @@ class ExperienceManager(BaseModel):
            list[Experience]: A list of experiences that match the args.
        """

-        if not self.config.exp_pool.enable_read:
+        if not self.config.exp_pool.enabled or not self.config.exp_pool.enable_read:
            return []

        nodes = await self.storage.aretrieve(req)
--- a/metagpt/exp_pool/schema.py
+++ b/metagpt/exp_pool/schema.py
@ -1,5 +1,5 @@
 """Experience schema."""
-
+import time
 from enum import Enum
 from typing import Optional

@ -67,6 +67,7 @@ class Experience(BaseModel):
    entry_type: EntryType = Field(default=EntryType.AUTOMATIC, description="Type of entry: Manual or Automatic.")
    tag: str = Field(default="", description="Tagging experience.")
    traj: Optional[Trajectory] = Field(default=None, description="Trajectory.")
+    timestamp: Optional[float] = Field(default_factory=time.time)

    def rag_key(self):
        return self.req
--- a/metagpt/ext/cr/actions/code_review.py
+++ b/metagpt/ext/cr/actions/code_review.py
@ -20,7 +20,7 @@ from metagpt.utils.common import parse_json_code_block
 CODE_REVIEW_PROMPT_TEMPLATE = """
 NOTICE
 Let's think and work step by step.
-With the given pull-request(PR) Patch, and referenced Points(Code Standards), you should compare each point with the code one-by-one.
+With the given pull-request(PR) Patch, and referenced Points(Code Standards), you should compare each point with the code one-by-one within 4000 tokens.

 The Patch code has added line number at the first character each line for reading, but the review should focus on new added code inside the `Patch` (lines starting with line number and '+').
 Each point is start with a line number and follows with the point description.
@ -48,14 +48,16 @@ Each point is start with a line number and follows with the point description.

 CodeReview guidelines:
 - Generate code `comment` that do not meet the point description.
- Each `comment` should be restricted inside the `commented_file`
+- Each `comment` should be restricted inside the `commented_file`.
 - Try to provide diverse and insightful comments across different `commented_file`.
 - Don't suggest to add docstring unless it's necessary indeed.
 - If the same code error occurs multiple times, it cannot be omitted, and all places need to be identified.But Don't duplicate at the same place with the same comment!
 - Every line of code in the patch needs to be carefully checked, and laziness cannot be omitted. It is necessary to find out all the places.
 - The `comment` and `point_id` in the Output must correspond to and belong to the same one `Point`.

+Strictly Observe:
 Just print the PR Patch comments in json format like **Output Format**.
+And the output JSON must be able to be parsed by json.loads() without any errors.
 """

 CODE_REVIEW_COMFIRM_SYSTEM_PROMPT = """
@ -128,38 +130,43 @@ class CodeReview(Action):
        points_dict = {point.id: point for point in points}
        new_comments = []
        for cmt in comments:
-            point = points_dict[cmt.get("point_id")]
+            try:
+                point = points_dict[cmt.get("point_id")]

-            code_start_line = cmt.get("code_start_line")
-            code_end_line = cmt.get("code_end_line")
-            # 如果代码位置为空的话，那么就将这条记录丢弃掉
-            if not code_start_line or not code_end_line:
-                logger.info("False")
-                continue
+                code_start_line = cmt.get("code_start_line")
+                code_end_line = cmt.get("code_end_line")
+                # 如果代码位置为空的话，那么就将这条记录丢弃掉
+                if not code_start_line or not code_end_line:
+                    logger.info("False")
+                    continue

-            # 代码增加上下文，提升confirm的准确率
-            code = get_code_block_from_patch(patch, str(max(1, int(code_start_line) - 3)), str(int(code_end_line) + 3))
-            pattern = r"^[ \t\n\r(){}[\];,]*$"
-            if re.match(pattern, code):
+                # 代码增加上下文，提升confirm的准确率
                code = get_code_block_from_patch(
-                    patch, str(max(1, int(code_start_line) - 5)), str(int(code_end_line) + 5)
+                    patch, str(max(1, int(code_start_line) - 3)), str(int(code_end_line) + 3)
                )
-            code_language = "Java"
-            code_file_ext = cmt.get("commented_file", ".java").split(".")[-1]
-            if code_file_ext == ".java":
+                pattern = r"^[ \t\n\r(){}[\];,]*$"
+                if re.match(pattern, code):
+                    code = get_code_block_from_patch(
+                        patch, str(max(1, int(code_start_line) - 5)), str(int(code_end_line) + 5)
+                    )
                code_language = "Java"
-            elif code_file_ext == ".py":
-                code_language = "Python"
-            prompt = CODE_REVIEW_COMFIRM_TEMPLATE.format(
-                code=code,
-                comment=cmt.get("comment"),
-                desc=point.text,
-                example=point.yes_example + "\n" + point.no_example,
-            )
-            system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)]
-            resp = await self.llm.aask(prompt, system_msgs=system_prompt)
-            if "True" in resp or "true" in resp:
-                new_comments.append(cmt)
+                code_file_ext = cmt.get("commented_file", ".java").split(".")[-1]
+                if code_file_ext == ".java":
+                    code_language = "Java"
+                elif code_file_ext == ".py":
+                    code_language = "Python"
+                prompt = CODE_REVIEW_COMFIRM_TEMPLATE.format(
+                    code=code,
+                    comment=cmt.get("comment"),
+                    desc=point.text,
+                    example=point.yes_example + "\n" + point.no_example,
+                )
+                system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)]
+                resp = await self.llm.aask(prompt, system_msgs=system_prompt)
+                if "True" in resp or "true" in resp:
+                    new_comments.append(cmt)
+            except Exception:
+                logger.info("False")
        logger.info(f"original comments num: {len(comments)}, confirmed comments num: {len(new_comments)}")
        return new_comments

--- a/metagpt/ext/cr/utils/cleaner.py
+++ b/metagpt/ext/cr/utils/cleaner.py
@ -10,7 +10,7 @@ def rm_patch_useless_part(patch: PatchSet, used_suffix: list[str] = ["java", "py
    useless_files = []
    for pfile in patch:
        suffix = str(pfile.target_file).split(".")[-1]
-        if suffix not in used_suffix or pfile.is_removed_file or "test" in pfile.target_file.casefold():
+        if suffix not in used_suffix or pfile.is_removed_file:
            useless_files.append(pfile.path)
            continue
        new_patch.append(pfile)
--- a/metagpt/prompts/di/role_zero.py
+++ b/metagpt/prompts/di/role_zero.py
@ -11,9 +11,17 @@ Note:
 5. Avoid repeating tasks you have already completed. And end loop when all requirements are met.
 """
 # To ensure compatibility with hard-coded experience, do not add any other content between "# Example" and "# Instruction".
-CMD_PROMPT = """
+
+########################## ignore guidance
+
 # Latest Observation
-{latest_observation}
+# {latest_observation}
+
+# {thought_guidance}
+# Finally, combine your thoughts, describe what you want to do concisely in 20 words, including which process you will take and whether you will end, then follow your thoughts to list the commands, adhering closely to the instructions provided.
+
+###########################
+SYSTEM_PROMPT = """

 # Data Structure
 class Task(BaseModel):
@ -30,11 +38,6 @@ class Task(BaseModel):
 {available_commands}
 Special Command: Use {{"command_name": "end"}} to do nothing or indicate completion of all requirements and the end of actions.

-# Current Plan
-{plan_status}
-
-# Current Task
-{current_task}

 # Example
 {example}
@ -42,6 +45,19 @@ Special Command: Use {{"command_name": "end"}} to do nothing or indicate complet

 # Instruction
 {instruction}
+"""
+
+CMD_PROMPT = """
+{current_state}
+
+# Current Plan
+{plan_status}
+
+# Current Task
+{current_task}
+
+# Restrictions
+{requirements_constraints}

 Pay close attention to the Example provided, you can reuse the example for your current situation if it fits.
 You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially.
@ -49,14 +65,9 @@ If you finish current task, you will automatically take the next task in the exi
 Review the latest plan's outcome, focusing on achievements. If your completed task matches the current, consider it finished.
 In your response, include at least one command.

-# Restrictions
-{requirements_constraints}
-
 # Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command:
 Some text indicating your thoughts before JSON is required, such as what tasks have been completed, what tasks are next, how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. You must output ONE and ONLY ONE json array. DON'T output multiple json arrays with thoughts between them.
 Output should adhere to the following format.
-{thought_guidance}
-Finally, combine your thoughts, describe what you want to do conscisely in 20 words, including which process you will taked and whether you will end, then follow your thoughts to list the commands, adhering closely to the instructions provided.
 ```json
 [
    {{
@ -68,6 +79,7 @@ Finally, combine your thoughts, describe what you want to do conscisely in 20 wo
 ```
 Notice: your output JSON data section must start with **```json [**
 """
+
 THOUGHT_GUIDANCE = """
 First, describe the actions you have taken recently.
 Second, describe the messages you have received recently, with a particular emphasis on messages from users. If necessary, develop a plan to address the new user requirements.
@ -100,6 +112,9 @@ JSON_REPAIR_PROMPT = """
 ## json data
 {json_data}

+## json decode error
+{json_decode_error}
+
 ## Output Format
 ```json

--- a/metagpt/prompts/di/swe_agent.py
+++ b/metagpt/prompts/di/swe_agent.py
@ -4,48 +4,6 @@ You can find the original examples from the SWE-agent project here:
 https://github.com/princeton-nlp/SWE-agent/tree/main/config/configs
 """

-SWE_AGENT_SYSTEM_TEMPLATE = """
-SETTING: You are an autonomous programmer, and you're working directly in the environment line with a special interface.
-
-The special interface consists of a file editor that shows you 100 lines of a file at a time.
-
-Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. 
-If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
-Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them.
-
-Your output should always contain a section of reasoning and a command described in JSON format.
-
-Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes.
-As shown in the example below:
-
-First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like.
-
-```json
-{{
-    "command_name": "Bash.run",
-    "args": {{
-        "cmd": "ls -a" 
-    }}
-}}
-```
-
-You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
-If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. 
-Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
-
-You can use any bash commands you want (e.g., find, grep, cat, ls, cd) or any custom special tools (including `edit`) by calling Bash.run. Edit all the files you need.
-You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors.
-
-However, the Bash.run does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
-
-In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix.
-
-# INSTRUCTIONS:
-Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it.
-All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time.
-Your terminal session has started, and you can use any bash commands or the special interface to help you. Edit all the files you need.
-"""
-
 MINIMAL_EXAMPLE = """
 ## Example of a actions trajectory
 User Requirement and Issue: Fix the bug in the repo. Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed.
@ -224,9 +182,50 @@ IMPORTANT_TIPS = """
 14. If provided an issue link, you MUST go to the issue page using Browser tool to understand the issue before starting your fix.

 15. When the edit fails, try to enlarge the starting line.
+
+16. Once again, and this is critical: YOU CAN ONLY ENTER ONE COMMAND AT A TIME.
 """

 NEXT_STEP_TEMPLATE = f"""
+SETTING: You are an autonomous programmer, and you're working directly in the environment line with a special interface.
+
+The special interface consists of a file editor that shows you 100 lines of a file at a time.
+
+Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. 
+If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them.
+
+Your output should always contain a section of reasoning and a command described in JSON format.
+
+Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes.
+As shown in the example below:
+
+First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like.
+
+```json
+{{
+    "command_name": "Bash.run",
+    "args": {{
+        "cmd": "ls -a" 
+    }}
+}}
+```
+
+You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
+If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. 
+Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
+
+You can use any bash commands you want (e.g., find, grep, cat, ls, cd) or any custom special tools (including `edit`) by calling Bash.run. Edit all the files you need.
+You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors.
+
+However, the Bash.run does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
+
+In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix.
+
+# INSTRUCTIONS:
+Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it.
+All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time.
+Your terminal session has started, and you can use any bash commands or the special interface to help you. Edit all the files you need.
 # Example of Output
 These examples are provided to demonstrate the output style that expected to be several stages including Locate issue, Fix the bug, Test the fix(Optional), and Submit the changes. It is included to show you how to correctly use the interface. You do not need to follow exactly what is done in the Example. The separator is "-----".
 ----- Beginning of Examples -----
@ -236,10 +235,12 @@ These examples are provided to demonstrate the output style that expected to be
 # IMPORTANT TIPS
 {IMPORTANT_TIPS}

-# Output Next Step
-The current bash state is:
-(Open file: {{open_file}})
-(Current directory: {{working_dir}})

 Avoid repeating the same command. Instead, please think about the current situation and provide the next bash command to execute in JSON format:"
 """
+CURRENT_BASH_STATE = """
+# Output Next Step
+The current bash state is:
+(Open file: {open_file})
+(Current directory: {working_dir})
+"""
--- a/metagpt/prompts/di/team_leader.py
+++ b/metagpt/prompts/di/team_leader.py
@ -1,16 +1,12 @@
 from metagpt.prompts.di.role_zero import THOUGHT_GUIDANCE

-SYSTEM_PROMPT = """
-You are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.
-When drafting and routing tasks, ALWAYS include necessary or important info inside the instruction, such as path, link, environment to team members, because you are their sole info source.
-Each time you do something, reply to human letting them know what you did.
-"""
-
 TL_INSTRUCTION = """
 You are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.
 Your team member:
 {team_info}
 You should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.
+When drafting and routing tasks, ALWAYS include necessary or important info inside the instruction, such as path, link, environment to team members, because you are their sole info source.
+Each time you do something, reply to human letting them know what you did.
 When creating a new plan involving multiple members, create all tasks at once.
 If plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.
 You should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.
--- a/metagpt/rag/schema.py
+++ b/metagpt/rag/schema.py
@ -1,7 +1,7 @@
 """RAG schemas."""
-
+from enum import Enum
 from pathlib import Path
-from typing import Any, ClassVar, Literal, Optional, Union
+from typing import Any, ClassVar, List, Literal, Optional, Union

 from chromadb.api.types import CollectionMetadata
 from llama_index.core.embeddings import BaseEmbedding
@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator

 from metagpt.config2 import config
 from metagpt.configs.embedding_config import EmbeddingType
+from metagpt.logs import logger
 from metagpt.rag.interface import RAGObject


@ -44,7 +45,13 @@ class FAISSRetrieverConfig(IndexRetrieverConfig):
    @model_validator(mode="after")
    def check_dimensions(self):
        if self.dimensions == 0:
-            self.dimensions = self._embedding_type_to_dimensions.get(config.embedding.api_type, 1536)
+            self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get(
+                config.embedding.api_type, 1536
+            )
+            if not config.embedding.dimensions and config.embedding.api_type not in self._embedding_type_to_dimensions:
+                logger.warning(
+                    f"You didn't set dimensions in config when using {config.embedding.api_type}, default to 1536"
+                )

        return self

@ -207,3 +214,51 @@ class ObjectNode(TextNode):
        )

        return metadata.model_dump()
+
+
+class OmniParseType(str, Enum):
+    """OmniParseType"""
+
+    PDF = "PDF"
+    DOCUMENT = "DOCUMENT"
+
+
+class ParseResultType(str, Enum):
+    """The result type for the parser."""
+
+    TXT = "text"
+    MD = "markdown"
+    JSON = "json"
+
+
+class OmniParseOptions(BaseModel):
+    """OmniParse Options config"""
+
+    result_type: ParseResultType = Field(default=ParseResultType.MD, description="OmniParse result_type")
+    parse_type: OmniParseType = Field(default=OmniParseType.DOCUMENT, description="OmniParse parse_type")
+    max_timeout: Optional[int] = Field(default=120, description="Maximum timeout for OmniParse service requests")
+    num_workers: int = Field(
+        default=5,
+        gt=0,
+        lt=10,
+        description="Number of concurrent requests for multiple files",
+    )
+
+
+class OminParseImage(BaseModel):
+    image: str = Field(default="", description="image str bytes")
+    image_name: str = Field(default="", description="image name")
+    image_info: Optional[dict] = Field(default={}, description="image info")
+
+
+class OmniParsedResult(BaseModel):
+    markdown: str = Field(default="", description="markdown text")
+    text: str = Field(default="", description="plain text")
+    images: Optional[List[OminParseImage]] = Field(default=[], description="images")
+    metadata: Optional[dict] = Field(default={}, description="metadata")
+
+    @model_validator(mode="before")
+    def set_markdown(cls, values):
+        if not values.get("markdown"):
+            values["markdown"] = values.get("text")
+        return values
--- a/metagpt/roles/architect.py
+++ b/metagpt/roles/architect.py
@ -8,7 +8,6 @@
 from metagpt.actions import WritePRD
 from metagpt.actions.design_api import WriteDesign
 from metagpt.roles.di.role_zero import RoleZero
-from metagpt.tools.libs.software_development import write_trd_and_framework
 from metagpt.utils.common import tool2name

 ARCHITECT_INSTRUCTION = """
@ -33,7 +32,7 @@ class Architect(RoleZero):

    name: str = "Bob"
    profile: str = "Architect"
-    goal: str = "design a concise, usable, complete software system. ouput the system design or software framework."
+    goal: str = "design a concise, usable, complete software system. output the system design."
    constraints: str = (
        "make sure the architecture is simple enough and use  appropriate open source "
        "libraries. Use same language as user requirement"
@ -45,7 +44,6 @@ class Architect(RoleZero):
        "Editor:write,read,write_content",
        "RoleZero",
        "WriteDesign",
-        write_trd_and_framework.__name__,
    ]

    def __init__(self, **kwargs) -> None:
@ -64,7 +62,6 @@ class Architect(RoleZero):
        self.tool_execution_map.update(tool2name(WriteDesign, ["run"], write_design.run))
        self.tool_execution_map.update(
            {
-                write_trd_and_framework.__name__: write_trd_and_framework,
                "run": write_design.run,  # alias
            }
        )
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -23,6 +23,7 @@ from metagpt.prompts.di.role_zero import (
    QUICK_THINK_PROMPT,
    REGENERATE_PROMPT,
    ROLE_INSTRUCTION,
+    SYSTEM_PROMPT,
    THOUGHT_GUIDANCE,
 )
 from metagpt.roles import Role
@ -46,8 +47,9 @@ class RoleZero(Role):
    name: str = "Zero"
    profile: str = "RoleZero"
    goal: str = ""
-    system_msg: list[str] = None  # Use None to conform to the default value at llm.aask
+    system_prompt: str = SYSTEM_PROMPT  # Template; formatted with task_type_desc/available_commands/example/instruction and passed to the LLM call as system_msgs
    cmd_prompt: str = CMD_PROMPT
+    cmd_prompt_current_state: str = ""
    thought_guidance: str = THOUGHT_GUIDANCE
    instruction: str = ROLE_INSTRUCTION
    task_type_desc: str = None
@ -152,21 +154,24 @@ class RoleZero(Role):
        tools = await self.tool_recommender.recommend_tools()
        tool_info = json.dumps({tool.name: tool.schemas for tool in tools})

-        ### Make Decision Dynamically ###
-        memory = self.rc.memory.get(self.memory_k)
+        ### Role Instruction ###
        instruction = self.instruction.strip()
+        system_prompt = self.system_prompt.format(
+            task_type_desc=self.task_type_desc, available_commands=tool_info, example=example, instruction=instruction
+        )
+
+        ### Make Decision Dynamically ###
        prompt = self.cmd_prompt.format(
-            example=example,
-            available_commands=tool_info,
-            task_type_desc=self.task_type_desc,
+            current_state=self.cmd_prompt_current_state,
            plan_status=plan_status,
            current_task=current_task,
-            instruction=instruction,
-            thought_guidance=self.thought_guidance,
-            latest_observation=memory[-1].content,
            requirements_constraints=self.requirements_constraints,
        )
+
+        ### Recent Observation ###
+        memory = self.rc.memory.get(self.memory_k)
        memory = await self.parse_browser_actions(memory)
+
        req = self.llm.format_msg(memory + [UserMessage(content=prompt)])
        async with ThoughtReporter(enable_llm_stream=True) as reporter:
            await reporter.async_report({"type": "react"})
@ -175,7 +180,7 @@ class RoleZero(Role):
                current_task=current_task,
                instruction=instruction,
            )
-            self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=self.system_msg, state_data=state_data)
+            self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=[system_prompt], state_data=state_data)

        self.command_rsp = await self._check_duplicates(req, self.command_rsp)

--- a/metagpt/roles/di/swe_agent.py
+++ b/metagpt/roles/di/swe_agent.py
@ -4,9 +4,9 @@ from pydantic import Field

 from metagpt.logs import logger
 from metagpt.prompts.di.swe_agent import (
+    CURRENT_BASH_STATE,
    MINIMAL_EXAMPLE,
    NEXT_STEP_TEMPLATE,
-    SWE_AGENT_SYSTEM_TEMPLATE,
 )
 from metagpt.roles.di.role_zero import RoleZero
 from metagpt.tools.libs.git import git_create_pull
@ -17,7 +17,6 @@ class SWEAgent(RoleZero):
    name: str = "Swen"
    profile: str = "Issue Solver"
    goal: str = "Resolve GitHub issue or bug in any existing codebase"
-    system_msg: str = [SWE_AGENT_SYSTEM_TEMPLATE]
    _instruction: str = NEXT_STEP_TEMPLATE
    tools: list[str] = [
        "Bash",
@ -54,7 +53,7 @@ class SWEAgent(RoleZero):
        """
        state_output = await self.terminal.run("state")
        bash_state = json.loads(state_output)
-        self.instruction = self._instruction.format(**bash_state).strip()
+        self.cmd_prompt_current_state = CURRENT_BASH_STATE.format(**bash_state).strip()

    async def _parse_commands_for_eval(self):
        """
--- a/metagpt/roles/di/team_leader.py
+++ b/metagpt/roles/di/team_leader.py
@ -4,7 +4,6 @@ from metagpt.actions.di.run_command import RunCommand
 from metagpt.prompts.di.team_leader import (
    FINISH_CURRENT_TASK_CMD,
    QUICK_THINK_SYSTEM_PROMPT,
-    SYSTEM_PROMPT,
    TL_INSTRUCTION,
    TL_THOUGHT_GUIDANCE,
 )
@ -19,7 +18,6 @@ class TeamLeader(RoleZero):
    name: str = "Mike"
    profile: str = "Team Leader"
    goal: str = "Manage a team to assist users"
-    system_msg: list[str] = [SYSTEM_PROMPT]
    thought_guidance: str = TL_THOUGHT_GUIDANCE
    # TeamLeader only reacts once each time, but may encounter errors or need to ask human, thus allowing 2 more turns
    max_react_loop: int = 3
--- a/metagpt/strategy/task_type.py
+++ b/metagpt/strategy/task_type.py
@ -8,7 +8,8 @@ from metagpt.prompts.task_type import (
    FEATURE_ENGINEERING_PROMPT,
    IMAGE2WEBPAGE_PROMPT,
    MODEL_EVALUATE_PROMPT,
-    MODEL_TRAIN_PROMPT, WEB_SCRAPING_PROMPT,
+    MODEL_TRAIN_PROMPT,
+    WEB_SCRAPING_PROMPT,
 )


--- a/metagpt/tools/libs/init.py
+++ b/metagpt/tools/libs/init.py
@ -17,7 +17,7 @@ from metagpt.tools.libs import (
    deployer,
    git,
 )
-from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description
+from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description, get_env_default

 _ = (
    data_preprocess,
@ -32,6 +32,7 @@ _ = (
    deployer,
    git,
    get_env,
+    get_env_default,
    get_env_description,
    set_get_env_entry,
    default_get_env,
--- a/metagpt/tools/libs/editor.py
+++ b/metagpt/tools/libs/editor.py
@ -1,11 +1,18 @@
+import base64
 import os
 import shutil
 import subprocess
+from pathlib import Path
+from typing import List, Optional, Union

 from pydantic import BaseModel

 from metagpt.const import DEFAULT_WORKSPACE_ROOT
+from metagpt.logs import logger
 from metagpt.tools.tool_registry import register_tool
+from metagpt.utils import read_docx
+from metagpt.utils.common import aread_bin, awrite_bin
+from metagpt.utils.repo_to_markdown import is_text_file
 from metagpt.utils.report import EditorReporter


@ -38,14 +45,28 @@ class Editor:
        # self.resource.report(path, "path")
        return f"The writing/coding the of the file {os.path.basename(path)}' is now completed. The file '{os.path.basename(path)}' has been successfully created."

-    def read(self, path: str) -> FileBlock:
+    async def read(self, path: str) -> FileBlock:
        """Read the whole content of a file. Using absolute paths as the argument for specifying the file location."""
-        with open(path, "r") as f:
-            self.resource.report(path, "path")
-            lines = f.readlines()
+        is_text, mime_type = await is_text_file(path)
+        if is_text:
+            lines = self._read_text(path)
+        elif mime_type == "application/pdf":
+            lines = await self._read_pdf(path)
+        elif mime_type in {
+            "application/msword",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "application/vnd.ms-word.document.macroEnabled.12",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
+            "application/vnd.ms-word.template.macroEnabled.12",
+        }:
+            lines = await self._read_docx(path)
+        else:
+            return FileBlock(file_path=str(path), block_content="")
+        self.resource.report(str(path), "path")
+
        lines_with_num = [f"{i + 1:03}|{line}" for i, line in enumerate(lines)]
        result = FileBlock(
-            file_path=path,
+            file_path=str(path),
            block_content="".join(lines_with_num),
        )
        return result
@ -196,3 +217,63 @@ class Editor:
        lint_passed = result.returncode == 0
        lint_message = result.stdout
        return lint_passed, lint_message
+
+    @staticmethod
+    def _read_text(path: Union[str, Path]) -> List[str]:
+        """Return the lines of the text file at `path`, line endings included."""
+        # NOTE(review): no explicit encoding is given, so the platform default applies.
+        with open(str(path), "r") as reader:
+            return reader.readlines()
+
+    @staticmethod
+    async def _read_pdf(path: Union[str, Path]) -> List[str]:
+        """Read a PDF, trying OmniParse first and falling back to llama_index's `PDFReader`.
+
+        Returns the OmniParse result when it is non-empty; otherwise the `text` of every
+        document produced by `PDFReader.load_data`.
+        """
+        parsed = await Editor._omniparse_read_file(path)
+        if not parsed:
+            # Imported here so llama_index is only loaded when the fallback is actually needed.
+            from llama_index.readers.file import PDFReader
+
+            documents = PDFReader().load_data(file=Path(path))
+            parsed = [doc.text for doc in documents]
+        return parsed
+
+    @staticmethod
+    async def _read_docx(path: Union[str, Path]) -> List[str]:
+        """Read a Word document, trying OmniParse first and falling back to `read_docx`."""
+        parsed = await Editor._omniparse_read_file(path)
+        return parsed if parsed else read_docx(str(path))
+
+    @staticmethod
+    async def _omniparse_read_file(path: Union[str, Path]) -> Optional[List[str]]:
+        """Parse a document through an OmniParse server, if one is configured.
+
+        The server settings (`base_url`, `api_key`, `timeout`) are looked up with
+        `get_env_default` under the app name "OmniParse".
+
+        Args:
+            path: Location of the document to parse.
+
+        Returns:
+            None when no `base_url` is configured, when the request fails, or when the server
+            returns neither text nor images. Otherwise a list whose first item is the parsed text,
+            followed by one markdown image link per returned image. Images are written to a
+            directory next to `path`, named after the file with "." replaced by "_" plus "_images".
+        """
+        from metagpt.tools.libs import get_env_default
+        from metagpt.utils.omniparse_client import OmniParseClient
+
+        default_timeout = 120
+        base_url = await get_env_default(key="base_url", app_name="OmniParse", default_value="")
+        if not base_url:
+            return None
+        api_key = await get_env_default(key="api_key", app_name="OmniParse", default_value="")
+        v = await get_env_default(key="timeout", app_name="OmniParse", default_value=str(default_timeout))
+        try:
+            timeout = int(v)
+        except (TypeError, ValueError):
+            # A missing (None) or non-numeric setting falls back to the default.
+            timeout = default_timeout
+        if timeout <= 0:
+            # Zero or negative values are not usable as a request timeout.
+            timeout = default_timeout
+
+        try:
+            client = OmniParseClient(api_key=api_key, base_url=base_url, max_timeout=timeout)
+            file_data = await aread_bin(filename=path)
+            ret = await client.parse_document(file_input=file_data, bytes_filename=str(path))
+        except Exception as e:
+            # Best effort: any failure is logged and reported as "no result" so callers can fall back.
+            logger.exception(f"{path}: {e}")
+            return None
+        if not ret.images:
+            return [ret.text] if ret.text else None
+
+        result = [ret.text]
+        source = Path(path)
+        img_dir = source.parent / (source.name.replace(".", "_") + "_images")
+        img_dir.mkdir(parents=True, exist_ok=True)
+        for image in ret.images:
+            # NOTE(review): image_name comes from the server response and is used as a file name as-is.
+            filename = img_dir / image.image_name
+            await awrite_bin(filename=filename, data=base64.b64decode(image.image))
+            result.append(f"![{image.image_name}]({str(filename)})")
+        return result
--- a/metagpt/tools/libs/env.py
+++ b/metagpt/tools/libs/env.py
@ -7,7 +7,7 @@
@Desc: Implement `get_env`. RFC 216 2.4.2.4.2.
 """
 import os
-from typing import Dict
+from typing import Dict, Optional


 class EnvKeyNotFoundError(Exception):
@ -15,14 +15,26 @@ class EnvKeyNotFoundError(Exception):
        super().__init__(info)


+def to_app_key(key: str, app_name: str = None) -> str:
+    """Return `key` prefixed with `app_name` and a hyphen, or `key` alone when no app name is given."""
+    if not app_name:
+        return key
+    return f"{app_name}-{key}"
+
+
+def split_app_key(app_key: str) -> (str, str):
+    """Split `app_key` at its first hyphen into `(app_name, key)`.
+
+    When there is no hyphen the app name is the empty string and the key is `app_key` itself.
+    """
+    prefix, hyphen, remainder = app_key.partition("-")
+    if not hyphen:
+        return "", app_key
+    return prefix, remainder
+
+
 async def default_get_env(key: str, app_name: str = None) -> str:
-    if key in os.environ:
-        return os.environ[key]
+    app_key = to_app_key(key=key, app_name=app_name)
+    if app_key in os.environ:
+        return os.environ[app_key]

    from metagpt.context import Context

    context = Context()
-    val = context.kwargs.get(key, None)
+    val = context.kwargs.get(app_key, None)
    if val is not None:
        return val

@ -32,14 +44,16 @@ async def default_get_env(key: str, app_name: str = None) -> str:
 async def default_get_env_description() -> Dict[str, str]:
    result = {}
    for k in os.environ.keys():
-        call = f'await get_env(key="{k}", app_name="")'
+        app_name, key = split_app_key(k)
+        call = f'await get_env(key="{key}", app_name="{app_name}")'
        result[call] = f"Return the value of environment variable `{k}`."

    from metagpt.context import Context

    context = Context()
    for k in context.kwargs.__dict__.keys():
-        call = f'await get_env(key="{k}", app_name="")'
+        app_name, key = split_app_key(k)
+        call = f'await get_env(key="{key}", app_name="{app_name}")'
        result[call] = f"Get the value of environment variable `{k}`."
    return result

@ -84,6 +98,37 @@ async def get_env(key: str, app_name: str = None) -> str:
    return await default_get_env(key=key, app_name=app_name)


+async def get_env_default(key: str, app_name: str = None, default_value: str = None) -> Optional[str]:
+    """
+    Look up an environment value through `get_env`, falling back to a default when it is missing.
+
+    Args:
+        key (str): The name of the environment variable to retrieve.
+        app_name (str, optional): The application or component name passed to `get_env` along with `key`.
+        default_value (str, optional): The value returned when `get_env` raises `EnvKeyNotFoundError`.
+
+    Returns:
+        str or None: The value returned by `get_env`, otherwise `default_value`.
+
+    Example:
+        >>> from metagpt.tools.libs.env import get_env_default
+        >>> api_key = await get_env_default(key="NOT_EXISTS_API_KEY", default_value="<API_KEY>")
+        >>> print(api_key)
+        <API_KEY>
+
+        >>> from metagpt.tools.libs.env import get_env_default
+        >>> api_key = await get_env_default(key="NOT_EXISTS_API_KEY", app_name="GITHUB", default_value="<API_KEY>")
+        >>> print(api_key)
+        <API_KEY>
+
+    """
+    try:
+        value = await get_env(key=key, app_name=app_name)
+    except EnvKeyNotFoundError:
+        value = default_value
+    return value
+
+
 async def get_env_description() -> Dict[str, str]:
    global _get_env_description_entry

--- a/metagpt/tools/libs/software_development.py
+++ b/metagpt/tools/libs/software_development.py
@ -21,7 +21,6 @@ from metagpt.actions.requirement_analysis.trd import (
 from metagpt.const import ASSISTANT_ALIAS, DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
 from metagpt.context import Context
 from metagpt.logs import ToolLogItem, log_tool_output, logger
-from metagpt.tools.tool_registry import register_tool
 from metagpt.utils.common import aread
 from metagpt.utils.cost_manager import CostManager

@ -86,7 +85,6 @@ async def mock_asearch_acknowledgement(use_case_actors: str):
    return await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md")


-@register_tool(tags=["system design", "write trd", "Write a TRD"])
 async def write_trd(
    use_case_actors: str,
    user_requirements: str,
@ -155,7 +153,6 @@ async def write_trd(
    return trd


-@register_tool(tags=["system design", "write software framework", "Write a software framework based on a TRD"])
 async def write_framework(
    use_case_actors: str,
    trd: str,
@ -240,7 +237,6 @@ async def write_framework(
    return "## Software Framework" + "".join([f"\n- {i}" for i in file_list])


-@register_tool(tags=["system design", "write trd and framework", "Write a TRD and the framework"])
 async def write_trd_and_framework(
    use_case_actors: str,
    user_requirements: str,
--- a/metagpt/tools/libs/terminal.py
+++ b/metagpt/tools/libs/terminal.py
@ -4,7 +4,7 @@ from asyncio import Queue
 from asyncio.subprocess import PIPE, STDOUT
 from typing import Optional

-from metagpt.const import DEFAULT_WORKSPACE_ROOT, SWE_CMD_WORK_DIR, SWE_SETUP_PATH
+from metagpt.const import DEFAULT_WORKSPACE_ROOT, SWE_SETUP_PATH
 from metagpt.logs import logger
 from metagpt.tools.tool_registry import register_tool
 from metagpt.utils.report import END_MARKER_VALUE, TerminalReporter
@ -151,10 +151,7 @@ class Bash(Terminal):

    def __init__(self):
        """init"""
-        if not SWE_CMD_WORK_DIR.exists():
-            SWE_CMD_WORK_DIR.mkdir(parents=True)
-
-        os.environ["SWE_CMD_WORK_DIR"] = str(SWE_CMD_WORK_DIR)
+        os.environ["SWE_CMD_WORK_DIR"] = str(DEFAULT_WORKSPACE_ROOT)
        super().__init__()
        self.start_flag = False

--- a/metagpt/tools/search_engine_googleapi.py
+++ b/metagpt/tools/search_engine_googleapi.py
@ -26,6 +26,8 @@ class GoogleAPIWrapper(BaseModel):

    api_key: str
    cse_id: str
+    discovery_service_url: Optional[str] = None
+
    loop: Optional[asyncio.AbstractEventLoop] = None
    executor: Optional[futures.Executor] = None
    proxy: Optional[str] = None
@ -56,7 +58,7 @@ class GoogleAPIWrapper(BaseModel):

    @property
    def google_api_client(self):
-        build_kwargs = {"developerKey": self.api_key}
+        build_kwargs = {"developerKey": self.api_key, "discoveryServiceUrl": self.discovery_service_url}
        if self.proxy:
            parse_result = urlparse(self.proxy)
            proxy_type = parse_result.scheme
--- a/metagpt/tools/web_browser_engine_playwright.py
+++ b/metagpt/tools/web_browser_engine_playwright.py
@ -39,11 +39,9 @@ class PlaywrightWrapper(BaseModel):
            if not any(str.startswith(i, "--proxy-server=") for i in args):
                launch_kwargs["proxy"] = {"server": self.proxy}

-        if "ignore_https_errors" in kwargs:
-            self.context_kwargs["ignore_https_errors"] = kwargs["ignore_https_errors"]
-
-        if "java_script_enabled" in kwargs:
-            self.context_kwargs["java_script_enabled"] = kwargs["java_script_enabled"]
+        for key in ["ignore_https_errors", "java_script_enabled", "extra_http_headers", "user_agent"]:
+            if key in kwargs:
+                self.context_kwargs[key] = kwargs[key]

    async def run(self, url: str, *urls: str, per_page_timeout: float = None) -> WebPage | list[WebPage]:
        async with async_playwright() as ap:
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@ -852,7 +852,10 @@ async def get_mime_type(filename: str | Path, force_read: bool = False) -> str:
    }

    try:
-        stdout, _, _ = await shell_execute(f"file --mime-type {str(filename)}")
+        stdout, stderr, _ = await shell_execute(f"file --mime-type {str(filename)}")
+        if stderr:
+            logger.debug(f"file:{filename}, error:{stderr}")
+            return guess_mime_type
        ix = stdout.rfind(" ")
        mime_type = stdout[ix:].strip()
        if mime_type == "text/plain" and guess_mime_type in text_set:
--- a/metagpt/utils/omniparse_client.py
+++ b/metagpt/utils/omniparse_client.py
@ -0,0 +1,238 @@
+import mimetypes
+from pathlib import Path
+from typing import Union
+
+import httpx
+
+from metagpt.rag.schema import OmniParsedResult
+from metagpt.utils.common import aread_bin
+
+
+class OmniParseClient:
+    """
+    OmniParse Server Client
+    This client interacts with the OmniParse server to parse different types of media, documents.
+
+    OmniParse API Documentation: https://docs.cognitivelab.in/api
+
+    Attributes:
+        ALLOWED_DOCUMENT_EXTENSIONS (set): A set of supported document file extensions.
+        ALLOWED_AUDIO_EXTENSIONS (set): A set of supported audio file extensions.
+        ALLOWED_VIDEO_EXTENSIONS (set): A set of supported video file extensions.
+    """
+
+    ALLOWED_DOCUMENT_EXTENSIONS = {".pdf", ".ppt", ".pptx", ".doc", ".docx"}
+    ALLOWED_AUDIO_EXTENSIONS = {".mp3", ".wav", ".aac"}
+    ALLOWED_VIDEO_EXTENSIONS = {".mp4", ".mkv", ".avi", ".mov"}
+
+    def __init__(self, api_key: str = None, base_url: str = "http://localhost:8000", max_timeout: int = 120):
+        """
+        Args:
+            api_key: Default None, can be used for authentication later.
+            base_url: Base URL for the API.
+            max_timeout: Maximum request timeout in seconds.
+        """
+        self.api_key = api_key
+        self.base_url = base_url
+        self.max_timeout = max_timeout
+
+        self.parse_media_endpoint = "/parse_media"
+        self.parse_website_endpoint = "/parse_website"
+        self.parse_document_endpoint = "/parse_document"
+
+    async def _request_parse(
+        self,
+        endpoint: str,
+        method: str = "POST",
+        files: dict = None,
+        params: dict = None,
+        data: dict = None,
+        json: dict = None,
+        headers: dict = None,
+        **kwargs,
+    ) -> dict:
+        """
+        Request OmniParse API to parse a document.
+
+        Args:
+            endpoint (str): API endpoint.
+            method (str, optional): HTTP method to use. Default is "POST".
+            files (dict, optional): Files to include in the request.
+            params (dict, optional): Query string parameters.
+            data (dict, optional): Form data to include in the request body.
+            json (dict, optional): JSON data to include in the request body.
+            headers (dict, optional): HTTP headers to include in the request. The dict is not modified.
+            **kwargs: Additional keyword arguments for httpx.AsyncClient.request()
+
+        Returns:
+            dict: JSON response data.
+
+        Raises:
+            httpx.HTTPStatusError: If the server answers with an error status (via `raise_for_status`).
+        """
+        # Strip a trailing slash so "http://host/" + "/parse_document" does not become "//parse_document".
+        url = f"{self.base_url.rstrip('/')}{endpoint}"
+        method = method.upper()
+        # Work on a copy so the caller's dict is never mutated when the Authorization header is added.
+        request_headers = dict(headers or {})
+        if self.api_key:
+            request_headers["Authorization"] = f"Bearer {self.api_key}"
+        async with httpx.AsyncClient() as client:
+            response = await client.request(
+                url=url,
+                method=method,
+                files=files,
+                params=params,
+                json=json,
+                data=data,
+                headers=request_headers,
+                timeout=self.max_timeout,
+                **kwargs,
+            )
+            response.raise_for_status()
+            return response.json()
+
+    async def parse_document(self, file_input: Union[str, bytes, Path], bytes_filename: str = None) -> OmniParsedResult:
+        """
+        Parse document-type data (supports ".pdf", ".ppt", ".pptx", ".doc", ".docx").
+
+        Args:
+            file_input: File path or file byte data.
+            bytes_filename: Filename for byte data, useful for determining MIME type for the HTTP request.
+
+        Raises:
+            ValueError: If the file extension is not allowed.
+
+        Returns:
+            OmniParsedResult: The result of the document parsing.
+        """
+        self.verify_file_ext(file_input, self.ALLOWED_DOCUMENT_EXTENSIONS, bytes_filename)
+        file_info = await self.get_file_info(file_input, bytes_filename)
+        resp = await self._request_parse(self.parse_document_endpoint, files={"file": file_info})
+        data = OmniParsedResult(**resp)
+        return data
+
+    async def parse_pdf(self, file_input: Union[str, bytes, Path]) -> OmniParsedResult:
+        """
+        Parse pdf document.
+
+        Args:
+            file_input: File path or file byte data.
+
+        Raises:
+            ValueError: If the file extension is not allowed.
+
+        Returns:
+            OmniParsedResult: The result of the pdf parsing.
+        """
+        self.verify_file_ext(file_input, {".pdf"})
+        # parse_pdf supports parsing by accepting only the byte data of the file.
+        file_info = await self.get_file_info(file_input, only_bytes=True)
+        endpoint = f"{self.parse_document_endpoint}/pdf"
+        resp = await self._request_parse(endpoint=endpoint, files={"file": file_info})
+        data = OmniParsedResult(**resp)
+        return data
+
+    async def parse_video(self, file_input: Union[str, bytes, Path], bytes_filename: str = None) -> dict:
+        """
+        Parse video-type data (supports ".mp4", ".mkv", ".avi", ".mov").
+
+        Args:
+            file_input: File path or file byte data.
+            bytes_filename: Filename for byte data, useful for determining MIME type for the HTTP request.
+
+        Raises:
+            ValueError: If the file extension is not allowed.
+
+        Returns:
+            dict: JSON response data.
+        """
+        self.verify_file_ext(file_input, self.ALLOWED_VIDEO_EXTENSIONS, bytes_filename)
+        file_info = await self.get_file_info(file_input, bytes_filename)
+        return await self._request_parse(f"{self.parse_media_endpoint}/video", files={"file": file_info})
+
+    async def parse_audio(self, file_input: Union[str, bytes, Path], bytes_filename: str = None) -> dict:
+        """
+        Parse audio-type data (supports ".mp3", ".wav", ".aac").
+
+        Args:
+            file_input: File path or file byte data.
+            bytes_filename: Filename for byte data, useful for determining MIME type for the HTTP request.
+
+        Raises:
+            ValueError: If the file extension is not allowed.
+
+        Returns:
+            dict: JSON response data.
+        """
+        self.verify_file_ext(file_input, self.ALLOWED_AUDIO_EXTENSIONS, bytes_filename)
+        file_info = await self.get_file_info(file_input, bytes_filename)
+        return await self._request_parse(f"{self.parse_media_endpoint}/audio", files={"file": file_info})
+
+    @staticmethod
+    def verify_file_ext(file_input: Union[str, bytes, Path], allowed_file_extensions: set, bytes_filename: str = None):
+        """
+        Verify the file extension (case-insensitively).
+
+        Args:
+            file_input: File path or file byte data.
+            allowed_file_extensions: Set of allowed file extensions.
+            bytes_filename: Filename to use for verification when `file_input` is byte data.
+
+        Raises:
+            ValueError: If the file extension is not allowed.
+
+        Returns:
+            None. Nothing is verified when only byte data is provided without a filename.
+        """
+        verify_file_path = None
+        if isinstance(file_input, (str, Path)):
+            verify_file_path = str(file_input)
+        elif isinstance(file_input, bytes) and bytes_filename:
+            verify_file_path = bytes_filename
+
+        if not verify_file_path:
+            # Do not verify if only byte data is provided
+            return
+
+        file_ext = Path(verify_file_path).suffix
+        # Compare in lower case so that e.g. "report.PDF" is accepted like "report.pdf".
+        allowed_lower = {ext.lower() for ext in allowed_file_extensions}
+        if file_ext.lower() not in allowed_lower:
+            raise ValueError(f"Not allowed {file_ext} File extension must be one of {allowed_file_extensions}")
+
+    @staticmethod
+    async def get_file_info(
+        file_input: Union[str, bytes, Path],
+        bytes_filename: str = None,
+        only_bytes: bool = False,
+    ) -> Union[bytes, tuple]:
+        """
+        Get file information.
+
+        Args:
+            file_input: File path or file byte data.
+            bytes_filename: Filename to use when uploading byte data, useful for determining MIME type.
+            only_bytes: Whether to return only byte data. Default is False, which returns a tuple.
+
+        Raises:
+            ValueError: If bytes_filename is not provided when file_input is bytes or if file_input is not a valid type.
+
+        Notes:
+            Since `parse_document`,`parse_video`, `parse_audio` supports parsing various file types,
+            the MIME type of the file must be specified when uploading.
+
+        Returns: [bytes, tuple]
+            Returns bytes if only_bytes is True, otherwise returns a tuple (filename, file_bytes, mime_type).
+        """
+        if isinstance(file_input, (str, Path)):
+            filename = Path(file_input).name
+            file_bytes = await aread_bin(file_input)
+
+            if only_bytes:
+                return file_bytes
+
+            mime_type = mimetypes.guess_type(file_input)[0]
+            return filename, file_bytes, mime_type
+        elif isinstance(file_input, bytes):
+            if only_bytes:
+                return file_input
+            if not bytes_filename:
+                raise ValueError("bytes_filename must be set when passing bytes")
+
+            mime_type = mimetypes.guess_type(bytes_filename)[0]
+            return bytes_filename, file_input, mime_type
+        else:
+            raise ValueError("file_input must be a string (file path) or bytes.")
--- a/metagpt/utils/repo_to_markdown.py
+++ b/metagpt/utils/repo_to_markdown.py
@ -7,7 +7,7 @@ from __future__ import annotations

 import re
 from pathlib import Path
-from typing import Tuple
+from typing import Tuple, Union

 from gitignore_parser import parse_gitignore

@ -82,7 +82,7 @@ async def _write_files(repo_path, gitignore_rules=None) -> str:


 async def _write_file(filename: Path, repo_path: Path) -> str:
-    is_text, mime_type = await _is_text_file(filename)
+    is_text, mime_type = await is_text_file(filename)
    if not is_text:
        logger.info(f"Ignore content: {filename}")
        return ""
@ -100,7 +100,17 @@ async def _write_file(filename: Path, repo_path: Path) -> str:
        return ""


-async def _is_text_file(filename: Path) -> Tuple[bool, str]:
+async def is_text_file(filename: Union[str, Path]) -> Tuple[bool, str]:
+    """
+    Determines if the specified file is a text file based on its MIME type.
+
+    Args:
+        filename (Union[str, Path]): The path to the file.
+
+    Returns:
+        Tuple[bool, str]: A tuple where the first element indicates if the file is a text file
+        (True for text file, False otherwise), and the second element is the MIME type of the file.
+    """
    pass_set = {
        "application/json",
        "application/vnd.chipnuts.karaoke-mmd",
@ -129,7 +139,7 @@ async def _is_text_file(filename: Path) -> Tuple[bool, str]:
        "image/vnd.microsoft.icon",
        "video/mp4",
    }
-    mime_type = await get_mime_type(filename, force_read=True)
+    mime_type = await get_mime_type(Path(filename), force_read=True)
    v = "text/" in mime_type or mime_type in pass_set
    if v:
        return True, mime_type
--- a/tests/data/movie/trailer.mp4
+++ b/tests/data/movie/trailer.mp4
--- a/tests/data/requirements/2.pdf
+++ b/tests/data/requirements/2.pdf
--- a/tests/data/ui/1b.png.html
+++ b/tests/data/ui/1b.png.html
@ -0,0 +1,164 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>法务小超人</title>
+    <style>
+        body {
+            margin: 0;
+            font-family: Arial, sans-serif;
+        }
+        .container {
+            text-align: center;
+            background-color: #f8f8f8;
+            padding: 10px;
+        }
+        .header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            padding: 10px 20px;
+            background-color: #fff;
+            border-bottom: 1px solid #e6e6e6;
+        }
+        .header img {
+            height: 24px;
+        }
+        .header .menu-icons {
+            display: flex;
+            gap: 10px;
+        }
+        .header .menu-icons img {
+            height: 24px;
+        }
+        .search-section {
+            background-image: url('background-image.jpg'); /* Replace with actual background image */
+            background-size: cover;
+            color: white;
+            padding: 40px 20px;
+        }
+        .search-section h1 {
+            margin: 0;
+            font-size: 24px;
+        }
+        .search-input {
+            margin: 20px 0;
+            display: flex;
+            justify-content: center;
+        }
+        .search-input input {
+            width: 300px;
+            padding: 10px;
+            border-radius: 5px 0 0 5px;
+            border: none;
+        }
+        .search-input button {
+            padding: 10px 20px;
+            border-radius: 0 5px 5px 0;
+            border: none;
+            background-color: #007BFF;
+            color: white;
+            cursor: pointer;
+        }
+        .search-result-count {
+            margin: 10px 0;
+        }
+        .qa-section {
+            background-color: #fff;
+            padding: 20px;
+            margin-top: -20px;
+            border-top-left-radius: 20px;
+            border-top-right-radius: 20px;
+            box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.1);
+        }
+        .qa-item {
+            display: flex;
+            justify-content: space-between;
+            padding: 15px 0;
+            border-bottom: 1px solid #e6e6e6;
+        }
+        .qa-item:last-child {
+            border-bottom: none;
+        }
+        .qa-item a {
+            text-decoration: none;
+            color: #333;
+        }
+        .qa-item img {
+            height: 20px;
+        }
+        .footer {
+            display: flex;
+            justify-content: space-around;
+            background-color: #fff;
+            border-top: 1px solid #e6e6e6;
+            padding: 10px 0;
+            position: fixed;
+            width: 100%;
+            bottom: 0;
+        }
+        .footer img {
+            height: 24px;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <div class="logo">
+                <img src="logo.png" alt="法务小超人"> <!-- Replace with actual logo -->
+            </div>
+            <div class="menu-icons">
+                <img src="menu-icon.png" alt="Menu"> <!-- Replace with actual menu icon -->
+                <img src="more-icon.png" alt="More"> <!-- Replace with actual more icon -->
+            </div>
+        </div>
+        <div class="search-section">
+            <h1>法律意见查询</h1>
+            <div class="search-input">
+                <input type="text" placeholder="输入国家名查询法律意见">
+                <button>
+                    <img src="search-icon.png" alt="Search"> <!-- Replace with actual search icon -->
+                </button>
+            </div>
+            <div class="search-result-count">
+                已收录法律意见8394篇
+            </div>
+        </div>
+        <div class="qa-section">
+            <h2>法务 Q&A</h2>
+            <div class="qa-item">
+                <a href="#">国际法务接口人</a>
+                <img src="arrow.png" alt="Arrow"> <!-- Replace with actual arrow icon -->
+            </div>
+            <div class="qa-item">
+                <a href="#">国内法务接口人</a>
+                <img src="arrow.png" alt="Arrow"> <!-- Replace with actual arrow icon -->
+            </div>
+            <div class="qa-item">
+                <a href="#">国际法律协议合同办理指引</a>
+                <img src="arrow.png" alt="Arrow"> <!-- Replace with actual arrow icon -->
+            </div>
+            <div class="qa-item">
+                <a href="#">国内法律协议合同办理指引</a>
+                <img src="arrow.png" alt="Arrow"> <!-- Replace with actual arrow icon -->
+            </div>
+        </div>
+        <div class="footer">
+            <div class="footer-item">
+                <img src="home-icon.png" alt="首页"> <!-- Replace with actual home icon -->
+                <div>首页</div>
+            </div>
+            <div class="footer-item">
+                <img src="template-icon.png" alt="模板"> <!-- Replace with actual template icon -->
+                <div>模板</div>
+            </div>
+            <div class="footer-item">
+                <img src="my-icon.png" alt="我的"> <!-- Replace with actual my icon -->
+                <div>我的</div>
+            </div>
+        </div>
+    </div>
+</body>
+</html>
--- a/tests/metagpt/exp_pool/test_decorator.py
+++ b/tests/metagpt/exp_pool/test_decorator.py
@ -159,7 +159,7 @@ class TestExpCache:

    @pytest.mark.asyncio
    async def test_exp_cache_disabled(self, mock_config, mock_exp_manager):
-        mock_config.exp_pool.enable_read = False
+        mock_config.exp_pool.enabled = False

        @exp_cache(manager=mock_exp_manager)
        async def test_func(req):
--- a/tests/metagpt/exp_pool/test_manager.py
+++ b/tests/metagpt/exp_pool/test_manager.py
@ -10,7 +10,7 @@ from metagpt.exp_pool.schema import QueryType
 class TestExperienceManager:
    @pytest.fixture
    def mock_config(self):
-        return Config(llm=LLMConfig(), exp_pool=ExperiencePoolConfig(enable_write=True, enable_read=True))
+        return Config(llm=LLMConfig(), exp_pool=ExperiencePoolConfig(enable_write=True, enable_read=True, enabled=True))

    @pytest.fixture
    def mock_storage(self, mocker):
--- a/tests/metagpt/roles/di/run_data_analyst.py
+++ b/tests/metagpt/roles/di/run_data_analyst.py
@ -1,7 +1,7 @@
 from metagpt.roles.di.data_analyst import DataAnalyst

-HOUSE_PRICE_TRAIN_PATH = '/data/house-prices-advanced-regression-techniques/split_train.csv'
-HOUSE_PRICE_EVAL_PATH = '/data/house-prices-advanced-regression-techniques/split_eval.csv'
+HOUSE_PRICE_TRAIN_PATH = "/data/house-prices-advanced-regression-techniques/split_train.csv"
+HOUSE_PRICE_EVAL_PATH = "/data/house-prices-advanced-regression-techniques/split_eval.csv"
 HOUSE_PRICE_REQ = f"""
 This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{HOUSE_PRICE_TRAIN_PATH}', eval data path: '{HOUSE_PRICE_EVAL_PATH}'.
 """
--- a/tests/metagpt/tools/libs/test_editor.py
+++ b/tests/metagpt/tools/libs/test_editor.py
@ -27,7 +27,7 @@ def test_file():

 EXPECTED_SEARCHED_BLOCK = FileBlock(
    file_path=str(TEST_FILE_PATH),
-    block_content='# this is line one\ndef test_function_for_fm():\n    "some docstring"\n    a = 1\n    b = 2\n',
+    block_content='001|# this is line one\n002|def test_function_for_fm():\n003|    "some docstring"\n004|    a = 1\n005|    b = 2\n',
    block_start_line=1,
    block_end_line=5,
    symbol="def test_function_for_fm",
@ -50,6 +50,7 @@ def test_function_for_fm():
 """.strip()


+@pytest.mark.skip
 def test_replace_content(test_file):
    Editor().write_content(
        file_path=str(TEST_FILE_PATH),
@ -89,6 +90,7 @@ def test_function_for_fm():
 """.strip()


+@pytest.mark.skip
 def test_insert_content(test_file):
    Editor().write_content(
        file_path=str(TEST_FILE_PATH),
@ -101,6 +103,7 @@ def test_insert_content(test_file):
    assert new_content == EXPECTED_CONTENT_AFTER_INSERT


+@pytest.mark.skip
 def test_new_content_wrong_indentation(test_file):
    msg = Editor().write_content(
        file_path=str(TEST_FILE_PATH),
@ -111,6 +114,7 @@ def test_new_content_wrong_indentation(test_file):
    assert "failed" in msg


+@pytest.mark.skip
 def test_new_content_format_issue(test_file):
    msg = Editor().write_content(
        file_path=str(TEST_FILE_PATH),
@ -119,3 +123,33 @@ def test_new_content_format_issue(test_file):
        new_block_content="    # This is the new line to be inserted, at line 3  ",  # trailing spaces are format issue only, and should not throw an error
    )
    assert "failed" not in msg
+
+
+@pytest.mark.parametrize(  # runs test_read_files once per fixture path listed below
+    "filename",
+    [
+        TEST_DATA_PATH / "requirements/1.txt",
+        TEST_DATA_PATH / "requirements/1.json",
+        TEST_DATA_PATH / "requirements/1.constraint.md",
+        TEST_DATA_PATH / "requirements/pic/1.png",
+        TEST_DATA_PATH / "docx_for_test.docx",
+        TEST_DATA_PATH / "requirements/2.pdf",
+        TEST_DATA_PATH / "audio/hello.mp3",
+        TEST_DATA_PATH / "code/python/1.py",
+        TEST_DATA_PATH / "code/js/1.js",
+        TEST_DATA_PATH / "ui/1b.png.html",  # .suffix is the last extension only (".html"), so block_content IS asserted for this entry
+        TEST_DATA_PATH / "movie/trailer.mp4",
+    ],
+)
+@pytest.mark.asyncio
+async def test_read_files(filename):  # checks that Editor.read returns a truthy block with a file_path for each fixture
+    editor = Editor()
+    file_block = await editor.read(filename)
+    assert file_block
+    assert file_block.file_path
+    if filename.suffix not in [".png", ".mp3", ".mp4"]:  # NOTE(review): content is not asserted for these suffixes; presumably no text is extracted from image/audio/video files, confirm against Editor.read
+        assert file_block.block_content
+
+
+if __name__ == "__main__":  # allows running this test module directly as a script
+    pytest.main([__file__, "-s"])  # "-s" disables pytest's output capturing so prints/logs are shown