From 075a82e6f499856927d9b7465c2804e035231241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Thu, 8 Aug 2024 20:44:12 +0800 Subject: [PATCH 01/19] =?UTF-8?q?=E5=B0=86=E8=A7=92=E8=89=B2=E5=AE=9A?= =?UTF-8?q?=E4=B9=89=EF=BC=8C=E8=A7=92=E8=89=B2instruction=E5=92=8C?= =?UTF-8?q?=E5=B7=A5=E5=85=B7=E5=8A=A0=E5=85=A5=E5=BC=80=E5=A7=8B=E7=9A=84?= =?UTF-8?q?system=5Fprompt=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 36 ++++++++++++++++++++++----------- metagpt/roles/di/role_zero.py | 25 ++++++++++++++++------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 8443a7960..de4bdb066 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -11,9 +11,14 @@ Note: 5. Avoid repeating tasks you have already completed. And end loop when all requirements are met. """ # To ensure compatibility with hard-coded experience, do not add any other content between "# Example" and "# Instruction". -CMD_PROMPT = """ + +#################### # Latest Observation -{latest_observation} +# {latest_observation} + + +########################### +INSTRUCTION_GUIDANCE = """ # Data Structure class Task(BaseModel): @@ -30,11 +35,6 @@ class Task(BaseModel): {available_commands} Special Command: Use {{"command_name": "end"}} to do nothing or indicate completion of all requirements and the end of actions. -# Current Plan -{plan_status} - -# Current Task -{current_task} # Example {example} @@ -42,6 +42,22 @@ Special Command: Use {{"command_name": "end"}} to do nothing or indicate complet # Instruction {instruction} +""" + + +# {thought_guidance} +# Finally, combine your thoughts, describe what you want to do conscisely in 20 words, including which process you will taked and whether you will end, then follow your thoughts to list the commands, adhering closely to the instructions provided. + +CMD_PROMPT = """ +# Current Plan +{plan_status} + +# Current Task +{current_task} + +# Restrictions +{requirements_constraints} + Pay close attention to the Example provided, you can reuse the example for your current situation if it fits. You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially. @@ -49,14 +65,9 @@ If you finish current task, you will automatically take the next task in the exi Review the latest plan's outcome, focusing on achievements. If your completed task matches the current, consider it finished. In your response, include at least one command. -# Restrictions -{requirements_constraints} - # Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command: Some text indicating your thoughts before JSON is required, such as what tasks have been completed, what tasks are next, how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. You must output ONE and ONLY ONE json array. DON'T output multiple json arrays with thoughts between them. Output should adhere to the following format. -{thought_guidance} -Finally, combine your thoughts, describe what you want to do conscisely in 20 words, including which process you will taked and whether you will end, then follow your thoughts to list the commands, adhering closely to the instructions provided. ```json [ {{ @@ -68,6 +79,7 @@ Finally, combine your thoughts, describe what you want to do conscisely in 20 wo ``` Notice: your output JSON data section must start with **```json [** """ + THOUGHT_GUIDANCE = """ First, describe the actions you have taken recently. Second, describe the messages you have received recently, with a particular emphasis on messages from users. If necessary, develop a plan to address the new user requirements. diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 773124dcc..388eca13e 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -19,6 +19,7 @@ from metagpt.logs import logger from metagpt.prompts.di.role_zero import ( ASK_HUMAN_COMMAND, CMD_PROMPT, + INSTRUCTION_GUIDANCE, JSON_REPAIR_PROMPT, QUICK_THINK_PROMPT, REGENERATE_PROMPT, @@ -47,6 +48,7 @@ class RoleZero(Role): profile: str = "RoleZero" goal: str = "" system_msg: list[str] = None # Use None to conform to the default value at llm.aask + instruction_system_prpomt: str = INSTRUCTION_GUIDANCE cmd_prompt: str = CMD_PROMPT thought_guidance: str = THOUGHT_GUIDANCE instruction: str = ROLE_INSTRUCTION @@ -152,18 +154,25 @@ class RoleZero(Role): tools = await self.tool_recommender.recommend_tools() tool_info = json.dumps({tool.name: tool.schemas for tool in tools}) - ### Make Decision Dynamically ### - memory = self.rc.memory.get(self.memory_k) instruction = self.instruction.strip() - prompt = self.cmd_prompt.format( + instruction_system_prpomt = self.instruction_system_prpomt.format( example=example, available_commands=tool_info, + instruction=instruction, task_type_desc=self.task_type_desc, + ) + guidance_system_msgs = [instruction_system_prpomt] + if self.system_msg: + guidance_system_msgs = self.system_msg + guidance_system_msgs + + # print(("\n"+"="*10+"\n").join(guidance_system_msgs)) + ### Make Decision Dynamically ### + memory = self.rc.memory.get(self.memory_k) + prompt = self.cmd_prompt.format( plan_status=plan_status, current_task=current_task, - instruction=instruction, - thought_guidance=self.thought_guidance, - latest_observation=memory[-1].content, + # thought_guidance=self.thought_guidance, + # latest_observation=memory[-1].content, requirements_constraints=self.requirements_constraints, ) memory = await self.parse_browser_actions(memory) @@ -175,7 +184,9 @@ class RoleZero(Role): current_task=current_task, instruction=instruction, ) - self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=self.system_msg, state_data=state_data) + self.command_rsp = await self.llm_cached_aask( + req=req, system_msgs=guidance_system_msgs, state_data=state_data + ) self.command_rsp = await self._check_duplicates(req, self.command_rsp) From ea990d8d6876b93daaee1779284575a232c9915e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Thu, 8 Aug 2024 21:10:08 +0800 Subject: [PATCH 02/19] combine_system_prompt --- metagpt/roles/di/role_zero.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 388eca13e..8093eda40 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -163,16 +163,13 @@ class RoleZero(Role): ) guidance_system_msgs = [instruction_system_prpomt] if self.system_msg: - guidance_system_msgs = self.system_msg + guidance_system_msgs + guidance_system_msgs = [self.system_msg[0] + guidance_system_msgs[0]] - # print(("\n"+"="*10+"\n").join(guidance_system_msgs)) ### Make Decision Dynamically ### memory = self.rc.memory.get(self.memory_k) prompt = self.cmd_prompt.format( plan_status=plan_status, current_task=current_task, - # thought_guidance=self.thought_guidance, - # latest_observation=memory[-1].content, requirements_constraints=self.requirements_constraints, ) memory = await self.parse_browser_actions(memory) From 36db2b067efed8d86ba00572c103b4411040be27 Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Fri, 9 Aug 2024 16:05:48 +0800 Subject: [PATCH 03/19] exp_pool add `enabled` --- config/config2.example.yaml | 1 + metagpt/configs/exp_pool_config.py | 4 ++++ metagpt/exp_pool/decorator.py | 2 +- metagpt/exp_pool/manager.py | 4 ++-- metagpt/exp_pool/schema.py | 3 ++- tests/metagpt/exp_pool/test_decorator.py | 2 +- tests/metagpt/exp_pool/test_manager.py | 2 +- 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/config/config2.example.yaml b/config/config2.example.yaml index 330b73680..ba480d984 100644 --- a/config/config2.example.yaml +++ b/config/config2.example.yaml @@ -75,6 +75,7 @@ s3: bucket: "test" exp_pool: + enabled: false enable_read: false enable_write: false persist_path: .chroma_exp_data # The directory. diff --git a/metagpt/configs/exp_pool_config.py b/metagpt/configs/exp_pool_config.py index 786558ed9..e2872179f 100644 --- a/metagpt/configs/exp_pool_config.py +++ b/metagpt/configs/exp_pool_config.py @@ -4,6 +4,10 @@ from metagpt.utils.yaml_model import YamlModel class ExperiencePoolConfig(YamlModel): + enabled: bool = Field( + default=False, + description="Flag to enable or disable the experience pool. When disabled, both reading and writing are ineffective.", + ) enable_read: bool = Field(default=False, description="Enable to read from experience pool.") enable_write: bool = Field(default=False, description="Enable to write to experience pool.") persist_path: str = Field(default=".chroma_exp_data", description="The persist path for experience pool.") diff --git a/metagpt/exp_pool/decorator.py b/metagpt/exp_pool/decorator.py index 21242277d..777d55ca9 100644 --- a/metagpt/exp_pool/decorator.py +++ b/metagpt/exp_pool/decorator.py @@ -50,7 +50,7 @@ def exp_cache( """ def decorator(func: Callable[..., ReturnType]) -> Callable[..., ReturnType]: - if not config.exp_pool.enable_read: + if not config.exp_pool.enabled: return func @functools.wraps(func) diff --git a/metagpt/exp_pool/manager.py b/metagpt/exp_pool/manager.py index 1fcabc0d2..b6ae9c0a3 100644 --- a/metagpt/exp_pool/manager.py +++ b/metagpt/exp_pool/manager.py @@ -74,7 +74,7 @@ class ExperienceManager(BaseModel): exp (Experience): The experience to add. """ - if not self.config.exp_pool.enable_write: + if not self.config.exp_pool.enabled or not self.config.exp_pool.enable_write: return self.storage.add_objs([exp]) @@ -92,7 +92,7 @@ class ExperienceManager(BaseModel): list[Experience]: A list of experiences that match the args. """ - if not self.config.exp_pool.enable_read: + if not self.config.exp_pool.enabled or not self.config.exp_pool.enable_read: return [] nodes = await self.storage.aretrieve(req) diff --git a/metagpt/exp_pool/schema.py b/metagpt/exp_pool/schema.py index 627dcbb4e..b119e5850 100644 --- a/metagpt/exp_pool/schema.py +++ b/metagpt/exp_pool/schema.py @@ -1,5 +1,5 @@ """Experience schema.""" - +import time from enum import Enum from typing import Optional @@ -67,6 +67,7 @@ class Experience(BaseModel): entry_type: EntryType = Field(default=EntryType.AUTOMATIC, description="Type of entry: Manual or Automatic.") tag: str = Field(default="", description="Tagging experience.") traj: Optional[Trajectory] = Field(default=None, description="Trajectory.") + timestamp: Optional[float] = Field(default_factory=time.time) def rag_key(self): return self.req diff --git a/tests/metagpt/exp_pool/test_decorator.py b/tests/metagpt/exp_pool/test_decorator.py index 0c02dcdfc..0ca4c6ce1 100644 --- a/tests/metagpt/exp_pool/test_decorator.py +++ b/tests/metagpt/exp_pool/test_decorator.py @@ -159,7 +159,7 @@ class TestExpCache: @pytest.mark.asyncio async def test_exp_cache_disabled(self, mock_config, mock_exp_manager): - mock_config.exp_pool.enable_read = False + mock_config.exp_pool.enabled = False @exp_cache(manager=mock_exp_manager) async def test_func(req): diff --git a/tests/metagpt/exp_pool/test_manager.py b/tests/metagpt/exp_pool/test_manager.py index 3fb2aaa6d..4d298a44e 100644 --- a/tests/metagpt/exp_pool/test_manager.py +++ b/tests/metagpt/exp_pool/test_manager.py @@ -10,7 +10,7 @@ from metagpt.exp_pool.schema import QueryType class TestExperienceManager: @pytest.fixture def mock_config(self): - return Config(llm=LLMConfig(), exp_pool=ExperiencePoolConfig(enable_write=True, enable_read=True)) + return Config(llm=LLMConfig(), exp_pool=ExperiencePoolConfig(enable_write=True, enable_read=True, enabled=True)) @pytest.fixture def mock_storage(self, mocker): From f87386b48d40fd82b49c30e2544ff5bf5e04e411 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Fri, 9 Aug 2024 16:34:27 +0800 Subject: [PATCH 04/19] add-few-shots-quick-think-prompt --- metagpt/prompts/di/role_zero.py | 46 +++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 8443a7960..c05d899ce 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -109,13 +109,55 @@ Help check if there are any formatting issues with the JSON data? If so, please If no issues are detected, the original json data should be returned unchanged. Do not omit any information. Output the JSON data in a format that can be loaded by the json.loads() function. """ +QUICK_THINK_SYSTEM_PROMPT = """ +{role_info} +Your team member: +{team_info} +However, you MUST respond to the user message by yourself directly, DON'T ask your team members. +""" + +QUICK_THINK_PROMPT_V2 = """ +Determine if the latest user message qualifies as a "quick question." Quick questions include: + +- Common-sense inquiries (e.g., general knowledge, factual information) +- Legal, logical, or math-related questions +- Multiple-choice questions +- Greetings or casual chat +- Questions about you or your team + +## Exclusions: +- Time- or location-sensitive questions (e.g., weather, news inquiries) are NOT quick questions. +- Software development tasks are NOT quick questions, except for: + + - Writing trivial code snippets (fewer than 30 lines) + - Completing a single function or class + - Explaining concepts, writing tutorials, or creating documentation + +Respond with a concise thought followed by "YES" if the question is a quick question. Otherwise, respond with "NO." Your response: +""" QUICK_THINK_PROMPT = """ Decide if the latest user message previously is a quick question. Quick questions include common-sense, legal, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly. Questions about you or your team info are also quick questions. -Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search. Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question. However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. -Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO or a SEARCH. Your response: +Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search. +If the query is ambiguous, you should output OOD (Out of Domain) to indicate the question is out of the domain. + +Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO, a SEARCH, or an OOD. Your response: """ + +QUICK_THINK_EXAMPLES =""" +# Example + +1. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "How to design an online document editing platform that supports real-time collaboration? Please answer me directly."}}], We can get the response: (It requires an direct answer) should be answered with "YES.", which scored: 10. +2. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Help me find some of the latest research papers on deep learning."}}], We can get the response: (This is a time-sensitive question) should be answered with "SEARCH.", which scored: 10. +3. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Tell me the difference between supervised learning and unsupervised learning in machine learning."}}], We can get the response: (This is a general knowledge question) should be answered with "YES.", which scored: 10. +4. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Recommend some programming practice websites suitable for beginners."}}], We can get the response: (This is a general knowledge question) should be answered with "YES.", which scored: 10. +5. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Make a personal website that runs Game of Life."}}], We can get the response: (This is a software development task) should be answered with "NO.", which scored: 10. + +# Instruction +""" + +QUICK_THINK_PROMPT = QUICK_THINK_EXAMPLES + QUICK_THINK_PROMPT \ No newline at end of file From d900a01844d8eb930c77c9bd7dcc32398687e47d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Fri, 9 Aug 2024 17:47:35 +0800 Subject: [PATCH 05/19] fixbug: async Editor.read --- metagpt/tools/libs/editor.py | 18 ++++++++--------- metagpt/utils/common.py | 27 ------------------------- tests/metagpt/tools/libs/test_editor.py | 5 +++-- 3 files changed, 12 insertions(+), 38 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index 2ce0490f9..40625a992 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -11,7 +11,7 @@ from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.logs import logger from metagpt.tools.tool_registry import register_tool from metagpt.utils import read_docx -from metagpt.utils.common import aread_bin, awrite_bin, run_coroutine_sync +from metagpt.utils.common import aread_bin, awrite_bin from metagpt.utils.repo_to_markdown import is_text_file from metagpt.utils.report import EditorReporter @@ -45,13 +45,13 @@ class Editor: # self.resource.report(path, "path") return f"The writing/coding the of the file {os.path.basename(path)}' is now completed. The file '{os.path.basename(path)}' has been successfully created." - def read(self, path: str) -> FileBlock: + async def read(self, path: str) -> FileBlock: """Read the whole content of a file. Using absolute paths as the argument for specifying the file location.""" - is_text, mime_type = run_coroutine_sync(is_text_file, path) + is_text, mime_type = await is_text_file(path) if is_text: lines = self._read_text(path) elif mime_type == "application/pdf": - lines = self._read_pdf(path) + lines = await self._read_pdf(path) elif mime_type in { "application/msword", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -59,7 +59,7 @@ class Editor: "application/vnd.openxmlformats-officedocument.wordprocessingml.template", "application/vnd.ms-word.template.macroEnabled.12", }: - lines = self._read_docx(path) + lines = await self._read_docx(path) else: return FileBlock(file_path=str(path), block_content="") self.resource.report(str(path), "path") @@ -225,8 +225,8 @@ class Editor: return lines @staticmethod - def _read_pdf(path: Union[str, Path]) -> List[str]: - result = run_coroutine_sync(Editor._omniparse_read_file, path) + async def _read_pdf(path: Union[str, Path]) -> List[str]: + result = await Editor._omniparse_read_file(path) if result: return result @@ -237,8 +237,8 @@ class Editor: return [i.text for i in lines] @staticmethod - def _read_docx(path: Union[str, Path]) -> List[str]: - result = run_coroutine_sync(Editor._omniparse_read_file, path) + async def _read_docx(path: Union[str, Path]) -> List[str]: + result = await Editor._omniparse_read_file(path) if result: return result return read_docx(str(path)) diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index fc1e35a3d..42905c649 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -12,7 +12,6 @@ from __future__ import annotations import ast -import asyncio import base64 import contextlib import csv @@ -1054,32 +1053,6 @@ def tool2name(cls, methods: List[str], entry) -> Dict[str, Any]: return mappings -def run_coroutine_sync(coroutine, *args, **kwargs): - """ - Runs a coroutine function synchronously by encapsulating its invocation as a non-coroutine function call. - - Args: - coroutine: The coroutine function to be encapsulated. - *args: Positional arguments to be passed to the coroutine. - **kwargs: Keyword arguments to be passed to the coroutine. - - Returns: - The return value of the coroutine. - """ - try: - loop = asyncio.get_running_loop() - except RuntimeError: # No running event loop - loop = None - - if loop and loop.is_running(): - # The event loop is already running - future = asyncio.run_coroutine_threadsafe(coroutine(*args, **kwargs), loop) - return future.result() - else: - # The event loop is not running - return asyncio.run(coroutine(*args, **kwargs)) - - def log_time(method): """A time-consuming decorator for printing execution duration.""" diff --git a/tests/metagpt/tools/libs/test_editor.py b/tests/metagpt/tools/libs/test_editor.py index 535315fad..64149fdb7 100644 --- a/tests/metagpt/tools/libs/test_editor.py +++ b/tests/metagpt/tools/libs/test_editor.py @@ -141,9 +141,10 @@ def test_new_content_format_issue(test_file): TEST_DATA_PATH / "movie/trailer.mp4", ], ) -def test_read_files(filename): +@pytest.mark.asyncio +async def test_read_files(filename): editor = Editor() - file_block = editor.read(filename) + file_block = await editor.read(filename) assert file_block assert file_block.file_path if filename.suffix not in [".png", ".mp3", ".mp4"]: From 43340b79eb55c46b99e2c0422bdd8c208cc10434 Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Fri, 9 Aug 2024 18:22:50 +0800 Subject: [PATCH 06/19] solve anti-robot in web search --- metagpt/actions/research.py | 8 +++++++- metagpt/actions/search_enhanced_qa.py | 14 +++++++++++++- metagpt/tools/web_browser_engine_playwright.py | 8 +++----- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 7b7a2e911..0522fbd19 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -180,7 +180,13 @@ class CollectLinks(Action): results = self.rank_func(results) return [i["link"] for i in results[:num_results]] - async def _search_urls(self, query: str, max_results: int) -> list[str]: + async def _search_urls(self, query: str, max_results: int) -> list[dict[str, str]]: + """Use search_engine to get urls + + Returns: + e.g. [{"title": "...", "link": "...", "snippet", "..."}] + """ + return await self.search_engine.run(query, max_results=max_results, as_string=False) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index ebf945fd6..c2538dbfb 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -74,6 +74,14 @@ class SearchEnhancedQA(Action): java_script_enabled: bool = Field( default=False, description="Whether or not to enable JavaScript in the web browser context. Defaults to False." ) + user_agent: str = Field( + default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.81", + description="Specific user agent to use in browser", + ) + extra_http_headers: dict = Field( + default={"sec-ch-ua": 'Chromium";v="125", "Not.A/Brand";v="24'}, + description="An object containing additional HTTP headers to be sent with every request.", + ) max_chars_per_webpage_summary: int = Field( default=4000, description="Maximum summary length for each web page content." ) @@ -86,7 +94,11 @@ class SearchEnhancedQA(Action): def initialize(self): if self.web_browse_and_summarize_action is None: self.web_browser_engine = WebBrowserEngine.from_browser_config( - self.config.browser, proxy=self.config.proxy, java_script_enabled=self.java_script_enabled + self.config.browser, + proxy=self.config.proxy, + java_script_enabled=self.java_script_enabled, + extra_http_headers=self.extra_http_headers, + user_agent=self.user_agent, ) self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=self.web_browser_engine) diff --git a/metagpt/tools/web_browser_engine_playwright.py b/metagpt/tools/web_browser_engine_playwright.py index 33f6ae3a5..f38a3b296 100644 --- a/metagpt/tools/web_browser_engine_playwright.py +++ b/metagpt/tools/web_browser_engine_playwright.py @@ -39,11 +39,9 @@ class PlaywrightWrapper(BaseModel): if not any(str.startswith(i, "--proxy-server=") for i in args): launch_kwargs["proxy"] = {"server": self.proxy} - if "ignore_https_errors" in kwargs: - self.context_kwargs["ignore_https_errors"] = kwargs["ignore_https_errors"] - - if "java_script_enabled" in kwargs: - self.context_kwargs["java_script_enabled"] = kwargs["java_script_enabled"] + for key in ["ignore_https_errors", "java_script_enabled", "extra_http_headers", "user_agent"]: + if key in kwargs: + self.context_kwargs[key] = kwargs[key] async def run(self, url: str, *urls: str, per_page_timeout: float = None) -> WebPage | list[WebPage]: async with async_playwright() as ap: From 75b015bdf0e684282f2cee5b3bbd55be7555ac53 Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Fri, 9 Aug 2024 18:27:31 +0800 Subject: [PATCH 07/19] solve anti-robot in web search --- metagpt/actions/research.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 0522fbd19..5e670520c 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -181,7 +181,7 @@ class CollectLinks(Action): return [i["link"] for i in results[:num_results]] async def _search_urls(self, query: str, max_results: int) -> list[dict[str, str]]: - """Use search_engine to get urls + """Use search_engine to get urls. Returns: e.g. [{"title": "...", "link": "...", "snippet", "..."}] From 8f6f71ba7efc2c8e39602c29ba050d549133818b Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Fri, 9 Aug 2024 18:30:28 +0800 Subject: [PATCH 08/19] solve anti-robot in web search --- metagpt/actions/search_enhanced_qa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index c2538dbfb..1d7944d61 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -76,7 +76,7 @@ class SearchEnhancedQA(Action): ) user_agent: str = Field( default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.81", - description="Specific user agent to use in browser", + description="Specific user agent to use in browser.", ) extra_http_headers: dict = Field( default={"sec-ch-ua": 'Chromium";v="125", "Not.A/Brand";v="24'}, From f338d168ca5183c1a7e958fc87b0a295ee203896 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Fri, 9 Aug 2024 18:58:51 +0800 Subject: [PATCH 09/19] =?UTF-8?q?=E5=90=88=E5=B9=B6teamleader=EF=BC=8Cswe-?= =?UTF-8?q?agent=E7=9A=84=E5=8E=9F=E7=B3=BB=E7=BB=9F=E6=8F=90=E7=A4=BA?= =?UTF-8?q?=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 15 +++--- metagpt/prompts/di/swe_agent.py | 81 +++++++++++++++---------------- metagpt/prompts/di/team_leader.py | 8 +-- metagpt/roles/di/role_zero.py | 25 ++++------ metagpt/roles/di/swe_agent.py | 7 +-- metagpt/roles/di/team_leader.py | 2 - 6 files changed, 60 insertions(+), 78 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index de4bdb066..a92a3c075 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -12,13 +12,16 @@ Note: """ # To ensure compatibility with hard-coded experience, do not add any other content between "# Example" and "# Instruction". -#################### +########################## ignore guidance + # Latest Observation # {latest_observation} +# {thought_guidance} +# Finally, combine your thoughts, describe what you want to do conscisely in 20 words, including which process you will taked and whether you will end, then follow your thoughts to list the commands, adhering closely to the instructions provided. ########################### -INSTRUCTION_GUIDANCE = """ +SYSTEM_PROMPT = """ # Data Structure class Task(BaseModel): @@ -44,10 +47,6 @@ Special Command: Use {{"command_name": "end"}} to do nothing or indicate complet {instruction} """ - -# {thought_guidance} -# Finally, combine your thoughts, describe what you want to do conscisely in 20 words, including which process you will taked and whether you will end, then follow your thoughts to list the commands, adhering closely to the instructions provided. - CMD_PROMPT = """ # Current Plan {plan_status} @@ -58,7 +57,6 @@ CMD_PROMPT = """ # Restrictions {requirements_constraints} - Pay close attention to the Example provided, you can reuse the example for your current situation if it fits. You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially. If you finish current task, you will automatically take the next task in the existing plan, use Plan.finish_task, DON'T append a new task. @@ -112,6 +110,9 @@ JSON_REPAIR_PROMPT = """ ## json data {json_data} +## json decode error +{json_decode_error} + ## Output Format ```json diff --git a/metagpt/prompts/di/swe_agent.py b/metagpt/prompts/di/swe_agent.py index 7455cf30a..87332f9ef 100644 --- a/metagpt/prompts/di/swe_agent.py +++ b/metagpt/prompts/di/swe_agent.py @@ -4,48 +4,6 @@ You can find the original examples from the SWE-agent project here: https://github.com/princeton-nlp/SWE-agent/tree/main/config/configs """ -SWE_AGENT_SYSTEM_TEMPLATE = """ -SETTING: You are an autonomous programmer, and you're working directly in the environment line with a special interface. - -The special interface consists of a file editor that shows you 100 lines of a file at a time. - -Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. -If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. -Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them. - -Your output should always contain a section of reasoning and a command described in JSON format. - -Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes. -As shown in the example below: - -First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. - -```json -{{ - "command_name": "Bash.run", - "args": {{ - "cmd": "ls -a" - }} -}} -``` - -You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. -If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. -Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. - -You can use any bash commands you want (e.g., find, grep, cat, ls, cd) or any custom special tools (including `edit`) by calling Bash.run. Edit all the files you need. -You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors. - -However, the Bash.run does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. - -In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix. - -# INSTRUCTIONS: -Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it. -All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time. -Your terminal session has started, and you can use any bash commands or the special interface to help you. Edit all the files you need. -""" - MINIMAL_EXAMPLE = """ ## Example of a actions trajectory User Requirement and Issue: Fix the bug in the repo. Because the environment is not available, you DO NOT need to run and modify any existing test case files or add new test case files to ensure that the bug is fixed. @@ -227,6 +185,45 @@ IMPORTANT_TIPS = """ """ NEXT_STEP_TEMPLATE = f""" +SETTING: You are an autonomous programmer, and you're working directly in the environment line with a special interface. + +The special interface consists of a file editor that shows you 100 lines of a file at a time. + +Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION. Pay attention to the original indentation when replacing the function. +If you'd like to add the line ' print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +Always review your changes post-edit to ensure they accurately reflect your intentions. If the changes are not as desired, don't hesitate to issue another command to correct them. + +Your output should always contain a section of reasoning and a command described in JSON format. + +Use \\n to represent line breaks, ensuring the command conforms to the JSON format and is displayed on a single line. Except for the `edit` command, each parameter of the command needs to be enclosed in single quotes. +As shown in the example below: + +First I'll start by using ls to see what files are in the current directory. Then maybe we can look at some relevant files to see what they look like. + +```json +{{ + "command_name": "Bash.run", + "args": {{ + "cmd": "ls -a" + }} +}} +``` + +You should only include a *SINGLE* command in the command section and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference. +If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first command, and then after receiving a response you'll be able to issue the second command. +Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command. + +You can use any bash commands you want (e.g., find, grep, cat, ls, cd) or any custom special tools (including `edit`) by calling Bash.run. Edit all the files you need. +You should carefully observe the behavior and results of the previous action, and avoid triggering repeated errors. + +However, the Bash.run does NOT support interactive session commands (e.g. python, vim), so please do not invoke them. + +In addition to the terminal, I also provide additional tools. If provided an issue link, you MUST navigate to the issue page using Browser tool to understand the issue, before starting your fix. + +# INSTRUCTIONS: +Your first action must be to check if the repository exists at the current path. If it exists, navigate to the repository path. If the repository doesn't exist, please download it and then navigate to it. +All subsequent actions must be performed within this repository path. Do not leave this directory to execute any actions at any time. +Your terminal session has started, and you can use any bash commands or the special interface to help you. Edit all the files you need. # Example of Output These examples are provided to demonstrate the output style that expected to be several stages including Locate issue, Fix the bug, Test the fix(Optional), and Submit the changes. It is included to show you how to correctly use the interface. You do not need to follow exactly what is done in the Example. The separator is "-----". ----- Beginning of Examples ----- diff --git a/metagpt/prompts/di/team_leader.py b/metagpt/prompts/di/team_leader.py index 2dd69d189..ad473dfff 100644 --- a/metagpt/prompts/di/team_leader.py +++ b/metagpt/prompts/di/team_leader.py @@ -1,16 +1,12 @@ from metagpt.prompts.di.role_zero import THOUGHT_GUIDANCE -SYSTEM_PROMPT = """ -You are a team leader, and you are responsible for drafting tasks and routing tasks to your team members. -When drafting and routing tasks, ALWAYS include necessary or important info inside the instruction, such as path, link, environment to team members, because you are their sole info source. -Each time you do something, reply to human letting them know what you did. -""" - TL_INSTRUCTION = """ You are a team leader, and you are responsible for drafting tasks and routing tasks to your team members. Your team member: {team_info} You should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it. +When drafting and routing tasks, ALWAYS include necessary or important info inside the instruction, such as path, link, environment to team members, because you are their sole info source. +Each time you do something, reply to human letting them know what you did. When creating a new plan involving multiple members, create all tasks at once. If plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc. You should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source. diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 8093eda40..af15fb745 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -19,11 +19,11 @@ from metagpt.logs import logger from metagpt.prompts.di.role_zero import ( ASK_HUMAN_COMMAND, CMD_PROMPT, - INSTRUCTION_GUIDANCE, JSON_REPAIR_PROMPT, QUICK_THINK_PROMPT, REGENERATE_PROMPT, ROLE_INSTRUCTION, + SYSTEM_PROMPT, THOUGHT_GUIDANCE, ) from metagpt.roles import Role @@ -47,8 +47,7 @@ class RoleZero(Role): name: str = "Zero" profile: str = "RoleZero" goal: str = "" - system_msg: list[str] = None # Use None to conform to the default value at llm.aask - instruction_system_prpomt: str = INSTRUCTION_GUIDANCE + system_prompt: str = SYSTEM_PROMPT # Use None to conform to the default value at llm.aask cmd_prompt: str = CMD_PROMPT thought_guidance: str = THOUGHT_GUIDANCE instruction: str = ROLE_INSTRUCTION @@ -154,25 +153,23 @@ class RoleZero(Role): tools = await self.tool_recommender.recommend_tools() tool_info = json.dumps({tool.name: tool.schemas for tool in tools}) + ### Role Instruction ### instruction = self.instruction.strip() - instruction_system_prpomt = self.instruction_system_prpomt.format( - example=example, - available_commands=tool_info, - instruction=instruction, - task_type_desc=self.task_type_desc, + system_prompt = self.system_prompt.format( + task_type_desc=self.task_type_desc, available_commands=tool_info, example=example, instruction=instruction ) - guidance_system_msgs = [instruction_system_prpomt] - if self.system_msg: - guidance_system_msgs = [self.system_msg[0] + guidance_system_msgs[0]] ### Make Decision Dynamically ### - memory = self.rc.memory.get(self.memory_k) prompt = self.cmd_prompt.format( plan_status=plan_status, current_task=current_task, requirements_constraints=self.requirements_constraints, ) + + ### Recent Observation ### + memory = self.rc.memory.get(self.memory_k) memory = await self.parse_browser_actions(memory) + req = self.llm.format_msg(memory + [UserMessage(content=prompt)]) async with ThoughtReporter(enable_llm_stream=True) as reporter: await reporter.async_report({"type": "react"}) @@ -181,9 +178,7 @@ class RoleZero(Role): current_task=current_task, instruction=instruction, ) - self.command_rsp = await self.llm_cached_aask( - req=req, system_msgs=guidance_system_msgs, state_data=state_data - ) + self.command_rsp = await self.llm_cached_aask(req=req, system_msgs=[system_prompt], state_data=state_data) self.command_rsp = await self._check_duplicates(req, self.command_rsp) diff --git a/metagpt/roles/di/swe_agent.py b/metagpt/roles/di/swe_agent.py index 2384ac147..c8081066e 100644 --- a/metagpt/roles/di/swe_agent.py +++ b/metagpt/roles/di/swe_agent.py @@ -3,11 +3,7 @@ import json from pydantic import Field from metagpt.logs import logger -from metagpt.prompts.di.swe_agent import ( - MINIMAL_EXAMPLE, - NEXT_STEP_TEMPLATE, - SWE_AGENT_SYSTEM_TEMPLATE, -) +from metagpt.prompts.di.swe_agent import MINIMAL_EXAMPLE, NEXT_STEP_TEMPLATE from metagpt.roles.di.role_zero import RoleZero from metagpt.tools.libs.git import git_create_pull from metagpt.tools.libs.terminal import Bash @@ -17,7 +13,6 @@ class SWEAgent(RoleZero): name: str = "Swen" profile: str = "Issue Solver" goal: str = "Resolve GitHub issue or bug in any existing codebase" - system_msg: str = [SWE_AGENT_SYSTEM_TEMPLATE] _instruction: str = NEXT_STEP_TEMPLATE tools: list[str] = [ "Bash", diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index f495c4aaa..12b4b3a18 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -4,7 +4,6 @@ from metagpt.actions.di.run_command import RunCommand from metagpt.prompts.di.team_leader import ( FINISH_CURRENT_TASK_CMD, QUICK_THINK_SYSTEM_PROMPT, - SYSTEM_PROMPT, TL_INSTRUCTION, TL_THOUGHT_GUIDANCE, ) @@ -19,7 +18,6 @@ class TeamLeader(RoleZero): name: str = "Mike" profile: str = "Team Leader" goal: str = "Manage a team to assist users" - system_msg: list[str] = [SYSTEM_PROMPT] thought_guidance: str = TL_THOUGHT_GUIDANCE # TeamLeader only reacts once each time, but may encounter errors or need to ask human, thus allowing 2 more turns max_react_loop: int = 3 From d788722bc2e9f4ce7f77c43cba6dae867e8a643f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Fri, 9 Aug 2024 19:51:00 +0800 Subject: [PATCH 10/19] =?UTF-8?q?=20=E5=90=88=E5=B9=B6teamleader=20?= =?UTF-8?q?=E5=92=8Csweagent=E7=9A=84=E5=8E=9F=E7=B3=BB=E7=BB=9F=E6=8F=90?= =?UTF-8?q?=E7=A4=BA=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 2 ++ metagpt/prompts/di/swe_agent.py | 6 ++++-- metagpt/roles/di/role_zero.py | 2 ++ metagpt/roles/di/swe_agent.py | 8 ++++++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index a92a3c075..9742cab4a 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -48,6 +48,8 @@ Special Command: Use {{"command_name": "end"}} to do nothing or indicate complet """ CMD_PROMPT = """ +{current_state} + # Current Plan {plan_status} diff --git a/metagpt/prompts/di/swe_agent.py b/metagpt/prompts/di/swe_agent.py index 87332f9ef..a737d4a72 100644 --- a/metagpt/prompts/di/swe_agent.py +++ b/metagpt/prompts/di/swe_agent.py @@ -233,10 +233,12 @@ These examples are provided to demonstrate the output style that expected to be # IMPORTANT TIPS {IMPORTANT_TIPS} + +Avoid repeating the same command. Instead, please think about the current situation and provide the next bash command to execute in JSON format:" +""" +CURRENT_BASH_STATE = """ # Output Next Step The current bash state is: (Open file: {{open_file}}) (Current directory: {{working_dir}}) - -Avoid repeating the same command. Instead, please think about the current situation and provide the next bash command to execute in JSON format:" """ diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index af15fb745..75d070822 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -49,6 +49,7 @@ class RoleZero(Role): goal: str = "" system_prompt: str = SYSTEM_PROMPT # Use None to conform to the default value at llm.aask cmd_prompt: str = CMD_PROMPT + cmd_prompt_current_state: str = "" thought_guidance: str = THOUGHT_GUIDANCE instruction: str = ROLE_INSTRUCTION task_type_desc: str = None @@ -161,6 +162,7 @@ class RoleZero(Role): ### Make Decision Dynamically ### prompt = self.cmd_prompt.format( + current_state=self.cmd_prompt_current_state, plan_status=plan_status, current_task=current_task, requirements_constraints=self.requirements_constraints, diff --git a/metagpt/roles/di/swe_agent.py b/metagpt/roles/di/swe_agent.py index c8081066e..13fc87c60 100644 --- a/metagpt/roles/di/swe_agent.py +++ b/metagpt/roles/di/swe_agent.py @@ -3,7 +3,11 @@ import json from pydantic import Field from metagpt.logs import logger -from metagpt.prompts.di.swe_agent import MINIMAL_EXAMPLE, NEXT_STEP_TEMPLATE +from metagpt.prompts.di.swe_agent import ( + CURRENT_BASH_STATE, + MINIMAL_EXAMPLE, + NEXT_STEP_TEMPLATE, +) from metagpt.roles.di.role_zero import RoleZero from metagpt.tools.libs.git import git_create_pull from metagpt.tools.libs.terminal import Bash @@ -49,7 +53,7 @@ class SWEAgent(RoleZero): """ state_output = await self.terminal.run("state") bash_state = json.loads(state_output) - self.instruction = self._instruction.format(**bash_state).strip() + self.cmd_prompt_current_state = CURRENT_BASH_STATE.formate(**bash_state).strip() async def _parse_commands_for_eval(self): """ From 812d6a825d21047f96a241cb9c8d23b471d4004c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Fri, 9 Aug 2024 20:47:41 +0800 Subject: [PATCH 11/19] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=90=88=E5=B9=B6swe-a?= =?UTF-8?q?gent=E7=B3=BB=E7=BB=9F=E6=8F=90=E7=A4=BA=E8=AF=8D=E5=90=8E?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/swe_agent.py | 4 ++-- metagpt/roles/di/swe_agent.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metagpt/prompts/di/swe_agent.py b/metagpt/prompts/di/swe_agent.py index a737d4a72..7151199ad 100644 --- a/metagpt/prompts/di/swe_agent.py +++ b/metagpt/prompts/di/swe_agent.py @@ -239,6 +239,6 @@ Avoid repeating the same command. Instead, please think about the current situat CURRENT_BASH_STATE = """ # Output Next Step The current bash state is: -(Open file: {{open_file}}) -(Current directory: {{working_dir}}) +(Open file: {open_file}) +(Current directory: {working_dir}) """ diff --git a/metagpt/roles/di/swe_agent.py b/metagpt/roles/di/swe_agent.py index 13fc87c60..e1d2c9613 100644 --- a/metagpt/roles/di/swe_agent.py +++ b/metagpt/roles/di/swe_agent.py @@ -53,7 +53,7 @@ class SWEAgent(RoleZero): """ state_output = await self.terminal.run("state") bash_state = json.loads(state_output) - self.cmd_prompt_current_state = CURRENT_BASH_STATE.formate(**bash_state).strip() + self.cmd_prompt_current_state = CURRENT_BASH_STATE.format(**bash_state).strip() async def _parse_commands_for_eval(self): """ From 0979dacd2a16c82c0d33cbbda35825babb765b44 Mon Sep 17 00:00:00 2001 From: zhanglei Date: Sat, 10 Aug 2024 17:46:37 +0800 Subject: [PATCH 12/19] =?UTF-8?q?update:=20CR=E4=BC=98=E5=8C=96=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=8C=96=E8=B7=91=E6=B5=8B=E6=9A=B4=E9=9C=B2=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/ext/cr/actions/code_review.py | 65 +++++++++++++++------------ metagpt/ext/cr/utils/cleaner.py | 2 +- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/metagpt/ext/cr/actions/code_review.py b/metagpt/ext/cr/actions/code_review.py index ae6086f11..473ea8018 100644 --- a/metagpt/ext/cr/actions/code_review.py +++ b/metagpt/ext/cr/actions/code_review.py @@ -20,7 +20,7 @@ from metagpt.utils.common import parse_json_code_block CODE_REVIEW_PROMPT_TEMPLATE = """ NOTICE Let's think and work step by step. -With the given pull-request(PR) Patch, and referenced Points(Code Standards), you should compare each point with the code one-by-one. +With the given pull-request(PR) Patch, and referenced Points(Code Standards), you should compare each point with the code one-by-one within 4000 tokens. The Patch code has added line number at the first character each line for reading, but the review should focus on new added code inside the `Patch` (lines starting with line number and '+'). Each point is start with a line number and follows with the point description. @@ -48,14 +48,16 @@ Each point is start with a line number and follows with the point description. CodeReview guidelines: - Generate code `comment` that do not meet the point description. -- Each `comment` should be restricted inside the `commented_file` +- Each `comment` should be restricted inside the `commented_file`. - Try to provide diverse and insightful comments across different `commented_file`. - Don't suggest to add docstring unless it's necessary indeed. - If the same code error occurs multiple times, it cannot be omitted, and all places need to be identified.But Don't duplicate at the same place with the same comment! - Every line of code in the patch needs to be carefully checked, and laziness cannot be omitted. It is necessary to find out all the places. - The `comment` and `point_id` in the Output must correspond to and belong to the same one `Point`. +Strictly Observe: Just print the PR Patch comments in json format like **Output Format**. +And the output JSON must be able to be parsed by json.loads() without any errors. """ CODE_REVIEW_COMFIRM_SYSTEM_PROMPT = """ @@ -128,38 +130,43 @@ class CodeReview(Action): points_dict = {point.id: point for point in points} new_comments = [] for cmt in comments: - point = points_dict[cmt.get("point_id")] + try: + point = points_dict[cmt.get("point_id")] - code_start_line = cmt.get("code_start_line") - code_end_line = cmt.get("code_end_line") - # 如果代码位置为空的话,那么就将这条记录丢弃掉 - if not code_start_line or not code_end_line: - logger.info("False") - continue + code_start_line = cmt.get("code_start_line") + code_end_line = cmt.get("code_end_line") + # 如果代码位置为空的话,那么就将这条记录丢弃掉 + if not code_start_line or not code_end_line: + logger.info("False") + continue - # 代码增加上下文,提升confirm的准确率 - code = get_code_block_from_patch(patch, str(max(1, int(code_start_line) - 3)), str(int(code_end_line) + 3)) - pattern = r"^[ \t\n\r(){}[\];,]*$" - if re.match(pattern, code): + # 代码增加上下文,提升confirm的准确率 code = get_code_block_from_patch( - patch, str(max(1, int(code_start_line) - 5)), str(int(code_end_line) + 5) + patch, str(max(1, int(code_start_line) - 3)), str(int(code_end_line) + 3) ) - code_language = "Java" - code_file_ext = cmt.get("commented_file", ".java").split(".")[-1] - if code_file_ext == ".java": + pattern = r"^[ \t\n\r(){}[\];,]*$" + if re.match(pattern, code): + code = get_code_block_from_patch( + patch, str(max(1, int(code_start_line) - 5)), str(int(code_end_line) + 5) + ) code_language = "Java" - elif code_file_ext == ".py": - code_language = "Python" - prompt = CODE_REVIEW_COMFIRM_TEMPLATE.format( - code=code, - comment=cmt.get("comment"), - desc=point.text, - example=point.yes_example + "\n" + point.no_example, - ) - system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)] - resp = await self.llm.aask(prompt, system_msgs=system_prompt) - if "True" in resp or "true" in resp: - new_comments.append(cmt) + code_file_ext = cmt.get("commented_file", ".java").split(".")[-1] + if code_file_ext == ".java": + code_language = "Java" + elif code_file_ext == ".py": + code_language = "Python" + prompt = CODE_REVIEW_COMFIRM_TEMPLATE.format( + code=code, + comment=cmt.get("comment"), + desc=point.text, + example=point.yes_example + "\n" + point.no_example, + ) + system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)] + resp = await self.llm.aask(prompt, system_msgs=system_prompt) + if "True" in resp or "true" in resp: + new_comments.append(cmt) + except Exception: + logger.info("False") logger.info(f"original comments num: {len(comments)}, confirmed comments num: {len(new_comments)}") return new_comments diff --git a/metagpt/ext/cr/utils/cleaner.py b/metagpt/ext/cr/utils/cleaner.py index 3215737c1..8fc0b798c 100644 --- a/metagpt/ext/cr/utils/cleaner.py +++ b/metagpt/ext/cr/utils/cleaner.py @@ -10,7 +10,7 @@ def rm_patch_useless_part(patch: PatchSet, used_suffix: list[str] = ["java", "py useless_files = [] for pfile in patch: suffix = str(pfile.target_file).split(".")[-1] - if suffix not in used_suffix or pfile.is_removed_file or "test" in pfile.target_file.casefold(): + if suffix not in used_suffix or pfile.is_removed_file: useless_files.append(pfile.path) continue new_patch.append(pfile) From 9b360265a95c9a0cc0fe5118b5eae97868f4e8f6 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Sun, 11 Aug 2024 01:31:20 +0800 Subject: [PATCH 13/19] add discovery_service_url for GoogleAPIWrapper --- metagpt/configs/search_config.py | 1 + metagpt/tools/search_engine_googleapi.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/metagpt/configs/search_config.py b/metagpt/configs/search_config.py index e28b14c99..5f7f2d9a3 100644 --- a/metagpt/configs/search_config.py +++ b/metagpt/configs/search_config.py @@ -19,6 +19,7 @@ class SearchConfig(YamlModel): api_type: SearchEngineType = SearchEngineType.DUCK_DUCK_GO api_key: str = "" cse_id: str = "" # for google + discovery_service_url: str = "" # for google search_func: Optional[Callable] = None params: dict = Field( default_factory=lambda: { diff --git a/metagpt/tools/search_engine_googleapi.py b/metagpt/tools/search_engine_googleapi.py index 66b5ba950..2756a24c5 100644 --- a/metagpt/tools/search_engine_googleapi.py +++ b/metagpt/tools/search_engine_googleapi.py @@ -26,6 +26,8 @@ class GoogleAPIWrapper(BaseModel): api_key: str cse_id: str + discovery_service_url: Optional[str] = None + loop: Optional[asyncio.AbstractEventLoop] = None executor: Optional[futures.Executor] = None proxy: Optional[str] = None @@ -56,7 +58,7 @@ class GoogleAPIWrapper(BaseModel): @property def google_api_client(self): - build_kwargs = {"developerKey": self.api_key} + build_kwargs = {"developerKey": self.api_key, "discoveryServiceUrl": self.discovery_service_url} if self.proxy: parse_result = urlparse(self.proxy) proxy_type = parse_result.scheme From b1b8b84d85968d886c69603a76f896283743d903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Mon, 12 Aug 2024 11:41:18 +0800 Subject: [PATCH 14/19] feat: disable write trd --- metagpt/roles/architect.py | 5 +---- metagpt/tools/libs/software_development.py | 4 ---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index e37f00913..1af169ca1 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -8,7 +8,6 @@ from metagpt.actions import WritePRD from metagpt.actions.design_api import WriteDesign from metagpt.roles.di.role_zero import RoleZero -from metagpt.tools.libs.software_development import write_trd_and_framework from metagpt.utils.common import tool2name ARCHITECT_INSTRUCTION = """ @@ -33,7 +32,7 @@ class Architect(RoleZero): name: str = "Bob" profile: str = "Architect" - goal: str = "design a concise, usable, complete software system. ouput the system design or software framework." + goal: str = "design a concise, usable, complete software system. output the system design." constraints: str = ( "make sure the architecture is simple enough and use appropriate open source " "libraries. Use same language as user requirement" @@ -45,7 +44,6 @@ class Architect(RoleZero): "Editor:write,read,write_content", "RoleZero", "WriteDesign", - write_trd_and_framework.__name__, ] def __init__(self, **kwargs) -> None: @@ -64,7 +62,6 @@ class Architect(RoleZero): self.tool_execution_map.update(tool2name(WriteDesign, ["run"], write_design.run)) self.tool_execution_map.update( { - write_trd_and_framework.__name__: write_trd_and_framework, "run": write_design.run, # alias } ) diff --git a/metagpt/tools/libs/software_development.py b/metagpt/tools/libs/software_development.py index 1f8538dfc..1a20bf087 100644 --- a/metagpt/tools/libs/software_development.py +++ b/metagpt/tools/libs/software_development.py @@ -21,7 +21,6 @@ from metagpt.actions.requirement_analysis.trd import ( from metagpt.const import ASSISTANT_ALIAS, DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH from metagpt.context import Context from metagpt.logs import ToolLogItem, log_tool_output, logger -from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import aread from metagpt.utils.cost_manager import CostManager @@ -86,7 +85,6 @@ async def mock_asearch_acknowledgement(use_case_actors: str): return await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md") -@register_tool(tags=["system design", "write trd", "Write a TRD"]) async def write_trd( use_case_actors: str, user_requirements: str, @@ -155,7 +153,6 @@ async def write_trd( return trd -@register_tool(tags=["system design", "write software framework", "Write a software framework based on a TRD"]) async def write_framework( use_case_actors: str, trd: str, @@ -240,7 +237,6 @@ async def write_framework( return "## Software Framework" + "".join([f"\n- {i}" for i in file_list]) -@register_tool(tags=["system design", "write trd and framework", "Write a TRD and the framework"]) async def write_trd_and_framework( use_case_actors: str, user_requirements: str, From 17b51c13de414394ff4b731a438101dc3da215d6 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 14:11:17 +0800 Subject: [PATCH 15/19] optimized four-case intention prompts --- metagpt/prompts/di/role_zero.py | 175 ++++++++++++++++++++++++++------ metagpt/roles/di/role_zero.py | 25 ++++- 2 files changed, 163 insertions(+), 37 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index c05d899ce..eeb86d43e 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -109,32 +109,20 @@ Help check if there are any formatting issues with the JSON data? If so, please If no issues are detected, the original json data should be returned unchanged. Do not omit any information. Output the JSON data in a format that can be loaded by the json.loads() function. """ -QUICK_THINK_SYSTEM_PROMPT = """ -{role_info} -Your team member: -{team_info} -However, you MUST respond to the user message by yourself directly, DON'T ask your team members. -""" -QUICK_THINK_PROMPT_V2 = """ -Determine if the latest user message qualifies as a "quick question." Quick questions include: +# QUICK_THINK_PROMPT = """ +# Decide if the latest user message previously is a quick question. +# Quick questions include common-sense, legal, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly. +# Questions about you or your team info are also quick questions. +# Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question. +# However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. -- Common-sense inquiries (e.g., general knowledge, factual information) -- Legal, logical, or math-related questions -- Multiple-choice questions -- Greetings or casual chat -- Questions about you or your team +# If the question is a quick question, you should output QUICK to indicate the question is a quick question. +# Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search. +# If the query is ambiguous or requires more information, you should output OOD (Out of Domain) to indicate the question requires further clarification. -## Exclusions: -- Time- or location-sensitive questions (e.g., weather, news inquiries) are NOT quick questions. -- Software development tasks are NOT quick questions, except for: - - - Writing trivial code snippets (fewer than 30 lines) - - Completing a single function or class - - Explaining concepts, writing tutorials, or creating documentation - -Respond with a concise thought followed by "YES" if the question is a quick question. Otherwise, respond with "NO." Your response: -""" +# Respond with a concise thought then a QUICK if the question is a quick question, otherwise, a SEARCH, a TASK, or an OOD. Your response: +# """ QUICK_THINK_PROMPT = """ Decide if the latest user message previously is a quick question. @@ -142,22 +130,145 @@ Quick questions include common-sense, legal, logical, math, multiple-choice ques Questions about you or your team info are also quick questions. Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question. However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. -Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search. -If the query is ambiguous, you should output OOD (Out of Domain) to indicate the question is out of the domain. -Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO, a SEARCH, or an OOD. Your response: +## QUICK +If the question is a quick question, you should output QUICK to indicate the question is a quick question. +## SEARCH +If the question is a time- or location-sensitive such as wheather or news inquiry, you should output a keyword SEARCH to indicate the need for a google search. +## TASK +If the question is a software development task, or requires multiple steps of planning an execution, you should output a keyword TASK. +## OOD +If the question is ambiguous or requires more context such as link, file path, or the task cannot be done without more user's assistance, you should output OOD (Out of Domain). + +Respond with a concise thought then a QUICK if the question is a quick question, otherwise, a SEARCH, a TASK, or an OOD. Your response: + +""" + +QUICK_THINK_PROMPT = """ +Response Categories: + +## QUICK +For straightforward questions or requests that can be answered directly. Quick questions include common-sense, legal, logical, math, short-coding, multiple-choice questions, greetings, or casual chat that you can answer directly. Questions about you or your team info are also quick questions. +## SEARCH +For queries that require up-to-date or detailed information retrieval. These include time- or location-sensitive questions, such as weather or news inquiries. However, no need to perform a search if the information is readily available. +## TASK +For complex, multi-step tasks that involve a series of actions or detailed instructions. +## AMBIGUOUS +For requests that are ambiguous, lack necessary information, or fall outside the system's capabilities. AMBIGUOUS requests have these common properties: +- Incomplete Information: Requests that mention tasks but lack critical details (e.g., no document provided for summarization). +- Vagueness: Requests that are too broad, unclear, or unspecified, making it difficult to respond effectively. +- Out of Expertise: Requests that ask for specialized advice (e.g., legal, medical) or highly technical tasks outside the model's design. +- Unrealistic Scope: The request is too extensive or unrealistic to address within a single response (e.g., “Tell me everything about…”). + + +Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Response: + +""" + + +QUICK_THINK_PROMPT = """ +# Response Categories: +## QUICK: +For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, and inquiries about you or your team. + +## SEARCH +For queries that require retrieving up-to-date or detailed information. This includes time-sensitive or location-specific questions like current events or weather. Use this only if the information isn’t readily available. + +## TASK +For complex requests that involve multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires a sequence of actions. + +## AMBIGUOUS +For requests that are unclear, lack sufficient detail, or are outside the system's capabilities. Common characteristics of AMBIGUOUS requests: + +- Incomplete Information: Lacking critical details needed to perform the task (e.g., fail to provide dependent files, links, or context for a task). +- Vagueness: Broad, unspecified, or unclear requests that make it difficult to provide a precise answer. +- Out of Expertise: Requests for specialized advice (e.g., medical or legal advice) or highly technical tasks beyond the model's scope. +- Unrealistic Scope: Overly broad requests that are impossible to address meaningfully in a single response (e.g., "Tell me everything about..."). + +{examples} + +Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Your response: +""" + +# QUICK_THINK_EXAMPLES =""" +# # Example + +# 1. Given the request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", We can get the response: (This requires an direct answer) should be answered with YES. +# 2. Given the request: "Help me find some of the latest research papers on deep learning.", We can get the response: (This is a time-sensitive question) should be answered with SEARCH. +# 3. Given the request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", We can get the response: (This is a general knowledge question) should be answered with YES. +# 4. Given the request: "Recommend some programming practice websites suitable for beginners.", We can get the response: (This is a general knowledge question) should be answered with YES. +# 5. Given the request: "Make a personal website that runs Game of Life.", We can get the response: (This is a software development task) should be answered with NO. +# 6. Given the request: "Summarize the document for me.", We can get the response: (Nothing is provided by the user, requires further information) should be answered with OOD. + +# # Instruction +# """ + +# QUICK_THINK_EXAMPLES =""" +# # Example + +# 1. Request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", Response: (This requires an direct answer) should be answered with QUICK. +# 2. Request: "Help me find some of the latest research papers on deep learning.", Response: (This is a time-sensitive question) should be answered with SEARCH. +# 3. Request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", Response: (This is a general knowledge question) should be answered with QUICK. +# 4. Request: "Recommend some programming practice websites suitable for beginners.", Response: (This is a general knowledge question) should be answered with QUICK. +# 5. Request: "Make a personal website that runs Game of Life.", Response: (This is a software development task) should be answered with TASK. +# 6. Request: "Summarize the document for me.", Response: (The user needs to provide a link or file path to the document) should be answered with OOD. +# 7. Request: "Optimize our process.", Response: (Clarification needed: Which specific process? What does "optimize" mean in this context?) should be answered with OOD. + +# # Instruction +# """ + + +QUICK_THINK_EXAMPLES =""" +# Example + +1. Request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", Response: The user is asking for a general approach to design a platform, should be answered with QUICK. +2. Request: "Help me find some of the latest research papers on deep learning.", Response: The user is asking for the latest research papers, which is a time-sensitive question, should be answered with SEARCH. +3. Request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", Response: The user is asking for a general knowledge question, should be answered with QUICK. +4. Request: "Help me develop a one week healthy eating plan.", Response: The user is asking for advice on developing a healthy eating plan. The plan can be provided directly, should be answered with QUICK. +5. Request: "Make a personal website that runs Game of Life.", Response: The user is asking for a software development task with multiple steps, should be answered with TASK. +6. Request: "Summarize the document for me.", Response: The user doesn't provide a link or file path to the document, should be answered with OOD. +7. Request: "Optimize our process.", Response: Optimizing a process is a vague request, and the user needs to clarify what process it is and what is meant by 'optimize', should be answered with OOD. + +# Instruction """ QUICK_THINK_EXAMPLES =""" # Example -1. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "How to design an online document editing platform that supports real-time collaboration? Please answer me directly."}}], We can get the response: (It requires an direct answer) should be answered with "YES.", which scored: 10. -2. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Help me find some of the latest research papers on deep learning."}}], We can get the response: (This is a time-sensitive question) should be answered with "SEARCH.", which scored: 10. -3. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Tell me the difference between supervised learning and unsupervised learning in machine learning."}}], We can get the response: (This is a general knowledge question) should be answered with "YES.", which scored: 10. -4. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Recommend some programming practice websites suitable for beginners."}}], We can get the response: (This is a general knowledge question) should be answered with "YES.", which scored: 10. -5. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Make a personal website that runs Game of Life."}}], We can get the response: (This is a software development task) should be answered with "NO.", which scored: 10. +1. Request: "How do I design an online document editing platform that supports real-time collaboration?" +Thought: This is a direct query about platform design, answerable without additional resources. +Response Category: QUICK. + +2. Request: "What's the difference between supervised and unsupervised learning in machine learning?" +Thought: This is a general knowledge question that can be answered concisely. +Response Category: QUICK. + +3. Request: "Can you help me plan a healthy diet for a week?" +Thought: The user is requesting a simple plan that can be provided immediately. +Response Category: QUICK. + +4. Request: "Can you help me find the latest research papers on deep learning?" +Thought: The user needs current research, requiring a search for the most recent sources. +Response Category: SEARCH. + +5. Request: "Build a personal website that runs the Game of Life simulation." +Thought: This is a detailed software development task that requires multiple steps. +Response Category: TASK. + +6. Request: "Summarize this document for me." +Thought: The request mentions summarizing a document but doesn't provide the document itself, making it impossible to fulfill. +Response Category: AMBIGUOUS. + +7. Request: "Optimize this process." +Thought: The request is vague and lacks specifics, requiring clarification on the process to optimize. +Response Category: AMBIGUOUS. + +8. Request: "Create a poster for our upcoming event." +Thought: Critical details like event theme, date, and location are missing, making it impossible to complete the task. +Response Category: AMBIGUOUS. # Instruction """ -QUICK_THINK_PROMPT = QUICK_THINK_EXAMPLES + QUICK_THINK_PROMPT \ No newline at end of file +# QUICK_THINK_PROMPT = QUICK_THINK_EXAMPLES + QUICK_THINK_PROMPT +QUICK_THINK_PROMPT = QUICK_THINK_PROMPT.format(examples=QUICK_THINK_EXAMPLES) \ No newline at end of file diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 773124dcc..dc245d033 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -13,7 +13,7 @@ from metagpt.actions.analyze_requirements import AnalyzeRequirementsRestrictions from metagpt.actions.di.run_command import RunCommand from metagpt.actions.search_enhanced_qa import SearchEnhancedQA from metagpt.exp_pool import exp_cache -from metagpt.exp_pool.context_builders import RoleZeroContextBuilder +from metagpt.exp_pool.context_builders import RoleZeroContextBuilder, SimpleContextBuilder from metagpt.exp_pool.serializers import RoleZeroSerializer from metagpt.logs import logger from metagpt.prompts.di.role_zero import ( @@ -127,7 +127,17 @@ class RoleZero(Role): def _update_tool_execution(self): pass - + + def _get_team_info(self) -> str: + if not self.rc.env: + return "" + team_info = "" + for role in self.rc.env.roles.values(): + # if role.profile == "Team Leader": + # continue + team_info += f"{role.name}: {role.profile}, {role.goal}\n" + return team_info + async def _think(self) -> bool: """Useful in 'react' mode. Use LLM to decide whether and what to do next.""" # Compatibility @@ -189,9 +199,8 @@ class RoleZero(Role): The `RoleZeroContextBuilder` attempts to add experiences to `req`. The `RoleZeroSerializer` extracts essential parts of `req` for the experience pool, trimming lengthy entries to retain only necessary parts. """ - return await self.llm.aask(req, system_msgs=system_msgs) - + async def parse_browser_actions(self, memory: List[Message]) -> List[Message]: if not self.browser.is_empty_page: pattern = re.compile(r"Command Browser\.(\w+) executed") @@ -257,7 +266,7 @@ class RoleZero(Role): context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)]) intent_result = await self.llm.aask(context) - if "YES" in intent_result: + if "QUICK" in intent_result: # llm call with the original context async with ThoughtReporter(enable_llm_stream=True) as reporter: await reporter.async_report({"type": "quick"}) @@ -265,6 +274,12 @@ class RoleZero(Role): elif "SEARCH" in intent_result: query = "\n".join(str(msg) for msg in memory) answer = await SearchEnhancedQA().run(query) + elif "OOD" or "AMBIGUOUS " in intent_result: + # TODO: out of domain, ask human for help + pass + else: + # TODO: TASK question + pass if answer: self.rc.memory.add(AIMessage(content=answer, cause_by=RunCommand)) From bd9cb75f1adfa847cc6110182b951435d3162be4 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 14:35:30 +0800 Subject: [PATCH 16/19] optimized intention recognition for ood --- metagpt/prompts/di/role_zero.py | 106 ++------------------------------ 1 file changed, 5 insertions(+), 101 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index eeb86d43e..2aea566ae 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -110,69 +110,13 @@ If no issues are detected, the original json data should be returned unchanged. Output the JSON data in a format that can be loaded by the json.loads() function. """ -# QUICK_THINK_PROMPT = """ -# Decide if the latest user message previously is a quick question. -# Quick questions include common-sense, legal, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly. -# Questions about you or your team info are also quick questions. -# Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question. -# However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. - -# If the question is a quick question, you should output QUICK to indicate the question is a quick question. -# Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search. -# If the query is ambiguous or requires more information, you should output OOD (Out of Domain) to indicate the question requires further clarification. - -# Respond with a concise thought then a QUICK if the question is a quick question, otherwise, a SEARCH, a TASK, or an OOD. Your response: -# """ - QUICK_THINK_PROMPT = """ -Decide if the latest user message previously is a quick question. -Quick questions include common-sense, legal, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly. -Questions about you or your team info are also quick questions. -Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question. -However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation. - -## QUICK -If the question is a quick question, you should output QUICK to indicate the question is a quick question. -## SEARCH -If the question is a time- or location-sensitive such as wheather or news inquiry, you should output a keyword SEARCH to indicate the need for a google search. -## TASK -If the question is a software development task, or requires multiple steps of planning an execution, you should output a keyword TASK. -## OOD -If the question is ambiguous or requires more context such as link, file path, or the task cannot be done without more user's assistance, you should output OOD (Out of Domain). - -Respond with a concise thought then a QUICK if the question is a quick question, otherwise, a SEARCH, a TASK, or an OOD. Your response: - -""" - -QUICK_THINK_PROMPT = """ -Response Categories: - -## QUICK -For straightforward questions or requests that can be answered directly. Quick questions include common-sense, legal, logical, math, short-coding, multiple-choice questions, greetings, or casual chat that you can answer directly. Questions about you or your team info are also quick questions. -## SEARCH -For queries that require up-to-date or detailed information retrieval. These include time- or location-sensitive questions, such as weather or news inquiries. However, no need to perform a search if the information is readily available. -## TASK -For complex, multi-step tasks that involve a series of actions or detailed instructions. -## AMBIGUOUS -For requests that are ambiguous, lack necessary information, or fall outside the system's capabilities. AMBIGUOUS requests have these common properties: -- Incomplete Information: Requests that mention tasks but lack critical details (e.g., no document provided for summarization). -- Vagueness: Requests that are too broad, unclear, or unspecified, making it difficult to respond effectively. -- Out of Expertise: Requests that ask for specialized advice (e.g., legal, medical) or highly technical tasks outside the model's design. -- Unrealistic Scope: The request is too extensive or unrealistic to address within a single response (e.g., “Tell me everything about…”). - - -Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Response: - -""" - - -QUICK_THINK_PROMPT = """ -# Response Categories: +# Response Categories ## QUICK: For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, and inquiries about you or your team. ## SEARCH -For queries that require retrieving up-to-date or detailed information. This includes time-sensitive or location-specific questions like current events or weather. Use this only if the information isn’t readily available. +For queries that require retrieving up-to-date or detailed information. This includes time-sensitive or location-specific questions like current events or weather. Use this only if the information isn't readily available. ## TASK For complex requests that involve multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires a sequence of actions. @@ -180,57 +124,18 @@ For complex requests that involve multiple steps or detailed instructions. Examp ## AMBIGUOUS For requests that are unclear, lack sufficient detail, or are outside the system's capabilities. Common characteristics of AMBIGUOUS requests: -- Incomplete Information: Lacking critical details needed to perform the task (e.g., fail to provide dependent files, links, or context for a task). +- Incomplete Information: Requests that imply complex tasks but lack critical details (e.g., "Redesign this logo" without providing the original logo or specifying design requirements). - Vagueness: Broad, unspecified, or unclear requests that make it difficult to provide a precise answer. - Out of Expertise: Requests for specialized advice (e.g., medical or legal advice) or highly technical tasks beyond the model's scope. - Unrealistic Scope: Overly broad requests that are impossible to address meaningfully in a single response (e.g., "Tell me everything about..."). +**Note:** Before categorizing a request as TASK, consider whether the user has provided sufficient information to proceed with the task. If the request is complex but lacks essential details, it may fall under AMBIGUOUS. + {examples} Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Your response: """ -# QUICK_THINK_EXAMPLES =""" -# # Example - -# 1. Given the request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", We can get the response: (This requires an direct answer) should be answered with YES. -# 2. Given the request: "Help me find some of the latest research papers on deep learning.", We can get the response: (This is a time-sensitive question) should be answered with SEARCH. -# 3. Given the request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", We can get the response: (This is a general knowledge question) should be answered with YES. -# 4. Given the request: "Recommend some programming practice websites suitable for beginners.", We can get the response: (This is a general knowledge question) should be answered with YES. -# 5. Given the request: "Make a personal website that runs Game of Life.", We can get the response: (This is a software development task) should be answered with NO. -# 6. Given the request: "Summarize the document for me.", We can get the response: (Nothing is provided by the user, requires further information) should be answered with OOD. - -# # Instruction -# """ - -# QUICK_THINK_EXAMPLES =""" -# # Example - -# 1. Request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", Response: (This requires an direct answer) should be answered with QUICK. -# 2. Request: "Help me find some of the latest research papers on deep learning.", Response: (This is a time-sensitive question) should be answered with SEARCH. -# 3. Request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", Response: (This is a general knowledge question) should be answered with QUICK. -# 4. Request: "Recommend some programming practice websites suitable for beginners.", Response: (This is a general knowledge question) should be answered with QUICK. -# 5. Request: "Make a personal website that runs Game of Life.", Response: (This is a software development task) should be answered with TASK. -# 6. Request: "Summarize the document for me.", Response: (The user needs to provide a link or file path to the document) should be answered with OOD. -# 7. Request: "Optimize our process.", Response: (Clarification needed: Which specific process? What does "optimize" mean in this context?) should be answered with OOD. - -# # Instruction -# """ - - -QUICK_THINK_EXAMPLES =""" -# Example - -1. Request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", Response: The user is asking for a general approach to design a platform, should be answered with QUICK. -2. Request: "Help me find some of the latest research papers on deep learning.", Response: The user is asking for the latest research papers, which is a time-sensitive question, should be answered with SEARCH. -3. Request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", Response: The user is asking for a general knowledge question, should be answered with QUICK. -4. Request: "Help me develop a one week healthy eating plan.", Response: The user is asking for advice on developing a healthy eating plan. The plan can be provided directly, should be answered with QUICK. -5. Request: "Make a personal website that runs Game of Life.", Response: The user is asking for a software development task with multiple steps, should be answered with TASK. -6. Request: "Summarize the document for me.", Response: The user doesn't provide a link or file path to the document, should be answered with OOD. -7. Request: "Optimize our process.", Response: Optimizing a process is a vague request, and the user needs to clarify what process it is and what is meant by 'optimize', should be answered with OOD. - -# Instruction -""" QUICK_THINK_EXAMPLES =""" # Example @@ -270,5 +175,4 @@ Response Category: AMBIGUOUS. # Instruction """ -# QUICK_THINK_PROMPT = QUICK_THINK_EXAMPLES + QUICK_THINK_PROMPT QUICK_THINK_PROMPT = QUICK_THINK_PROMPT.format(examples=QUICK_THINK_EXAMPLES) \ No newline at end of file From 59f03d60bd5fea53acf722fd43ddcfa82881fed0 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 14:40:29 +0800 Subject: [PATCH 17/19] Change OOD to AMBIGUOUS --- metagpt/roles/di/role_zero.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index dc245d033..75e3d2e8d 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -274,12 +274,9 @@ class RoleZero(Role): elif "SEARCH" in intent_result: query = "\n".join(str(msg) for msg in memory) answer = await SearchEnhancedQA().run(query) - elif "OOD" or "AMBIGUOUS " in intent_result: + elif "AMBIGUOUS " in intent_result: # TODO: out of domain, ask human for help pass - else: - # TODO: TASK question - pass if answer: self.rc.memory.add(AIMessage(content=answer, cause_by=RunCommand)) From c8cc67cdb291e6af94ec19a1d5ccacd80a673e98 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 14:43:39 +0800 Subject: [PATCH 18/19] Remove teaminfo method in rolezero --- metagpt/roles/di/role_zero.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 75e3d2e8d..019bdba51 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -13,7 +13,7 @@ from metagpt.actions.analyze_requirements import AnalyzeRequirementsRestrictions from metagpt.actions.di.run_command import RunCommand from metagpt.actions.search_enhanced_qa import SearchEnhancedQA from metagpt.exp_pool import exp_cache -from metagpt.exp_pool.context_builders import RoleZeroContextBuilder, SimpleContextBuilder +from metagpt.exp_pool.context_builders import RoleZeroContextBuilder from metagpt.exp_pool.serializers import RoleZeroSerializer from metagpt.logs import logger from metagpt.prompts.di.role_zero import ( @@ -127,17 +127,7 @@ class RoleZero(Role): def _update_tool_execution(self): pass - - def _get_team_info(self) -> str: - if not self.rc.env: - return "" - team_info = "" - for role in self.rc.env.roles.values(): - # if role.profile == "Team Leader": - # continue - team_info += f"{role.name}: {role.profile}, {role.goal}\n" - return team_info - + async def _think(self) -> bool: """Useful in 'react' mode. Use LLM to decide whether and what to do next.""" # Compatibility From e0cc3e8d1bdae3222c6eb0d90b7d578a49ea3cfe Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 15:32:55 +0800 Subject: [PATCH 19/19] =?UTF-8?q?=E4=BF=AE=E6=94=B9quick=20think=20respons?= =?UTF-8?q?e=20category=20=E4=B8=B4=E6=97=B6if=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 2 +- metagpt/roles/di/role_zero.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 2aea566ae..765825198 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -129,7 +129,7 @@ For requests that are unclear, lack sufficient detail, or are outside the system - Out of Expertise: Requests for specialized advice (e.g., medical or legal advice) or highly technical tasks beyond the model's scope. - Unrealistic Scope: Overly broad requests that are impossible to address meaningfully in a single response (e.g., "Tell me everything about..."). -**Note:** Before categorizing a request as TASK, consider whether the user has provided sufficient information to proceed with the task. If the request is complex but lacks essential details, it may fall under AMBIGUOUS. +**Note:** Before categorizing a request as TASK, consider whether the user has provided sufficient information to proceed with the task. If the request is complex but lacks essential details or the mentioned files, it should fall under AMBIGUOUS. {examples} diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 019bdba51..e87dae750 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -256,17 +256,13 @@ class RoleZero(Role): context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)]) intent_result = await self.llm.aask(context) - if "QUICK" in intent_result: - # llm call with the original context + if "QUICK" in intent_result or "AMBIGUOUS " in intent_result: # llm call with the original context async with ThoughtReporter(enable_llm_stream=True) as reporter: await reporter.async_report({"type": "quick"}) answer = await self.llm.aask(self.llm.format_msg(memory)) elif "SEARCH" in intent_result: query = "\n".join(str(msg) for msg in memory) answer = await SearchEnhancedQA().run(query) - elif "AMBIGUOUS " in intent_result: - # TODO: out of domain, ask human for help - pass if answer: self.rc.memory.add(AIMessage(content=answer, cause_by=RunCommand))