optimized four-case intention prompts

2026-06-11 15:15:18 +02:00 · 2024-08-12 14:11:17 +08:00 · 2024-08-12 14:11:17 +08:00 · 17b51c13de
commit 17b51c13de
parent f87386b48d
2 changed files with 163 additions and 37 deletions
--- a/metagpt/prompts/di/role_zero.py
+++ b/metagpt/prompts/di/role_zero.py
@ -109,32 +109,20 @@ Help check if there are any formatting issues with the JSON data? If so, please
 If no issues are detected, the original json data should be returned unchanged. Do not omit any information.
 Output the JSON data in a format that can be loaded by the json.loads() function.
 """
-QUICK_THINK_SYSTEM_PROMPT = """
-{role_info}
-Your team member:
-{team_info}
-However, you MUST respond to the user message by yourself directly, DON'T ask your team members.
-"""

-QUICK_THINK_PROMPT_V2 = """
-Determine if the latest user message qualifies as a "quick question." Quick questions include:
+# QUICK_THINK_PROMPT = """
+# Decide if the latest user message previously is a quick question.
+# Quick questions include common-sense, legal, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly.
+# Questions about you or your team info are also quick questions.
+# Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question.
+# However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation.

- Common-sense inquiries (e.g., general knowledge, factual information)
- Legal, logical, or math-related questions
- Multiple-choice questions
- Greetings or casual chat
- Questions about you or your team
+# If the question is a quick question, you should output QUICK to indicate the question is a quick question.
+# Time- or location-sensitive questions such as weather or news inquiries are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a Google search.
+# If the query is ambiguous or requires more information, you should output OOD (Out of Domain) to indicate the question requires further clarification.

-## Exclusions:
- Time- or location-sensitive questions (e.g., weather, news inquiries) are NOT quick questions.
- Software development tasks are NOT quick questions, except for:
-
-    - Writing trivial code snippets (fewer than 30 lines)
-    - Completing a single function or class
-    - Explaining concepts, writing tutorials, or creating documentation
-    
-Respond with a concise thought followed by "YES" if the question is a quick question. Otherwise, respond with "NO." Your response:
-"""
+# Respond with a concise thought then a QUICK if the question is a quick question, otherwise, a SEARCH, a TASK, or an OOD. Your response:
+# """

 QUICK_THINK_PROMPT = """
 Decide if the latest user message previously is a quick question.
@ -142,22 +130,145 @@ Quick questions include common-sense, legal, logical, math, multiple-choice ques
 Questions about you or your team info are also quick questions.
 Software development tasks are NOT quick questions. Code execution, however trivial, is NOT a quick question.
 However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation.
-Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search.
-If the query is ambiguous, you should output OOD (Out of Domain) to indicate the question is out of the domain.

-Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO, a SEARCH, or an OOD. Your response:
+## QUICK
+If the question is a quick question, you should output QUICK to indicate the question is a quick question.
+## SEARCH
+If the question is time- or location-sensitive, such as a weather or news inquiry, you should output a keyword SEARCH to indicate the need for a Google search.
+## TASK
+If the question is a software development task, or requires multiple steps of planning and execution, you should output a keyword TASK.
+## OOD
+If the question is ambiguous or requires more context such as link, file path, or the task cannot be done without more user's assistance, you should output OOD (Out of Domain).
+
+Respond with a concise thought then a QUICK if the question is a quick question, otherwise, a SEARCH, a TASK, or an OOD. Your response:
+
+"""
+
+# Intermediate draft of the four-category intent prompt (QUICK / SEARCH / TASK / AMBIGUOUS).
+# NOTE: this value is never read as-is -- the name QUICK_THINK_PROMPT is rebound by the
+# assignment that immediately follows, so only that later template takes effect.
+QUICK_THINK_PROMPT = """
+Response Categories:
+
+## QUICK
+For straightforward questions or requests that can be answered directly. Quick questions include common-sense, legal, logical, math, short-coding, multiple-choice questions, greetings, or casual chat that you can answer directly. Questions about you or your team info are also quick questions.
+## SEARCH
+For queries that require up-to-date or detailed information retrieval. These include time- or location-sensitive questions, such as weather or news inquiries. However, no need to perform a search if the information is readily available.
+## TASK
+For complex, multi-step tasks that involve a series of actions or detailed instructions.
+## AMBIGUOUS
+For requests that are ambiguous, lack necessary information, or fall outside the system's capabilities. AMBIGUOUS requests have these common properties:
+- Incomplete Information: Requests that mention tasks but lack critical details (e.g., no document provided for summarization).
+- Vagueness: Requests that are too broad, unclear, or unspecified, making it difficult to respond effectively.
+- Out of Expertise: Requests that ask for specialized advice (e.g., legal, medical) or highly technical tasks outside the model's design.
+- Unrealistic Scope: The request is too extensive or unrealistic to address within a single response (e.g., “Tell me everything about…”).
+
+
+Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Response:
+
+"""
+
+
+# Intent-classification prompt template. It asks the model for a short thought followed by
+# exactly one category keyword: QUICK, SEARCH, TASK, or AMBIGUOUS.
+# `{examples}` is a str.format placeholder that is filled with QUICK_THINK_EXAMPLES by the
+# `.format(examples=...)` call at the bottom of this module; because the whole template goes
+# through str.format, any other literal brace added to this text must be doubled ({{ / }}).
+QUICK_THINK_PROMPT = """
+# Response Categories:
+## QUICK: 
+For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, and inquiries about you or your team.
+
+## SEARCH
+For queries that require retrieving up-to-date or detailed information. This includes time-sensitive or location-specific questions like current events or weather. Use this only if the information isn’t readily available.
+
+## TASK
+For complex requests that involve multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires a sequence of actions.
+
+## AMBIGUOUS
+For requests that are unclear, lack sufficient detail, or are outside the system's capabilities. Common characteristics of AMBIGUOUS requests:
+
+- Incomplete Information: Lacking critical details needed to perform the task (e.g., fail to provide dependent files, links, or context for a task).
+- Vagueness: Broad, unspecified, or unclear requests that make it difficult to provide a precise answer. 
+- Out of Expertise: Requests for specialized advice (e.g., medical or legal advice) or highly technical tasks beyond the model's scope.
+- Unrealistic Scope: Overly broad requests that are impossible to address meaningfully in a single response (e.g., "Tell me everything about...").
+
+{examples}
+
+Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Your response:
+"""
+
+# QUICK_THINK_EXAMPLES ="""
+# # Example
+
+# 1. Given the request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", We can get the response:  (This requires an direct answer) should be answered with YES.
+# 2. Given the request: "Help me find some of the latest research papers on deep learning.", We can get the response: (This is a time-sensitive question) should be answered with SEARCH.
+# 3. Given the request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", We can get the response:  (This is a general knowledge question) should be answered with YES.
+# 4. Given the request: "Recommend some programming practice websites suitable for beginners.", We can get the response: (This is a general knowledge question) should be answered with YES.
+# 5. Given the request: "Make a personal website that runs Game of Life.", We can get the response:  (This is a software development task) should be answered with NO.
+# 6. Given the request: "Summarize the document for me.", We can get the response:  (Nothing is provided by the user, requires further information) should be answered with OOD.
+
+# # Instruction
+# """
+
+# QUICK_THINK_EXAMPLES ="""
+# # Example
+
+# 1. Request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", Response:  (This requires an direct answer) should be answered with QUICK.
+# 2. Request: "Help me find some of the latest research papers on deep learning.", Response: (This is a time-sensitive question) should be answered with SEARCH.
+# 3. Request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", Response:  (This is a general knowledge question) should be answered with QUICK.
+# 4. Request: "Recommend some programming practice websites suitable for beginners.", Response: (This is a general knowledge question) should be answered with QUICK.
+# 5. Request: "Make a personal website that runs Game of Life.", Response:  (This is a software development task) should be answered with TASK.
+# 6. Request: "Summarize the document for me.", Response:  (The user needs to provide a link or file path to the document) should be answered with OOD.
+# 7. Request: "Optimize our process.", Response:  (Clarification needed: Which specific process? What does "optimize" mean in this context?) should be answered with OOD.
+
+# # Instruction
+# """
+
+
+# Earlier one-line-per-example variant of the few-shot examples.
+# NOTE: the name QUICK_THINK_EXAMPLES is rebound by the assignment that follows, so this
+# value is not the one substituted into the prompt. It also still labels the ambiguous
+# cases (6 and 7) as OOD, whereas the prompt template asks for AMBIGUOUS.
+QUICK_THINK_EXAMPLES ="""
+# Example
+
+1. Request: "How to design an online document editing platform that supports real-time collaboration? Please answer me directly.", Response:  The user is asking for a general approach to design a platform, should be answered with QUICK.
+2. Request: "Help me find some of the latest research papers on deep learning.", Response: The user is asking for the latest research papers, which is a time-sensitive question, should be answered with SEARCH.
+3. Request: "Tell me the difference between supervised learning and unsupervised learning in machine learning.", Response: The user is asking for a general knowledge question, should be answered with QUICK.
+4. Request: "Help me develop a one week healthy eating plan.", Response: The user is asking for advice on developing a healthy eating plan. The plan can be provided directly, should be answered with QUICK.
+5. Request: "Make a personal website that runs Game of Life.", Response:  The user is asking for a software development task with multiple steps, should be answered with TASK.
+6. Request: "Summarize the document for me.", Response:  The user doesn't provide a link or file path to the document, should be answered with OOD.
+7. Request: "Optimize our process.", Response:  Optimizing a process is a vague request, and the user needs to clarify what process it is and what is meant by 'optimize', should be answered with OOD.
+
+# Instruction
 """

 QUICK_THINK_EXAMPLES ="""
 # Example

-1. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "How to design an online document editing platform that supports real-time collaboration? Please answer me directly."}}], We can get the response:  (It requires an direct answer) should be answered with "YES.", which scored: 10.
-2. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Help me find some of the latest research papers on deep learning."}}], We can get the response: (This is a time-sensitive question) should be answered with "SEARCH.", which scored: 10.
-3. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Tell me the difference between supervised learning and unsupervised learning in machine learning."}}], We can get the response:  (This is a general knowledge question) should be answered with "YES.", which scored: 10.
-4. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Recommend some programming practice websites suitable for beginners."}}], We can get the response: (This is a general knowledge question) should be answered with "YES.", which scored: 10.
-5. Given the request: [{"role": "user", "content": {"instruction": "Determine if the latest user message qualifies as a quick question.", "request": "Make a personal website that runs Game of Life."}}], We can get the response:  (This is a software development task) should be answered with "NO.", which scored: 10.
+1. Request: "How do I design an online document editing platform that supports real-time collaboration?"
+Thought: This is a direct query about platform design, answerable without additional resources. 
+Response Category: QUICK.
+
+2. Request: "What's the difference between supervised and unsupervised learning in machine learning?"
+Thought: This is a general knowledge question that can be answered concisely. 
+Response Category: QUICK.
+
+3. Request: "Can you help me plan a healthy diet for a week?"
+Thought: The user is requesting a simple plan that can be provided immediately. 
+Response Category: QUICK.
+
+4. Request: "Can you help me find the latest research papers on deep learning?"
+Thought: The user needs current research, requiring a search for the most recent sources. 
+Response Category: SEARCH.
+
+5. Request: "Build a personal website that runs the Game of Life simulation."
+Thought: This is a detailed software development task that requires multiple steps. 
+Response Category: TASK.
+
+6. Request: "Summarize this document for me."
+Thought: The request mentions summarizing a document but doesn't provide the document itself, making it impossible to fulfill. 
+Response Category: AMBIGUOUS.
+
+7. Request: "Optimize this process." 
+Thought: The request is vague and lacks specifics, requiring clarification on the process to optimize.
+Response Category: AMBIGUOUS.
+
+8. Request: "Create a poster for our upcoming event." 
+Thought: Critical details like event theme, date, and location are missing, making it impossible to complete the task.
+Response Category: AMBIGUOUS.

 # Instruction
 """

-QUICK_THINK_PROMPT = QUICK_THINK_EXAMPLES + QUICK_THINK_PROMPT
+# Previous approach, kept for reference: prepend the examples to the prompt text.
+# QUICK_THINK_PROMPT = QUICK_THINK_EXAMPLES + QUICK_THINK_PROMPT
+# Current approach: substitute the few-shot examples into the template's {examples}
+# placeholder, so they sit between the category descriptions and the final instruction.
+QUICK_THINK_PROMPT = QUICK_THINK_PROMPT.format(examples=QUICK_THINK_EXAMPLES)
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -13,7 +13,7 @@ from metagpt.actions.analyze_requirements import AnalyzeRequirementsRestrictions
 from metagpt.actions.di.run_command import RunCommand
 from metagpt.actions.search_enhanced_qa import SearchEnhancedQA
 from metagpt.exp_pool import exp_cache
-from metagpt.exp_pool.context_builders import RoleZeroContextBuilder
+from metagpt.exp_pool.context_builders import RoleZeroContextBuilder, SimpleContextBuilder
 from metagpt.exp_pool.serializers import RoleZeroSerializer
 from metagpt.logs import logger
 from metagpt.prompts.di.role_zero import (
@ -127,7 +127,17 @@ class RoleZero(Role):

    def _update_tool_execution(self):
        pass
-
+    
+    def _get_team_info(self) -> str:
+        """Describe every role in the current environment, one role per line.
+
+        Returns:
+            One newline-terminated "name: profile, goal" line for each role in
+            ``self.rc.env.roles``, concatenated in iteration order, or an empty
+            string when no environment is attached.
+        """
+        if not self.rc.env:
+            return ""
+        # No role is filtered out here (a "Team Leader" skip only ever existed
+        # as commented-out code), so every member of the environment is listed.
+        return "".join(
+            f"{member.name}: {member.profile}, {member.goal}\n"
+            for member in self.rc.env.roles.values()
+        )
+    
    async def _think(self) -> bool:
        """Useful in 'react' mode. Use LLM to decide whether and what to do next."""
        # Compatibility
@ -189,9 +199,8 @@ class RoleZero(Role):
        The `RoleZeroContextBuilder` attempts to add experiences to `req`.
        The `RoleZeroSerializer` extracts essential parts of `req` for the experience pool, trimming lengthy entries to retain only necessary parts.
        """
-
        return await self.llm.aask(req, system_msgs=system_msgs)
-
+                      
    async def parse_browser_actions(self, memory: List[Message]) -> List[Message]:
        if not self.browser.is_empty_page:
            pattern = re.compile(r"Command Browser\.(\w+) executed")
@ -257,7 +266,7 @@ class RoleZero(Role):
        context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)])
        intent_result = await self.llm.aask(context)

-        if "YES" in intent_result:
+        if "QUICK" in intent_result:
            # llm call with the original context
            async with ThoughtReporter(enable_llm_stream=True) as reporter:
                await reporter.async_report({"type": "quick"})
@ -265,6 +274,12 @@ class RoleZero(Role):
        elif "SEARCH" in intent_result:
            query = "\n".join(str(msg) for msg in memory)
            answer = await SearchEnhancedQA().run(query)
+        elif "OOD" in intent_result or "AMBIGUOUS" in intent_result:
+            # Each keyword needs its own membership test: `"OOD" or "X" in s` parses as
+            # `"OOD" or ("X" in s)`, which is always truthy and would make the `else`
+            # branch below unreachable. "AMBIGUOUS" is matched without a trailing space
+            # so that replies such as "AMBIGUOUS." are recognized as well.
+            # TODO: ambiguous / out-of-domain request, ask human for help
+            pass
+        else:
+            # Reached when none of the keywords above matched, i.e. the TASK category.
+            # TODO: TASK question
+            pass

        if answer:
            self.rc.memory.add(AIMessage(content=answer, cause_by=RunCommand))