add search intent & fix a bug in parsing browser action

2026-06-08 15:05:17 +02:00 · 2024-08-04 12:40:41 +08:00 · 2024-08-04 12:40:41 +08:00 · 99c440b17a
commit 99c440b17a
parent 654ed131d7
4 changed files with 65 additions and 24 deletions
--- a/metagpt/actions/search_enhanced_qa.py
+++ b/metagpt/actions/search_enhanced_qa.py
@ -11,6 +11,7 @@ from metagpt.actions.research import CollectLinks, WebBrowseAndSummarize
 from metagpt.logs import logger
 from metagpt.tools.web_browser_engine import WebBrowserEngine
 from metagpt.utils.common import CodeParser
+from metagpt.utils.report import ThoughtReporter

 REWRITE_QUERY_PROMPT = """
 Role: You are a highly efficient assistant that provide a better search query for web search engine to answer the given question.
@ -237,4 +238,8 @@ class SearchEnhancedQA(Action):

        system_prompt = SEARCH_ENHANCED_QA_SYSTEM_PROMPT.format(context=context)

-        return await self._aask(query, [system_prompt])
+        async with ThoughtReporter(enable_llm_stream=True) as reporter:
+            await reporter.async_report({"type": "quick"})
+            rsp = await self._aask(query, [system_prompt])
+
+        return rsp
--- a/metagpt/prompts/di/role_zero.py
+++ b/metagpt/prompts/di/role_zero.py
@ -114,8 +114,8 @@ QUICK_THINK_PROMPT = """
 Decide if the latest user message previously is a quick question.
 Quick questions include common-sense, legal, logical, math, multiple-choice questions, greetings, or casual chat that you can answer directly.
 Questions about you or your team info are also quick questions.
-Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions.
+Time- or location-sensitive questions such as wheather or news inquiry are NOT quick questions. Moreover, you should output a keyword SEARCH to indicate the need for a google search.
 Software development tasks are NOT quick questions.
 However, these programming-related tasks are quick questions: writing trivial code snippets (fewer than 30 lines), filling a single function or class, explaining concepts, writing tutorials and documentation.
-Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO. Your response:
+Respond with a concise thought then a YES if the question is a quick question, otherwise, a NO or a SEARCH. Your response:
 """
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -11,6 +11,7 @@ from pydantic import model_validator
 from metagpt.actions import Action, UserRequirement
 from metagpt.actions.analyze_requirements import AnalyzeRequirementsRestrictions
 from metagpt.actions.di.run_command import RunCommand
+from metagpt.actions.search_enhanced_qa import SearchEnhancedQA
 from metagpt.exp_pool import exp_cache
 from metagpt.exp_pool.context_builders import RoleZeroContextBuilder
 from metagpt.exp_pool.serializers import RoleZeroSerializer
@ -196,7 +197,7 @@ class RoleZero(Role):
        if not self.browser.is_empty_page:
            pattern = re.compile(r"Command Browser\.(\w+) executed")
            for index, msg in zip(range(len(memory), 0, -1), memory[::-1]):
-                if pattern.match(msg.content):
+                if pattern.search(msg.content):
                    memory.insert(index, UserMessage(cause_by="browser", content=await self.browser.view()))
                    break
        return memory
@ -225,7 +226,7 @@ class RoleZero(Role):
        self._set_state(0)

        # problems solvable by quick thinking doesn't need to a formal think-act cycle
-        quick_rsp = await self._quick_think()
+        quick_rsp, _ = await self._quick_think()
        if quick_rsp:
            return quick_rsp

@ -245,22 +246,28 @@ class RoleZero(Role):
            actions_taken += 1
        return rsp  # return output from the last action

-    async def _quick_think(self) -> Message:
+    async def _quick_think(self) -> Tuple[Message, str]:
+        answer = ""
        rsp_msg = None
        if self.rc.news[-1].cause_by != any_to_str(UserRequirement):
            # Agents themselves won't generate quick questions, use this rule to reduce extra llm calls
-            return rsp_msg
+            return rsp_msg, ""

        # routing
-        memory = self.get_memories(k=4)
+        memory = self.get_memories(k=4)  # FIXME: A magic number for two rounds of Q&A
        context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)])
-        rsp = await self.llm.aask(context)
+        intent_result = await self.llm.aask(context)

-        if "yes" in rsp.lower():
+        if "YES" in intent_result:
            # llm call with the original context
            async with ThoughtReporter(enable_llm_stream=True) as reporter:
                await reporter.async_report({"type": "quick"})
                answer = await self.llm.aask(self.llm.format_msg(memory))
+        elif "SEARCH" in intent_result:
+            query = "\n".join(str(msg) for msg in memory)
+            answer = await SearchEnhancedQA().run(query)
+
+        if answer:
            self.rc.memory.add(AIMessage(content=answer, cause_by=RunCommand))
            await self.reply_to_human(content=answer)
            rsp_msg = AIMessage(
@ -269,7 +276,7 @@ class RoleZero(Role):
                cause_by=RunCommand,
            )

-        return rsp_msg
+        return rsp_msg, intent_result

    async def _check_duplicates(self, req: list[dict], command_rsp: str):
        past_rsp = [mem.content for mem in self.rc.memory.get(self.memory_k)]
--- a/tests/metagpt/roles/di/test_routing.py
+++ b/tests/metagpt/roles/di/test_routing.py
@ -19,18 +19,41 @@ NORMAL_QUESTION = [
    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
    """,
    """
+    Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
+    The first page product name, price, product URL, and image URL must be saved in the csv;**
+    """,
+    """
    Write a fix for this issue: https://github.com/langchain-ai/langchain/issues/20453, 
    you can fix it on this repo https://github.com/garylin2099/langchain,
    checkout a branch named test-fix, commit your changes, push, and create a PR to the master branch of https://github.com/iorisa/langchain
    """,
-    ## info searching ##
-    """When is the Olympic football final this year, where will it be held, and where can I buy tickets? If possible, please provide me with a link to buy tickets""",
-    """Help me search for Inter Miami CF home games in the next 2 months and give me the link to buy tickets""",
-    """请为我查找位于深圳大学附近1000米范围内，价格适中（性价比最高），且晚上关门时间晚于22:00的健身房。""",
-    "今天的天气怎样",
-    "奥运会的开幕式是什么时候",
+    "Open this link and make a sumamry: https://github.com/geekan/MetaGPT",  # should not be confused with searching
+    "清查看这个网页https://platform.openai.com/docs/models",  # should not be confused with searching
 ]

+
+SEARCH_QUESTION = [
+    "今天的天气怎样？",
+    "全球智能手机市场份额排名是什么？前三名的品牌各占多少百分比？",
+    "中国股市上市公司数量是多少？",
+    "奥运会将在哪里举行？有哪些新增的比赛项目？",
+    "最近一周全球原油价格的走势如何？",
+    "当前全球碳排放量最大的三个国家是哪些？",
+    "当前全球碳排放量最大的三个国家各占多少比例",
+    "最新的全球教育质量排名中，前五名的国家是哪些？",
+    "当前全球最大的几家电动汽车制造商是哪些？",
+    "奥运会的开幕式是什么时候",
+    "Recommend some gyms near Shenzhen University",
+    "Which university tops QS ranking?",
+    "Which university tops QS ranking this year?",
+    "The stock price of Nvidia?",
+    # longer questions
+    "请为我查找位于深圳大学附近1000米范围内，价格适中（性价比最高），且晚上关门时间晚于22:00的健身房。",
+    "When is the Olympic football final this year, where will it be held, and where can I buy tickets? If possible, please provide me with a link to buy tickets",
+    "Help me search for Inter Miami CF home games in the next 2 months and give me the link to buy tickets",
+]
+
+
 QUICK_QUESTION = [
    ## general knowledge qa, logical, math ##
    """Who is the first man landing on Moon""",
@ -102,22 +125,28 @@ async def test_routing_acc():
    for q in QUICK_QUESTION:
        msg = Message(content=q)
        role.put_message(msg)
-        # await env.run()
        await role._observe()
-        rsp = await role._quick_think()
+        rsp, intent_result = await role._quick_think()
        role.rc.memory.clear()
-        if not rsp:
+        if "YES" not in intent_result:
            logger.error(f"Quick question failed: {q}")
-        # assert rsp
+
+    for q in SEARCH_QUESTION:
+        msg = Message(content=q)
+        role.put_message(msg)
+        await role._observe()
+        rsp, intent_result = await role._quick_think()
+        role.rc.memory.clear()
+        if "SEARCH" not in intent_result:
+            logger.error(f"Search question failed: {q}")

    for q in NORMAL_QUESTION:
        msg = Message(content=q)
        role.put_message(msg)
        await role._observe()
-        rsp = await role._quick_think()
+        rsp, intent_result = await role._quick_think()
        role.rc.memory.clear()
-        # assert not rsp
-        if rsp:
+        if "NO" not in intent_result:
            logger.error(f"Normal question failed: {q}")