feat: +intent detect

2026-06-08 15:05:17 +02:00 · 2024-03-26 19:41:19 +08:00 · 2024-03-26 19:41:19 +08:00 · 6680458695
commit 6680458695
parent 12948a5482
4 changed files with 468 additions and 0 deletions
--- a/metagpt/actions/intent_detect.py
+++ b/metagpt/actions/intent_detect.py
@ -0,0 +1,307 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+This script is designed to classify intentions from complete conversation content.
+
+Usage:
+    This script can be used to classify intentions from a conversation. It utilizes models for detecting intentions
+    from the text provided and categorizes them accordingly. If the intention of certain words or phrases is unclear,
+    it prompts the user for clarification.
+
+Dependencies:
+    This script depends on the metagpt library, pydantic, and other utilities for message parsing and interaction.
+
+"""
+import json
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+from metagpt.actions import Action
+from metagpt.logs import logger
+from metagpt.schema import Message
+from metagpt.utils.common import parse_json_code_block
+
+
+class SOPItem(BaseModel):
+    """
+    Represents an item in a Standard Operating Procedure (SOP).
+
+    Attributes:
+        description (str): Describes the kind of intentions the SOP applies to. This is the text listed to
+            the LLM under "SOPs" and the key used to find the item again from the LLM's answer.
+        sop (List[str]): The steps of the SOP.
+    """
+
+    description: str
+    sop: List[str]
+
+
+# SOPs that a detected intention can be matched against. `IntentDetect` lists the descriptions to the LLM
+# and looks items up by `description`, so each description must be unique within this list.
+SOP_CONFIG = [
+    SOPItem(
+        description="Intentions related to or including software development, such as developing or building software, games, programming, app, websites, etc.",
+        sop=[
+            "Writes a PRD based on software requirements.",
+            "Writes a design to the project repository, based on the PRD of the project.",
+            "Writes a project plan to the project repository, based on the design of the project.",
+            "Writes codes to the project repository, based on the project plan of the project.",
+            "Run QA test on the project repository.",
+            "Stage and commit changes for the project repository using Git.",
+        ],
+    )
+]
+
+
+class _IntentDetectIntention(BaseModel):
+    """
+    Represents one detected intention, as returned by the LLM in the "intentions" list.
+
+    Attributes:
+        ref (str): The reference to the original words.
+        intent (str): The detected intention of the referenced words.
+    """
+
+    ref: str
+    intent: str
+
+
+class IntentDetectClarification(BaseModel):
+    """
+    Represents a clarification request for words whose intention is unclear.
+
+    Attributes:
+        ref (str): The reference to the original words.
+        clarification (str): A question, in the tone of an assistant, asking the user to clarify the intention
+            of the unclear words.
+    """
+
+    ref: str
+    clarification: str
+
+
+class _IntentDetectDialogIntentions(BaseModel):
+    """
+    Represents the intentions and clarification requests detected in a dialog.
+
+    Attributes:
+        intentions (List[_IntentDetectIntention]): List of detected intentions.
+        clarifications (List[IntentDetectClarification]): List of clarifications for unclear intentions.
+    """
+
+    intentions: List[_IntentDetectIntention]
+    clarifications: List[IntentDetectClarification]
+
+
+class IntentDetectIntentionRef(BaseModel):
+    """
+    Represents an intention together with the dialog text that expresses it.
+
+    Attributes:
+        intent (str): The intention from the "Intentions" section.
+        refs (List[str]): List of original text references from the "Dialog" section that match the intention.
+            Different intentions are allowed to reference the same original text.
+    """
+
+    intent: str
+    refs: List[str]
+
+
+class _IntentDetectUnrefs(BaseModel):
+    """
+    Represents a piece of dialog text that was not placed under any intention, along with the reason.
+
+    Attributes:
+        ref (str): The unreferenced original text.
+        reason (str): Explanation for why it is unreferenced.
+    """
+
+    ref: str
+    reason: str
+
+
+class _IntentSOP(BaseModel):
+    """
+    Represents a mapping between an intention and a Standard Operating Procedure (SOP).
+
+    Attributes:
+        intent (str): The intention related to the SOP.
+        sop (str): The description of the matched Standard Operating Procedure; an empty string if no SOP
+            matches the intention.
+        sop_index (int): The index of the matched SOP description in the "SOPs" list shown to the LLM (the
+            list is numbered from 1); 0 if no SOP matches.
+        reason (str): Explanation for why the intention matches or does not match the SOP.
+    """
+
+    intent: str
+    sop: str
+    sop_index: int
+    reason: str
+
+
+class _IntentDetectReferences(BaseModel):
+    """
+    Represents references to intentions and unreferenced content.
+
+    Attributes:
+        intentions (List[IntentDetectIntentionRef]): List of intentions with their references.
+        unrefs (List[_IntentDetectUnrefs]): List of unreferenced content with reasons.
+    """
+
+    intentions: List[IntentDetectIntentionRef]
+    unrefs: List[_IntentDetectUnrefs]
+
+
+class IntentDetectIntentionSOP(BaseModel):
+    """
+    Represents an intention mapped to a Standard Operating Procedure (SOP).
+
+    Attributes:
+        intention (IntentDetectIntentionRef): Reference to the intention.
+        sop (Optional[SOPItem]): Standard Operating Procedure (SOP) item related to the intention, or None
+            when no SOP matches the intention.
+    """
+
+    intention: IntentDetectIntentionRef
+    # Declared Optional so that an explicit None (or JSON null, e.g. when re-parsing a dumped result)
+    # validates instead of being rejected as "not a SOPItem".
+    sop: Optional[SOPItem] = None
+
+
+class IntentDetectResult(BaseModel):
+    """
+    Represents the result of intention detection.
+
+    Both lists default to empty, so a result can be created first and filled in afterwards.
+
+    Attributes:
+        clarifications (List[IntentDetectClarification]): List of clarifications for unclear intentions.
+        intentions (List[IntentDetectIntentionSOP]): List of intentions mapped to Standard Operating Procedures (SOPs).
+    """
+
+    clarifications: List[IntentDetectClarification] = Field(default_factory=list)
+    intentions: List[IntentDetectIntentionSOP] = Field(default_factory=list)
+
+
+class IntentDetect(Action):
+    """
+    Action class for intention detection.
+
+    The action sends three LLM requests in sequence: it classifies the intentions expressed in the dialog,
+    groups the original dialog text under each intention, and matches every intention against the SOP
+    descriptions in `SOP_CONFIG`.
+
+    Attributes:
+        _dialog_intentions (_IntentDetectDialogIntentions): Intentions and clarification requests parsed from
+            the first LLM reply. Reset to an empty container when that reply holds no JSON block.
+        _references (_IntentDetectReferences): Dialog text grouped by intention, plus unreferenced text.
+            Reset to an empty container when the reply holds no JSON block.
+        _intent_to_sops (List[_IntentSOP]): Mapping of intentions to Standard Operating Procedures (SOPs).
+            An empty list when the reply holds no JSON block.
+        result (IntentDetectResult): Result of the most recent `run` call; None before the first call.
+    """
+
+    _dialog_intentions: Optional[_IntentDetectDialogIntentions] = None
+    _references: Optional[_IntentDetectReferences] = None
+    _intent_to_sops: Optional[List[_IntentSOP]] = None
+    result: Optional[IntentDetectResult] = None
+
+    async def run(self, with_messages: List[Message] = None, **kwargs) -> Message:
+        """
+        Runs the intention detection action.
+
+        Args:
+            with_messages (List[Message]): List of messages representing the conversation content.
+                None is treated as an empty conversation.
+            **kwargs: Additional keyword arguments (unused).
+
+        Returns:
+            Message: An assistant message whose `content` is the JSON dump of the detection result and whose
+                `instruct_content` is the result object itself. The same object is stored in `self.result`.
+        """
+        msg_markdown = self._message_to_markdown(with_messages)
+        intentions = await self._get_intentions(msg_markdown)
+        await self._get_references(msg_markdown, intentions)
+        await self._get_sops()
+
+        self.result = IntentDetectResult(clarifications=self._dialog_intentions.clarifications)
+        sops_by_description = {sop.description: sop for sop in SOP_CONFIG}
+        # The LLM is told to report "no match" with an empty "sop" string, so those entries are dropped.
+        matches = {match.intent: match for match in self._intent_to_sops if match.sop != ""}
+        for ref in self._references.intentions:
+            item = IntentDetectIntentionSOP(intention=ref)
+            match = matches.get(ref.intent)
+            if match is not None:
+                item.sop = sops_by_description.get(match.sop)
+                # The LLM may not echo the description verbatim; fall back to the 1-based index it was
+                # asked to return alongside the description.
+                if item.sop is None and 1 <= match.sop_index <= len(SOP_CONFIG):
+                    item.sop = SOP_CONFIG[match.sop_index - 1]
+            self.result.intentions.append(item)
+
+        return Message(
+            content=self.result.model_dump_json(), role="assistant", cause_by=self, instruct_content=self.result
+        )
+
+    async def _get_intentions(self, msg_markdown: str) -> List[str]:
+        """
+        Asks the LLM to classify the intentions in the dialog and stores them in `_dialog_intentions`.
+
+        Args:
+            msg_markdown (str): The dialog rendered by `_message_to_markdown`.
+
+        Returns:
+            List[str]: The detected intentions; empty when the LLM reply contains no JSON block.
+        """
+        rsp = await self.llm.aask(
+            msg_markdown,
+            system_msgs=[
+                "You are a tool that can classify user intentions.",
+                "Detect and classify the intention of each word spoken by the user in the conversation.",
+                "If the user's intention is not clear, create a request for the user to clarify the intention of"
+                " the unclear words.",
+                "Return a markdown object with:\n"
+                '- an "intentions" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "ref" key containing the original words reference;\n'
+                '  - an "intent" key explaining the intention of the referenced word;\n'
+                '- a "clarifications" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "ref" key containing the original words reference;\n'
+                '  - a "clarification" key containing a question, in the tone of an assistant, prompts the user to provide more details about the intention regarding the unclear word(s) referenced in the user\'s description.',
+            ],
+            stream=False,
+        )
+        logger.debug(rsp)
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            # Always leave a valid (empty) container behind so later steps neither dereference None nor
+            # reuse stale data from a previous run.
+            logger.warning("No JSON block found in the intention classification response.")
+            self._dialog_intentions = _IntentDetectDialogIntentions(intentions=[], clarifications=[])
+            return []
+        self._dialog_intentions = _IntentDetectDialogIntentions.model_validate_json(json_blocks[0])
+        return [i.intent for i in self._dialog_intentions.intentions]
+
+    async def _get_references(self, msg_markdown: str, intentions: List[str]):
+        """
+        Asks the LLM to group the original dialog text by intention and stores the result in `_references`.
+
+        Args:
+            msg_markdown (str): The dialog rendered by `_message_to_markdown`.
+            intentions (List[str]): The intentions returned by `_get_intentions`.
+        """
+        intention_list = "\n".join(f"- {i}" for i in intentions)
+        prompt = f"## Dialog\n{msg_markdown}\n---\n## Intentions\n{intention_list}\n"
+        rsp = await self.llm.aask(
+            prompt,
+            system_msgs=[
+                "You are a tool that categorizes text content by intent.",
+                "Place the original text from the `Dialog` section under the matching intent of `Intentions` section.",
+                "Allow different intents to reference the same original text.",
+                "Return a markdown JSON object with:\n"
+                '- an "intentions" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "intent" key containing the intention from "Intentions" section;\n'
+                '  - a "refs" key containing a list of strings of original text from the "Dialog" section that match'
+                " the intention.\n"
+                '- a "unrefs" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "ref" key containing the unreferenced original text.\n'
+                '  - a "reason" key explaining why it is unreferenced.\n',
+            ],
+            stream=False,
+        )
+        logger.debug(rsp)
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            # Keep `_references` usable by `_get_sops` and `run` even when nothing could be parsed.
+            logger.warning("No JSON block found in the intention reference response.")
+            self._references = _IntentDetectReferences(intentions=[], unrefs=[])
+            return
+        self._references = _IntentDetectReferences.model_validate_json(json_blocks[0])
+
+    async def _get_sops(self):
+        """
+        Asks the LLM to match each referenced intention with an SOP and stores the matches in `_intent_to_sops`.
+
+        Both the intentions and the SOP descriptions are presented to the LLM as lists numbered from 1.
+        """
+        intention_lines = []
+        for index, ref in enumerate(self._references.intentions, start=1):
+            intention_lines.append(f"{index}. intent: {ref.intent}\n")
+            intention_lines.extend(f"   - ref: {text}\n" for text in ref.refs)
+        intention_list = "".join(intention_lines)
+        sop_list = "".join(f"{index}. {sop.description}\n" for index, sop in enumerate(SOP_CONFIG, start=1))
+        prompt = f"## Intentions\n{intention_list}\n---\n## SOPs\n{sop_list}\n"
+        rsp = await self.llm.aask(
+            prompt,
+            system_msgs=[
+                "You are a tool that matches user intentions with Standard Operating Procedures (SOPs).",
+                'You search for matching SOPs under "SOPs" based on user intentions in "Intentions" and their related original descriptions.',
+                'Inspect each intention in "Intentions".',
+                "Return a markdown JSON list of objects, where each object contains:\n"
+                '- an "intent" key containing the intention from the "Intentions" section;\n'
+                '- a "sop" key containing the SOP description from the "SOPs" section; filled with an empty string if no match.\n'
+                '- a "sop_index" key containing the int type index of SOP description from the "SOPs" section; filled with 0 if no match.\n'
+                '- a "reason" key explaining why it is matching/mismatching.\n',
+            ],
+            stream=False,
+        )
+        logger.debug(rsp)
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            # Without a JSON block there is nothing to match; every intention is reported without an SOP.
+            logger.warning("No JSON block found in the SOP matching response.")
+            self._intent_to_sops = []
+            return
+        self._intent_to_sops = [_IntentSOP.model_validate(i) for i in json.loads(json_blocks[0])]
+
+    @staticmethod
+    def _message_to_markdown(messages) -> str:
+        """
+        Renders messages as a markdown block quote, one `> role: content` entry per message.
+
+        Line breaks inside a message are replaced with spaces so that each message stays on one quoted line.
+
+        Args:
+            messages: Iterable of objects with `role` and `content` attributes; None is treated as empty.
+
+        Returns:
+            str: The rendered dialog, or an empty string when there are no messages.
+        """
+        lines = []
+        for message in messages or []:
+            content = message.content.replace("\n", " ")
+            lines.append(f"> {message.role}: {content}\n>\n")
+        return "".join(lines)
--- a/metagpt/tools/libs/dialog.py
+++ b/metagpt/tools/libs/dialog.py
@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+This script defines tools for dialog.
+"""
+
+from typing import List
+
+from metagpt.actions.intent_detect import IntentDetect, IntentDetectResult
+from metagpt.context import Context
+from metagpt.schema import Message
+from metagpt.tools.tool_registry import register_tool
+
+
+@register_tool(tags=["dialog", "intent detect"])
+async def intent_detect(messages: List[Message]) -> IntentDetectResult:
+    """Detects the intentions expressed in a list of dialog messages.
+
+    Args:
+        messages (List[Message]): A list of dialog messages.
+
+    Returns:
+        IntentDetectResult: The result of intent detection.
+
+    Example:
+        >>> # Create messages
+        >>> dialog = [
+        >>>     {"role":"user", "content":"user queries ..."},
+        >>>     {"role":"assistant", "content": "assistant answers ..."},
+        >>>     ...
+        >>> ]
+        >>> from metagpt.schema import Message
+        >>> messages = [Message.model_validate(i) for i in dialog]
+        >>> result = await intent_detect(messages)
+        >>> print(result.model_dump_json())
+        {
+            "clarifications": [
+                {
+                    "ref": "web app",
+                    "clarification": "Could you provide more details about what you are looking to achieve with ...?"
+                }
+            ],
+            "intentions": [
+                {
+                    "intention": {
+                        "intent": "Request to build a service that can receive text messages and ...",
+                        "refs": [
+                            "Can you build TextToSummarize which is a SMS number that I can text ..."
+                        ]
+                    },
+                    "sop": {
+                        "description": "Intentions related to or including software development, such as ...",
+                        "sop": [
+                            "Writes a PRD based on software requirements.",
+                            "Writes a design to the project repository, based on the PRD of the project.",
+                            "Writes a project plan to the project repository, based on the design of the project.",
+                            "Writes codes to the project repository, based on the project plan of the project.",
+                            "Run QA test on the project repository.",
+                            "Stage and commit changes for the project repository using Git."
+                        ]
+                    }
+                },
+                {
+                    "intention": {
+                        "intent": "Request for a phone number to send text messages for the summarization service",
+                        "refs": []
+                    },
+                    "sop": null
+                }
+            ]
+        }
+    """
+    # Each call works on its own freshly created context and action instance.
+    detector = IntentDetect(context=Context())
+    await detector.run(messages)
+    return detector.result
--- a/tests/metagpt/actions/test_intent_detect.py
+++ b/tests/metagpt/actions/test_intent_detect.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import json
+
+import pytest
+
+from metagpt.actions.intent_detect import IntentDetect
+from metagpt.schema import Message
+
+# Multi-turn dialog fixture: a software-development request followed by a follow-up question.
+# NOTE(review): the assistant texts contain misspellings ("Generalte", "teh", "togather"). They are part of
+# the prompt text sent to the LLM, so they are left untouched here — confirm whether they are intentional.
+DEMO_CONTENT = [
+    {
+        "role": "user",
+        "content": "Can you build TextToSummarize which is a SMS number that I can text and it will scrape a website "
+        "and summarize it with ChatGPT",
+    },
+    {
+        "role": "assistant",
+        "content": "Absolutely, I can build a service like TextToSummarize for you. The process will involve setting up"
+        " an SMS service that can receive your texts, scraping the website content you send, and then using"
+        " an AI to summarize the content.\nTo get started, I'll need to set up a number for receiving SMS"
+        ", then I'll work on the script to scrape the website content from the URLs you text, and finally."
+        " I'll integrate it with an AI service to Generalte the summaries.\n I'll keep you updated on my"
+        " progress!",
+    },
+    {"role": "user", "content": "What exactly do we need the web app for?"},
+    {
+        "role": "assistant",
+        "content": "The web app will service as the interface between the SMS service and the AI summarization service"
+        ". When you send a text with a URL to the SMS number, teh web app will process that, scrape the"
+        " content from the URL, and then use the AI to summarize it. The summary will then be sent back to"
+        " you. It's the central hub that ties all the components of the service togather.",
+    },
+]
+
+# Single-turn dialog fixture: one user request with no assistant reply.
+DEMO1_CONTENT = [
+    {
+        "role": "user",
+        "content": "Extract all of the blog posts from `https://stripe.com/blog/page/1` and return a CSV with the"
+        " columns `date`, `article_text`, `author` and `summary`. Generate a summary for each article"
+        " yourself.",
+    }
+]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "content",
+    [json.dumps(DEMO1_CONTENT), json.dumps(DEMO_CONTENT)],
+)
+async def test_intent_detect(content: str, context):
+    """Run `IntentDetect` on a JSON-encoded dialog and check that every intermediate state is populated."""
+    dialog = json.loads(content)
+    messages = [Message.model_validate(entry) for entry in dialog]
+    action = IntentDetect(context=context)
+
+    rsp = await action.run(messages)
+
+    assert isinstance(rsp, Message)
+    # Each stage of the action must have left its parsed output behind.
+    assert action._dialog_intentions
+    assert action._references
+    assert action._intent_to_sops
+    assert action.result
+
+
+# Allow running this test module directly; "-s" disables pytest's output capturing.
+if __name__ == "__main__":
+    pytest.main([__file__, "-s"])
--- a/tests/metagpt/tools/libs/test_dialog.py
+++ b/tests/metagpt/tools/libs/test_dialog.py
@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import pytest
+
+from metagpt.actions.intent_detect import IntentDetectResult
+from metagpt.logs import logger
+from metagpt.schema import Message
+from metagpt.tools.libs.dialog import intent_detect
+from tests.metagpt.actions.test_intent_detect import DEMO_CONTENT
+
+
+@pytest.mark.asyncio
+async def test_intent_detect():
+    """Run the `intent_detect` tool on the demo dialog and check that it returns an `IntentDetectResult`."""
+    dialog = [Message.model_validate(entry) for entry in DEMO_CONTENT]
+    detected = await intent_detect(dialog)
+    assert isinstance(detected, IntentDetectResult)
+    assert detected
+    logger.info(f"dialog:{DEMO_CONTENT}\nresult:{detected.model_dump_json()}")
+
+
+# Allow running this test module directly; "-s" disables pytest's output capturing.
+if __name__ == "__main__":
+    pytest.main([__file__, "-s"])