update example for mgx

add intent detect action and mgx role
2026-07-17 16:41:05 +02:00 · 2024-03-30 12:05:22 +08:00 · 2024-03-30 12:05:22 +08:00 · a9938472fc
commit a9938472fc
parent 154e108fb0
4 changed files with 357 additions and 0 deletions
--- a/examples/mgx/init.py
+++ b/examples/mgx/init.py
@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    :
--- a/examples/mgx/run_mgx.py
+++ b/examples/mgx/run_mgx.py
@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    : Example of running the MGX role on a sample requirement with intent detection.
+import asyncio
+
+from metagpt.roles.di.mgx import MGX
+
+# Sample requirement fed to the MGX role when this example is run as a script.
+requirement = (
+    "design a game using Gym (an open source Python library), including a graphical interface and interactive gameplay"
+)
+
+
+async def main(requirement: str = ""):
+    """Run the MGX role on ``requirement`` with intent detection switched on.
+
+    Args:
+        requirement: Natural-language description of the task handed to ``MGX.run``.
+    """
+    # The field declared on MGX is `use_intent`; the previous spelling (`ususe_intent`)
+    # did not name that field, so the flag this example means to set was never applied.
+    mgx = MGX(use_intent=True)
+    await mgx.run(requirement)
+
+
+if __name__ == "__main__":
+    asyncio.run(main(requirement))
--- a/metagpt/actions/intent_detect.py
+++ b/metagpt/actions/intent_detect.py
@ -0,0 +1,261 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+This script is designed to classify intentions from complete conversation content.
+
+Usage:
+    This script can be used to classify intentions from a conversation. It utilizes models for detecting intentions
+    from the text provided and categorizes them accordingly. If the intention of certain words or phrases is unclear,
+    it prompts the user for clarification.
+
+Dependencies:
+    This script depends on the metagpt library, pydantic, and other utilities for message parsing and interaction.
+
+"""
+import json
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+from metagpt.actions import Action
+from metagpt.logs import logger
+from metagpt.schema import Message
+from metagpt.utils.common import parse_json_code_block
+
+
+class SOPItem(BaseModel):
+    """
+    Represents an item in a Standard Operating Procedure (SOP).
+
+    Attributes:
+        description (str): The description or title of the SOP. Within this module it is the text listed under
+            "SOPs" in the matching prompt and the key used by `IntentDetect.run` to look the item up.
+        sop (List[str]): The steps or details of the SOP.
+    """
+
+    description: str
+    sop: List[str]
+
+
+# SOPs that a detected intention can be matched to. `IntentDetect._get_sops` lists every `description`
+# to the LLM as a numbered entry, and `IntentDetect.run` uses the `description` text returned by the LLM
+# as the lookup key, so a description must stay unique within this list.
+SOP_CONFIG = [
+    SOPItem(
+        description="Intentions related to or including software development, such as developing or building software, games, programming, app, websites, etc.",
+        sop=[
+            "Writes a PRD based on software requirements.",
+            "Writes a design to the project repository, based on the PRD of the project.",
+            "Writes a project plan to the project repository, based on the design of the project.",
+            "Writes codes to the project repository, based on the project plan of the project.",
+            "Run QA test on the project repository.",
+            "Stage and commit changes for the project repository using Git.",
+        ],
+    )
+]
+
+
+class IntentDetectClarification(BaseModel):
+    """
+    Represents clarifications for unclear intentions.
+
+    The same model is used to parse the "clarifications" list of the first LLM reply and to carry those
+    entries in the final `IntentDetectResult`.
+
+    Attributes:
+        ref (str): The reference to the original words.
+        clarification (str): A question for the user to clarify the intention of the unclear words.
+    """
+
+    ref: str
+    clarification: str
+
+
+class IntentDetectIntentionRef(BaseModel):
+    """
+    Represents intentions along with their references.
+
+    Instances are parsed from the "intentions" list of the reply to the reference-grouping prompt and are
+    embedded unchanged in `IntentDetectIntentionSOP`.
+
+    Attributes:
+        intent (str): The intention from the "Intentions" section.
+        refs (List[str]): List of original text references from the "Dialog" section that match the intention.
+    """
+
+    intent: str
+    refs: List[str]
+
+
+class IntentDetectIntentionSOP(BaseModel):
+    """
+    Represents an intention mapped to a Standard Operating Procedure (SOP).
+
+    Attributes:
+        intention (IntentDetectIntentionRef): Reference to the intention.
+        sop (SOPItem, optional): Standard Operating Procedure (SOP) item related to the intention.
+            None when no SOP matches the intention.
+    """
+
+    intention: IntentDetectIntentionRef
+    # Declared Optional so that a dumped result containing `"sop": null` (the no-match case)
+    # can be validated back into this model instead of being rejected.
+    sop: Optional[SOPItem] = None
+
+
+class IntentDetectResult(BaseModel):
+    """
+    Represents the result of intention detection.
+
+    `IntentDetect.run` returns it twice over: dumped to JSON as the content of the returned message and
+    attached as that message's `instruct_content`.
+
+    Attributes:
+        clarifications (List[IntentDetectClarification]): List of clarifications for unclear intentions.
+        intentions (List[IntentDetectIntentionSOP]): List of intentions mapped to Standard Operating Procedures (SOPs).
+    """
+
+    # Both lists default to empty so a result can be created first and filled in afterwards.
+    clarifications: List[IntentDetectClarification] = Field(default_factory=list)
+    intentions: List[IntentDetectIntentionSOP] = Field(default_factory=list)
+
+
+class IntentDetect(Action):
+    """
+    Action class for intention detection.
+
+    A run asks the LLM up to three questions: which intentions (and which unclear parts) the dialog contains,
+    which parts of the original dialog belong to each intention, and which entry of `SOP_CONFIG` matches
+    each intention. A reply without a JSON code block leaves the corresponding state empty instead of
+    aborting the run.
+
+    Attributes:
+        _dialog_intentions (IntentDetectDialogIntentions): Instance of IntentDetectDialogIntentions.
+            Dialog intentions for matching user intentions. None until a reply has been parsed.
+        _references (IntentDetectReferences): Instance of IntentDetectReferences.
+            References to intentions and unreferenced content. None until a reply has been parsed.
+        _intent_to_sops (List[IntentSOP]): List of IntentSOP objects.
+            Mapping of intentions to Standard Operating Procedures (SOPs). None until `_get_sops` has run.
+        result (IntentDetectResult): Instance of IntentDetectResult.
+            Result object containing the outcome of intention detection.
+    """
+
+    # Schema of the first LLM reply: detected intentions plus clarification questions.
+    class IntentDetectDialogIntentions(BaseModel):
+        class IntentDetectIntention(BaseModel):
+            ref: str
+            intent: str
+
+        intentions: List[IntentDetectIntention]
+        clarifications: List[IntentDetectClarification]
+
+    # Schema of the second LLM reply: dialog text grouped by intention plus unreferenced text.
+    class IntentDetectReferences(BaseModel):
+        class IntentDetectUnrefs(BaseModel):
+            ref: str
+            reason: str
+
+        intentions: List[IntentDetectIntentionRef]
+        unrefs: List[IntentDetectUnrefs]
+
+    # Schema of one entry of the third LLM reply: the SOP chosen for an intention.
+    class IntentSOP(BaseModel):
+        intent: str
+        sop: str
+        sop_index: int
+        reason: str
+
+    _dialog_intentions: Optional[IntentDetectDialogIntentions] = None
+    _references: Optional[IntentDetectReferences] = None
+    _intent_to_sops: Optional[List[IntentSOP]] = None
+    result: Optional[IntentDetectResult] = None
+
+    async def run(self, with_messages: List[Message] = None, **kwargs) -> Message:
+        """
+        Runs the intention detection action.
+
+        Args:
+            with_messages (List[Message]): List of messages representing the conversation content.
+                None is treated like an empty conversation.
+            **kwargs: Additional keyword arguments (not used by this method).
+
+        Returns:
+            Message: An assistant message whose content is the JSON dump of the `IntentDetectResult` and whose
+                `instruct_content` is the result object itself.
+        """
+        # Forget the state of any earlier run so that an unparsable reply cannot leak stale data into this result.
+        self._dialog_intentions = None
+        self._references = None
+        self._intent_to_sops = None
+
+        msg_markdown = self._message_to_markdown(with_messages)
+        intentions = await self._get_intentions(msg_markdown)
+        logger.info(intentions)
+        await self._get_references(msg_markdown, intentions)
+        await self._get_sops()
+
+        # Each intermediate state is still None when its reply carried no JSON block; fall back to "nothing found".
+        clarifications = self._dialog_intentions.clarifications if self._dialog_intentions else []
+        references = self._references.intentions if self._references else []
+        self.result = IntentDetectResult(clarifications=clarifications)
+        sops = {i.description: i for i in SOP_CONFIG}
+        # An empty "sop" string is how the LLM is told to report "no match".
+        intent_to_sops = {i.intent: i.sop for i in (self._intent_to_sops or []) if i.sop != ""}
+        for i in references:
+            item = IntentDetectIntentionSOP(intention=i)
+            key = intent_to_sops.get(i.intent)
+            if key:
+                item.sop = sops.get(key)
+            self.result.intentions.append(item)
+
+        return Message(
+            content=self.result.model_dump_json(), role="assistant", cause_by=self, instruct_content=self.result
+        )
+
+    async def _get_intentions(self, msg_markdown: str) -> List[str]:
+        """
+        Asks the LLM for the intentions and clarification questions contained in the dialog.
+
+        Args:
+            msg_markdown (str): The dialog rendered by `_message_to_markdown`.
+
+        Returns:
+            List[str]: The detected intention texts; empty when the reply contains no JSON code block.
+        """
+        rsp = await self.llm.aask(
+            msg_markdown,
+            system_msgs=[
+                "You are a tool that can classify user intentions.",
+                "Detect and classify the intention of each word spoken by the user in the conversation.",
+                "If the user's intention is not clear, create a request for the user to clarify the intention of"
+                " the unclear words.",
+                # The reply is parsed with parse_json_code_block, so ask for JSON explicitly like the other prompts.
+                "Return a markdown JSON object with:\n"
+                '- an "intentions" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "ref" key containing the original words reference;\n'
+                '  - an "intent" key explaining the intention of the referenced word;\n'
+                '- a "clarifications" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "ref" key containing the original words reference;\n'
+                '  - a "clarification" key containing a question, in the tone of an assistant, prompts the user to provide more details about the intention regarding the unclear word(s) referenced in the user\'s description.',
+            ],
+            stream=False,
+        )
+        logger.debug(rsp)
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            return []
+        self._dialog_intentions = self.IntentDetectDialogIntentions.model_validate_json(json_blocks[0])
+        return [i.intent for i in self._dialog_intentions.intentions]
+
+    async def _get_references(self, msg_markdown: str, intentions: List[str]) -> None:
+        """
+        Asks the LLM to group the original dialog text under each detected intention.
+
+        The parsed reply is stored in `_references`, which is left untouched when the reply contains no
+        JSON code block.
+
+        Args:
+            msg_markdown (str): The dialog rendered by `_message_to_markdown`.
+            intentions (List[str]): The intention texts returned by `_get_intentions`.
+        """
+        intention_list = "\n".join([f"- {i}" for i in intentions])
+        prompt = f"## Dialog\n{msg_markdown}\n---\n## Intentions\n{intention_list}\n"
+        rsp = await self.llm.aask(
+            prompt,
+            system_msgs=[
+                "You are a tool that categorizes text content by intent.",
+                "Place the original text from the `Dialog` section under the matching intent of `Intentions` section.",
+                "Allow different intents to reference the same original text.",
+                "Return a markdown JSON object with:\n"
+                '- an "intentions" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "intent" key containing the intention from "Intentions" section;\n'
+                '  - a "refs" key containing a list of strings of original text from the "Dialog" section that match'
+                " the intention.\n"
+                '- a "unrefs" key containing a list of JSON objects, where each object contains:\n'
+                '  - a "ref" key containing the unreferenced original text.\n'
+                '  - a "reason" key explaining why it is unreferenced.\n',
+            ],
+            stream=False,
+        )
+        logger.debug(rsp)
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            return
+        self._references = self.IntentDetectReferences.model_validate_json(json_blocks[0])
+
+    async def _get_sops(self) -> None:
+        """
+        Asks the LLM which SOP of `SOP_CONFIG` matches each referenced intention.
+
+        The parsed reply is stored in `_intent_to_sops`. The list is set to empty, without failing, when there
+        is no referenced intention to match or when the reply contains no JSON code block.
+        """
+        referenced = self._references.intentions if self._references else []
+        if not referenced:
+            # run() only attaches SOPs to referenced intentions, so a reply could not change the result.
+            self._intent_to_sops = []
+            return
+
+        intention_lines = []
+        for i, v in enumerate(referenced, start=1):
+            intention_lines.append(f"{i}. intent: {v.intent}\n")
+            intention_lines.extend(f"   - ref: {j}\n" for j in v.refs)
+        intention_list = "".join(intention_lines)
+        sop_list = "".join(f"{i}. {v.description}\n" for i, v in enumerate(SOP_CONFIG, start=1))
+        prompt = f"## Intentions\n{intention_list}\n---\n## SOPs\n{sop_list}\n"
+        rsp = await self.llm.aask(
+            prompt,
+            system_msgs=[
+                "You are a tool that matches user intentions with Standard Operating Procedures (SOPs).",
+                'You search for matching SOPs under "SOPs" based on user intentions in "Intentions" and their related original descriptions.',
+                'Inspect each intention in "Intentions".',
+                "Return a markdown JSON list of objects, where each object contains:\n"
+                '- an "intent" key containing the intention from the "Intentions" section;\n'
+                '- a "sop" key containing the SOP description from the "SOPs" section; filled with an empty string if no match.\n'
+                '- a "sop_index" key containing the int type index of SOP description from the "SOPs" section; filled with 0 if no match.\n'
+                '- a "reason" key explaining why it is matching/mismatching.\n',
+            ],
+            stream=False,
+        )
+        logger.debug(rsp)
+        json_blocks = parse_json_code_block(rsp)
+        if not json_blocks:
+            self._intent_to_sops = []
+            return
+        vv = json.loads(json_blocks[0])
+        self._intent_to_sops = [self.IntentSOP.model_validate(i) for i in vv]
+
+    @staticmethod
+    def _message_to_markdown(messages) -> str:
+        """
+        Renders messages as a markdown quote block, one "> role: content" entry per message.
+
+        Args:
+            messages: Iterable of objects exposing `role` and `content`; None is treated as empty.
+
+        Returns:
+            str: The rendered dialog; an empty string when there are no messages.
+        """
+        entries = []
+        for i in messages or []:
+            # Newlines are flattened so that every message stays on a single quoted line.
+            content = i.content.replace("\n", " ")
+            entries.append(f"> {i.role}: {content}\n>\n")
+        return "".join(entries)
--- a/metagpt/roles/di/mgx.py
+++ b/metagpt/roles/di/mgx.py
@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    : MGX role, a DataInterpreter that can run intent detection on the requirement before planning.
+import asyncio
+import json
+from typing import Dict, List
+
+from metagpt.actions.intent_detect import IntentDetect
+from metagpt.logs import logger
+from metagpt.roles.di.data_interpreter import DataInterpreter
+from metagpt.schema import Message
+
+
+class MGX(DataInterpreter):
+    """DataInterpreter variant that can refine the user requirement through intent detection before planning.
+
+    Attributes:
+        use_intent: When True, `_plan_and_act` passes the latest requirement through `_intent_detect` and
+            plans for the goal it returns.
+        intents: Maps each detected intention text to the SOP steps matched for it (None when no SOP matched).
+    """
+
+    use_intent: bool = True
+    intents: Dict = {}
+
+    async def _intent_detect(self, user_msgs: List[Message] = None, **kwargs):
+        """Detect the intention behind `user_msgs` and build the planning goal from it.
+
+        Only the first detected intention is used.
+
+        Args:
+            user_msgs: Messages that make up the conversation to analyse.
+            **kwargs: Not used by this method.
+
+        Returns:
+            The first original text reference of the first detected intention, followed by a "---" line and
+            the matched SOP steps (one per line) when an SOP matched. When no reference text was detected,
+            the content of the last message in `user_msgs` is used instead (an empty string if there is none).
+        """
+        user_msgs = user_msgs or []
+        # Used whenever detection yields no usable reference, so the planner still gets the user's request.
+        fallback_goal = user_msgs[-1].content if user_msgs else ""
+
+        todo = IntentDetect(context=self.context)
+        intent_desp = await todo.run(user_msgs)
+        intent_desp = json.loads(intent_desp.content)
+        logger.info(f"intent_desp is {intent_desp}")
+
+        # Extract intent and sop prompt. Every level may be missing, empty or null in the dumped result
+        # (e.g. "sop" is null when no SOP matched), so none of them is indexed blindly.
+        intentions = intent_desp.get("intentions") or []
+        intents = intentions[0] if intentions else {}
+        refs = (intents.get("intention") or {}).get("refs") or []
+        intention_ref = refs[0] if refs else fallback_goal
+        sop = (intents.get("sop") or {}).get("sop")
+        self.intents.update({intention_ref: sop})
+
+        if not sop:
+            return intention_ref
+        # Keep the separator on a line of its own so it is not glued to the first SOP step.
+        return intention_ref + "\n---\n" + "\n".join(sop)
+
+    async def _plan_and_act(self) -> Message:
+        """first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically."""
+
+        # create initial plan and update it until confirmation
+        goal = self.rc.memory.get()[-1].content  # retrieve latest user requirement
+        if self.use_intent:  # add mode
+            user_message = Message(content=goal, role="user")
+            goal = await self._intent_detect(user_msgs=[user_message])
+        logger.info(f"Goal is {goal}")
+
+        await self.planner.update_plan(goal=goal)
+
+        # take on tasks until all finished
+        while self.planner.current_task:
+            task = self.planner.current_task
+            logger.info(f"ready to take on task {task}")
+
+            # take on current task
+            task_result = await self._act_on_task(task)
+
+            # process the result, such as reviewing, confirming, plan updating
+            await self.planner.process_task_result(task_result)
+
+        rsp = self.planner.get_useful_memories()[0]  # return the completed plan as a response
+
+        self.rc.memory.add(rsp)  # add to persistent memory
+
+        return rsp
+
+
+if __name__ == "__main__":
+    # Manual smoke test: run a fresh MGX role on each requirement below.
+    # Every entry ends with a comma so that enabling one of the commented alternatives adds a new
+    # requirement instead of being silently concatenated with the neighbouring string literal.
+    test_requirements = [
+        "design a game using Gym (an open source Python library), including a graphical interface and interactive gameplay",
+        # "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy",
+        # "Fetch the content of https://www.stats.gov.cn/sj/sjjd/202307/t20230718_1941322.html and return the increase or decrease of the CPI in the first half of the year",
+    ]
+
+    for requirement in test_requirements:
+        mgx = MGX()
+        asyncio.run(mgx.run(requirement))