Refine a lot of code, fix pylint warnings, and use ActionNode in more places, including ui, Action._aask_v1, detail_mining, prepare_interview, etc.

2026-07-02 16:01:04 +02:00 · 2023-12-19 21:17:02 +08:00 · 2023-12-19 21:17:02 +08:00 · a75ab7971f
commit a75ab7971f
parent 25ea21321f
12 changed files with 150 additions and 215 deletions
--- a/metagpt/actions/action.py
+++ b/metagpt/actions/action.py
@ -11,15 +11,9 @@ from __future__ import annotations
 from typing import Optional, Any

 from pydantic import BaseModel, Field
-from tenacity import retry, stop_after_attempt, wait_random_exponential
-
-from metagpt.actions.action_output import ActionOutput
 from metagpt.actions.action_node import ActionNode
 from metagpt.llm import LLM
-from metagpt.logs import logger
 from metagpt.provider.base_gpt_api import BaseGPTAPI
-from metagpt.provider.postprecess.llm_output_postprecess import llm_output_postprecess
-from metagpt.utils.common import OutputParser, general_after_log


 action_subclass_registry = {}
@ -31,7 +25,7 @@ class Action(BaseModel):
    context = ""
    prefix = ""  # aask*时会加上prefix，作为system_message
    desc = ""  # for skill manager
-    node: ActionNode = Field(default_factory=ActionNode)
+    node: ActionNode = Field(default_factory=ActionNode, exclude=True)

    # builtin variables
    builtin_class_name: str = ""
@ -74,32 +68,6 @@ class Action(BaseModel):
        system_msgs.append(self.prefix)
        return await self.llm.aask(prompt, system_msgs)

-    @retry(
-        wait=wait_random_exponential(min=1, max=60),
-        stop=stop_after_attempt(6),
-        after=general_after_log(logger),
-    )
-    async def _aask_v1(
-            self,
-            prompt: str,
-            output_class_name: str,
-            output_data_mapping: dict,
-            system_msgs: Optional[list[str]] = None,
-            format="markdown",  # compatible to original format
-    ) -> ActionOutput:
-        content = await self.llm.aask(prompt, system_msgs)
-        logger.debug(f"llm raw output:\n{content}")
-        output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping)
-
-        if format == "json":
-            parsed_data = llm_output_postprecess(output=content, schema=output_class.schema(), req_key="[/CONTENT]")
-        else:  # using markdown parser
-            parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping)
-
-        logger.debug(parsed_data)
-        instruct_content = output_class(**parsed_data)
-        return ActionOutput(content, instruct_content)
-
    async def run(self, *args, **kwargs):
        """Run action"""
        raise NotImplementedError("The run method should be implemented in a subclass.")
--- a/metagpt/actions/action_node.py
+++ b/metagpt/actions/action_node.py
@ -6,17 +6,15 @@
@File    : action_node.py
 """
 import json
-import re
-from typing import Any, Dict, List, Optional, Type
+from typing import Dict, Generic, List, Optional, Type, TypeVar

 from pydantic import BaseModel, create_model, root_validator, validator
 from tenacity import retry, stop_after_attempt, wait_random_exponential

-from metagpt.actions import ActionOutput
 from metagpt.llm import BaseGPTAPI
 from metagpt.logs import logger
-from metagpt.utils.common import OutputParser
-from metagpt.utils.custom_decoder import CustomDecoder
+from metagpt.provider.postprecess.llm_output_postprecess import llm_output_postprecess
+from metagpt.utils.common import OutputParser, general_after_log

 CONSTRAINT = """
 - Language: Please use the same language as the user input.
@ -43,14 +41,17 @@ Fill in the above nodes based on the format example.
 """


-def dict_to_markdown(d, prefix="###", postfix="\n"):
+def dict_to_markdown(d, prefix="-", postfix="\n"):
    markdown_str = ""
    for key, value in d.items():
        markdown_str += f"{prefix} {key}: {value}{postfix}"
    return markdown_str


-class ActionNode:
+T = TypeVar("T")
+
+
+class ActionNode(Generic[T]):
    """ActionNode is a tree of nodes."""

    mode: str
@ -65,7 +66,7 @@ class ActionNode:
    expected_type: Type  # such as str / int / float etc.
    # context: str  # everything in the history.
    instruction: str  # the instructions should be followed.
-    example: Any  # example for In Context-Learning.
+    example: T  # example for In Context-Learning.

    # Action Output
    content: str
@ -76,7 +77,7 @@ class ActionNode:
        key: str,
        expected_type: Type,
        instruction: str,
-        example: str,
+        example: T,
        content: str = "",
        children: dict[str, "ActionNode"] = None,
    ):
@ -148,29 +149,6 @@ class ActionNode:
        new_class.__root_validator_check_missing_fields = classmethod(check_missing_fields)
        return new_class

-    @classmethod
-    def create_model_class_v2(cls, class_name: str, mapping: Dict[str, Type]):
-        """基于pydantic v2的模型动态生成，用来检验结果类型正确性，待验证"""
-        new_class = create_model(class_name, **mapping)
-
-        @model_validator(mode="before")
-        def check_missing_fields(data):
-            required_fields = set(mapping.keys())
-            missing_fields = required_fields - set(data.keys())
-            if missing_fields:
-                raise ValueError(f"Missing fields: {missing_fields}")
-            return data
-
-        @field_validator("*")
-        def check_name(v: Any, field: str) -> Any:
-            if field not in mapping.keys():
-                raise ValueError(f"Unrecognized block: {field}")
-            return v
-
-        new_class.__model_validator_check_missing_fields = classmethod(check_missing_fields)
-        new_class.__field_validator_check_name = classmethod(check_name)
-        return new_class
-
    def create_children_class(self):
        """使用object内有的字段直接生成model_class"""
        class_name = f"{self.key}_AN"
@ -245,6 +223,7 @@ class ActionNode:
        """

        # FIXME: json instruction会带来格式问题，如："Project name": "web_2048  # 项目名称使用下划线",
+        # compile_example does not support markdown for now
        self.instruction = self.compile_instruction(to="markdown", mode=mode)
        self.example = self.compile_example(to=to, tag="CONTENT", mode=mode)
        prompt = template.format(
@ -252,36 +231,32 @@ class ActionNode:
        )
        return prompt

-    @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(6))
+    @retry(
+        wait=wait_random_exponential(min=1, max=60),
+        stop=stop_after_attempt(6),
+        after=general_after_log(logger),
+    )
    async def _aask_v1(
        self,
        prompt: str,
        output_class_name: str,
        output_data_mapping: dict,
        system_msgs: Optional[list[str]] = None,
-        format="markdown",  # compatible to original format
-    ) -> ActionOutput:
+        schema="markdown",  # compatible to original format
+    ) -> (str, BaseModel):
+        """Ask the LLM, then parse and validate its reply; return (raw content, parsed model instance)."""
        content = await self.llm.aask(prompt, system_msgs)
-        logger.debug(content)
-        output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping)
-
-        if format == "json":
-            pattern = r"\[CONTENT\](\s*\{.*?\}\s*)\[/CONTENT\]"
-            matches = re.findall(pattern, content, re.DOTALL)
-
-            for match in matches:
-                if match:
-                    content = match
-                    break
-
-            parsed_data = CustomDecoder(strict=False).decode(content)
+        logger.debug(f"llm raw output:\n{content}")
+        output_class = self.create_model_class(output_class_name, output_data_mapping)

+        if schema == "json":
+            parsed_data = llm_output_postprecess(output=content, schema=output_class.schema(), req_key="[/CONTENT]")
        else:  # using markdown parser
            parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping)

-        logger.debug(parsed_data)
+        logger.debug(f"parsed_data:\n{parsed_data}")
        instruct_content = output_class(**parsed_data)
-        return ActionOutput(content, instruct_content)
+        return content, instruct_content

    def get(self, key):
        return self.instruct_content.dict()[key]
@ -302,9 +277,9 @@ class ActionNode:
        mapping = self.get_mapping(mode)

        class_name = f"{self.key}_AN"
-        output = await self._aask_v1(prompt, class_name, mapping, format=to)
-        self.content = output.content
-        self.instruct_content = output.instruct_content
+        content, scontent = await self._aask_v1(prompt, class_name, mapping, schema=to)
+        self.content = content
+        self.instruct_content = scontent
        return self

    async def fill(self, context, llm, to="json", mode="auto", strgy="simple"):
--- a/metagpt/actions/design_api.py
+++ b/metagpt/actions/design_api.py
@ -49,7 +49,7 @@ class WriteDesign(Action):
                "data structures, library tables, processes, and paths. Please provide your design, feedback " \
                "clearly and in detail."

-    async def run(self, with_messages: Message, format: str = CONFIG.prompt_format):
+    async def run(self, with_messages: Message, schema: str = CONFIG.prompt_schema):
        # Use `git diff` to identify which PRD documents have been modified in the `docs/prds` directory.
        prds_file_repo = CONFIG.git_repo.new_file_repository(PRDS_FILE_REPO)
        changed_prds = prds_file_repo.changed_files
@ -79,13 +79,13 @@ class WriteDesign(Action):
        # leaving room for global optimization in subsequent steps.
        return ActionOutput(content=changed_files.json(), instruct_content=changed_files)

-    async def _new_system_design(self, context, format=CONFIG.prompt_format):
-        node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=format)
+    async def _new_system_design(self, context, schema=CONFIG.prompt_schema):
+        node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=schema)
        return node

-    async def _merge(self, prd_doc, system_design_doc, format=CONFIG.prompt_format):
+    async def _merge(self, prd_doc, system_design_doc, schema=CONFIG.prompt_schema):
        context = NEW_REQ_TEMPLATE.format(old_design=system_design_doc.content, context=prd_doc.content)
-        node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=format)
+        node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=schema)
        system_design_doc.content = node.instruct_content.json(ensure_ascii=False)
        return system_design_doc

--- a/metagpt/actions/detail_mining.py
+++ b/metagpt/actions/detail_mining.py
@ -5,47 +5,31 @@
@Author  : fisherdeng
@File    : detail_mining.py
 """
-from metagpt.actions import Action, ActionOutput
+from metagpt.actions import Action
+from metagpt.actions.action_node import ActionNode

-PROMPT_TEMPLATE = """
-##TOPIC
+CONTEXT_TEMPLATE = """
+## TOPIC
 {topic}

-##RECORD
+## RECORD
 {record}
-
-##Format example
-{format_example}
-----
-
-Task: Refer to the "##TOPIC" (discussion objectives) and "##RECORD" (discussion records) to further inquire about the details that interest you, within a word limit of 150 words.
-Special Note 1: Your intention is solely to ask questions without endorsing or negating any individual's viewpoints.
-Special Note 2: This output should only include the topic "##OUTPUT". Do not add, remove, or modify the topic. Begin the output with '##OUTPUT', followed by an immediate line break, and then proceed to provide the content in the specified format as outlined in the "##Format example" section.
-Special Note 3: The output should be in the same language as the input.
 """
-FORMAT_EXAMPLE = """

-##
-
-##OUTPUT
-...(Please provide the specific details you would like to inquire about here.)
-
-##
-
-##
-"""
-OUTPUT_MAPPING = {
-    "OUTPUT": (str, ...),
-}
+QUESTIONS = ActionNode(
+    key="Questions",
+    expected_type=list[str],
+    instruction="Task: Refer to the context to further inquire about the details that interest you, within a word limit"
+    " of 150 words. Please provide the specific details you would like to inquire about here",
+    example=["1. What ...", "2. How ...", "3. ..."],
+)


 class DetailMining(Action):
-    """This class allows LLM to further mine noteworthy details based on specific "##TOPIC"(discussion topic) and "##RECORD" (discussion records), thereby deepening the discussion."""
+    """This class allows LLM to further mine noteworthy details based on specific "##TOPIC"(discussion topic) and
+    "##RECORD" (discussion records), thereby deepening the discussion."""

-    def __init__(self, name="", context=None, llm=None):
-        super().__init__(name, context, llm)
-
-    async def run(self, topic, record) -> ActionOutput:
-        prompt = PROMPT_TEMPLATE.format(topic=topic, record=record, format_example=FORMAT_EXAMPLE)
-        rsp = await self._aask_v1(prompt, "detail_mining", OUTPUT_MAPPING)
+    async def run(self, topic, record):
+        context = CONTEXT_TEMPLATE.format(topic=topic, record=record)
+        rsp = await QUESTIONS.fill(context=context, llm=self.llm)
        return rsp
--- a/metagpt/actions/prepare_interview.py
+++ b/metagpt/actions/prepare_interview.py
@ -6,35 +6,18 @@
@File    : prepare_interview.py
 """
 from metagpt.actions import Action
+from metagpt.actions.action_node import ActionNode

-PROMPT_TEMPLATE = """
-# Context
-{context}
-
-## Format example
---
-Q1: question 1 here
-References:
-  - point 1
-  - point 2
-
-Q2: question 2 here...
---
-
-----
-Role: You are an interviewer of our company who is well-knonwn in frontend or backend develop;
+QUESTIONS = ActionNode(
+    key="Questions",
+    expected_type=list[str],
+    instruction="""Role: You are an interviewer of our company who is well-knonwn in frontend or backend develop;
 Requirement: Provide a list of questions for the interviewer to ask the interviewee, by reading the resume of the interviewee in the context.
-Attention: Provide as markdown block as the format above, at least 10 questions.
-"""
-
-# prepare for a interview
+Attention: Provide as markdown block as the format above, at least 10 questions.""",
+    example=["1. What ...", "2. How ..."],
+)


 class PrepareInterview(Action):
-    def __init__(self, name, context=None, llm=None):
-        super().__init__(name, context, llm)
-
    async def run(self, context):
-        prompt = PROMPT_TEMPLATE.format(context=context)
-        question_list = await self._aask_v1(prompt)
-        return question_list
+        return await QUESTIONS.fill(context=context, llm=self.llm)
--- a/metagpt/actions/project_management.py
+++ b/metagpt/actions/project_management.py
@ -45,7 +45,7 @@ class WriteTasks(Action):
    context: Optional[str] = None
    llm: BaseGPTAPI = Field(default_factory=LLM)

-    async def run(self, with_messages, format=CONFIG.prompt_format):
+    async def run(self, with_messages, schema=CONFIG.prompt_schema):
        system_design_file_repo = CONFIG.git_repo.new_file_repository(SYSTEM_DESIGN_FILE_REPO)
        changed_system_designs = system_design_file_repo.changed_files

@ -92,16 +92,16 @@ class WriteTasks(Action):
        await self._save_pdf(task_doc=task_doc)
        return task_doc

-    async def _run_new_tasks(self, context, format=CONFIG.prompt_format):
-        node = await PM_NODE.fill(context, self.llm, format)
+    async def _run_new_tasks(self, context, schema=CONFIG.prompt_schema):
+        node = await PM_NODE.fill(context, self.llm, schema)
        # prompt_template, format_example = get_template(templates, format)
        # prompt = prompt_template.format(context=context, format_example=format_example)
        # rsp = await self._aask_v1(prompt, "task", OUTPUT_MAPPING, format=format)
        return node

-    async def _merge(self, system_design_doc, task_doc, format=CONFIG.prompt_format) -> Document:
+    async def _merge(self, system_design_doc, task_doc, schema=CONFIG.prompt_schema) -> Document:
        context = NEW_REQ_TEMPLATE.format(context=system_design_doc.content, old_tasks=task_doc.content)
-        node = await PM_NODE.fill(context, self.llm, format)
+        node = await PM_NODE.fill(context, self.llm, schema)
        task_doc.content = node.instruct_content.json(ensure_ascii=False)
        return task_doc

--- a/metagpt/actions/write_prd.py
+++ b/metagpt/actions/write_prd.py
@ -113,7 +113,7 @@ class WritePRD(Action):
        # optimization in subsequent steps.
        return ActionOutput(content=change_files.json(), instruct_content=change_files)

-    async def _run_new_requirement(self, requirements, format=CONFIG.prompt_format) -> ActionOutput:
+    async def _run_new_requirement(self, requirements, schema=CONFIG.prompt_schema) -> ActionOutput:
        # sas = SearchAndSummarize()
        # # rsp = await sas.run(context=requirements, system_text=SEARCH_AND_SUMMARIZE_SYSTEM_EN_US)
        # rsp = ""
@ -123,7 +123,7 @@ class WritePRD(Action):
        #     logger.info(rsp)
        project_name = CONFIG.project_name if CONFIG.project_name else ""
        context = CONTEXT_TEMPLATE.format(requirements=requirements, project_name=project_name)
-        node = await WRITE_PRD_NODE.fill(context=context, llm=self.llm, to=format)
+        node = await WRITE_PRD_NODE.fill(context=context, llm=self.llm, to=schema)
        await self._rename_workspace(node)
        return node

@ -132,11 +132,11 @@ class WritePRD(Action):
        node = await WP_IS_RELATIVE_NODE.fill(context, self.llm)
        return node.get("is_relative") == "YES"

-    async def _merge(self, new_requirement_doc, prd_doc, format=CONFIG.prompt_format) -> Document:
+    async def _merge(self, new_requirement_doc, prd_doc, schema=CONFIG.prompt_schema) -> Document:
        if not CONFIG.project_name:
            CONFIG.project_name = Path(CONFIG.project_path).name
        prompt = NEW_REQ_TEMPLATE.format(requirements=new_requirement_doc.content, old_prd=prd_doc.content)
-        node = await WRITE_PRD_NODE.fill(context=prompt, llm=self.llm, to=format)
+        node = await WRITE_PRD_NODE.fill(context=prompt, llm=self.llm, to=schema)
        prd_doc.content = node.instruct_content.json(ensure_ascii=False)
        await self._rename_workspace(node)
        return prd_doc
--- a/metagpt/config.py
+++ b/metagpt/config.py
@ -143,7 +143,7 @@ class Config(metaclass=Singleton):
        self.pyppeteer_executable_path = self._get("PYPPETEER_EXECUTABLE_PATH", "")

        self.repair_llm_output = self._get("REPAIR_LLM_OUTPUT", False)
-        self.prompt_format = self._get("PROMPT_FORMAT", "json")
+        self.prompt_schema = self._get("PROMPT_FORMAT", "json")
        self.workspace_path = Path(self._get("WORKSPACE_PATH", DEFAULT_WORKSPACE_ROOT))
        self._ensure_workspace_exists()

--- a/metagpt/utils/get_template.py
+++ b/metagpt/utils/get_template.py
@ -8,10 +8,10 @@
 from metagpt.config import CONFIG


-def get_template(templates, format=CONFIG.prompt_format):
-    selected_templates = templates.get(format)
+def get_template(templates, schema=CONFIG.prompt_schema):
+    selected_templates = templates.get(schema)
    if selected_templates is None:
-        raise ValueError(f"Can't find {format} in passed in templates")
+        raise ValueError(f"Can't find {schema} in passed in templates")

    # Extract the selected templates
    prompt_template = selected_templates["PROMPT_TEMPLATE"]