refine a lot of code, fix pylint, use actionnode include ui, action _aask_v1, detail_mining, prepare_interview, etc.

This commit is contained in:
geekan 2023-12-19 21:17:02 +08:00 committed by better629
parent 25ea21321f
commit a75ab7971f
12 changed files with 150 additions and 215 deletions

View file

@ -11,15 +11,9 @@ from __future__ import annotations
from typing import Optional, Any
from pydantic import BaseModel, Field
from tenacity import retry, stop_after_attempt, wait_random_exponential
from metagpt.actions.action_output import ActionOutput
from metagpt.actions.action_node import ActionNode
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.provider.base_gpt_api import BaseGPTAPI
from metagpt.provider.postprecess.llm_output_postprecess import llm_output_postprecess
from metagpt.utils.common import OutputParser, general_after_log
action_subclass_registry = {}
@ -31,7 +25,7 @@ class Action(BaseModel):
context = ""
prefix = "" # aask*时会加上prefix作为system_message
desc = "" # for skill manager
node: ActionNode = Field(default_factory=ActionNode)
node: ActionNode = Field(default_factory=ActionNode, exclude=True)
# builtin variables
builtin_class_name: str = ""
@ -74,32 +68,6 @@ class Action(BaseModel):
system_msgs.append(self.prefix)
return await self.llm.aask(prompt, system_msgs)
@retry(
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(6),
after=general_after_log(logger),
)
async def _aask_v1(
self,
prompt: str,
output_class_name: str,
output_data_mapping: dict,
system_msgs: Optional[list[str]] = None,
format="markdown", # compatible to original format
) -> ActionOutput:
content = await self.llm.aask(prompt, system_msgs)
logger.debug(f"llm raw output:\n{content}")
output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping)
if format == "json":
parsed_data = llm_output_postprecess(output=content, schema=output_class.schema(), req_key="[/CONTENT]")
else: # using markdown parser
parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping)
logger.debug(parsed_data)
instruct_content = output_class(**parsed_data)
return ActionOutput(content, instruct_content)
async def run(self, *args, **kwargs):
"""Run action"""
raise NotImplementedError("The run method should be implemented in a subclass.")

View file

@ -6,17 +6,15 @@
@File : action_node.py
"""
import json
import re
from typing import Any, Dict, List, Optional, Type
from typing import Dict, Generic, List, Optional, Type, TypeVar
from pydantic import BaseModel, create_model, root_validator, validator
from tenacity import retry, stop_after_attempt, wait_random_exponential
from metagpt.actions import ActionOutput
from metagpt.llm import BaseGPTAPI
from metagpt.logs import logger
from metagpt.utils.common import OutputParser
from metagpt.utils.custom_decoder import CustomDecoder
from metagpt.provider.postprecess.llm_output_postprecess import llm_output_postprecess
from metagpt.utils.common import OutputParser, general_after_log
CONSTRAINT = """
- Language: Please use the same language as the user input.
@ -43,14 +41,17 @@ Fill in the above nodes based on the format example.
"""
def dict_to_markdown(d, prefix="###", postfix="\n"):
def dict_to_markdown(d, prefix="-", postfix="\n"):
markdown_str = ""
for key, value in d.items():
markdown_str += f"{prefix} {key}: {value}{postfix}"
return markdown_str
class ActionNode:
T = TypeVar("T")
class ActionNode(Generic[T]):
"""ActionNode is a tree of nodes."""
mode: str
@ -65,7 +66,7 @@ class ActionNode:
expected_type: Type # such as str / int / float etc.
# context: str # everything in the history.
instruction: str # the instructions should be followed.
example: Any # example for In Context-Learning.
example: T # example for In Context-Learning.
# Action Output
content: str
@ -76,7 +77,7 @@ class ActionNode:
key: str,
expected_type: Type,
instruction: str,
example: str,
example: T,
content: str = "",
children: dict[str, "ActionNode"] = None,
):
@ -148,29 +149,6 @@ class ActionNode:
new_class.__root_validator_check_missing_fields = classmethod(check_missing_fields)
return new_class
@classmethod
def create_model_class_v2(cls, class_name: str, mapping: Dict[str, Type]):
"""基于pydantic v2的模型动态生成用来检验结果类型正确性待验证"""
new_class = create_model(class_name, **mapping)
@model_validator(mode="before")
def check_missing_fields(data):
required_fields = set(mapping.keys())
missing_fields = required_fields - set(data.keys())
if missing_fields:
raise ValueError(f"Missing fields: {missing_fields}")
return data
@field_validator("*")
def check_name(v: Any, field: str) -> Any:
if field not in mapping.keys():
raise ValueError(f"Unrecognized block: {field}")
return v
new_class.__model_validator_check_missing_fields = classmethod(check_missing_fields)
new_class.__field_validator_check_name = classmethod(check_name)
return new_class
def create_children_class(self):
"""使用object内有的字段直接生成model_class"""
class_name = f"{self.key}_AN"
@ -245,6 +223,7 @@ class ActionNode:
"""
# FIXME: json instruction会带来格式问题"Project name": "web_2048 # 项目名称使用下划线",
# compile example暂时不支持markdown
self.instruction = self.compile_instruction(to="markdown", mode=mode)
self.example = self.compile_example(to=to, tag="CONTENT", mode=mode)
prompt = template.format(
@ -252,36 +231,32 @@ class ActionNode:
)
return prompt
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(6))
@retry(
wait=wait_random_exponential(min=1, max=60),
stop=stop_after_attempt(6),
after=general_after_log(logger),
)
async def _aask_v1(
self,
prompt: str,
output_class_name: str,
output_data_mapping: dict,
system_msgs: Optional[list[str]] = None,
format="markdown", # compatible to original format
) -> ActionOutput:
schema="markdown", # compatible to original format
) -> (str, BaseModel):
"""Use ActionOutput to wrap the output of aask"""
content = await self.llm.aask(prompt, system_msgs)
logger.debug(content)
output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping)
if format == "json":
pattern = r"\[CONTENT\](\s*\{.*?\}\s*)\[/CONTENT\]"
matches = re.findall(pattern, content, re.DOTALL)
for match in matches:
if match:
content = match
break
parsed_data = CustomDecoder(strict=False).decode(content)
logger.debug(f"llm raw output:\n{content}")
output_class = self.create_model_class(output_class_name, output_data_mapping)
if schema == "json":
parsed_data = llm_output_postprecess(output=content, schema=output_class.schema(), req_key="[/CONTENT]")
else: # using markdown parser
parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping)
logger.debug(parsed_data)
logger.debug(f"parsed_data:\n{parsed_data}")
instruct_content = output_class(**parsed_data)
return ActionOutput(content, instruct_content)
return content, instruct_content
def get(self, key):
return self.instruct_content.dict()[key]
@ -302,9 +277,9 @@ class ActionNode:
mapping = self.get_mapping(mode)
class_name = f"{self.key}_AN"
output = await self._aask_v1(prompt, class_name, mapping, format=to)
self.content = output.content
self.instruct_content = output.instruct_content
content, scontent = await self._aask_v1(prompt, class_name, mapping, schema=to)
self.content = content
self.instruct_content = scontent
return self
async def fill(self, context, llm, to="json", mode="auto", strgy="simple"):

View file

@ -49,7 +49,7 @@ class WriteDesign(Action):
"data structures, library tables, processes, and paths. Please provide your design, feedback " \
"clearly and in detail."
async def run(self, with_messages: Message, format: str = CONFIG.prompt_format):
async def run(self, with_messages: Message, schema: str = CONFIG.prompt_schema):
# Use `git diff` to identify which PRD documents have been modified in the `docs/prds` directory.
prds_file_repo = CONFIG.git_repo.new_file_repository(PRDS_FILE_REPO)
changed_prds = prds_file_repo.changed_files
@ -79,13 +79,13 @@ class WriteDesign(Action):
# leaving room for global optimization in subsequent steps.
return ActionOutput(content=changed_files.json(), instruct_content=changed_files)
async def _new_system_design(self, context, format=CONFIG.prompt_format):
node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=format)
async def _new_system_design(self, context, schema=CONFIG.prompt_schema):
node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=schema)
return node
async def _merge(self, prd_doc, system_design_doc, format=CONFIG.prompt_format):
async def _merge(self, prd_doc, system_design_doc, schema=CONFIG.prompt_schema):
context = NEW_REQ_TEMPLATE.format(old_design=system_design_doc.content, context=prd_doc.content)
node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=format)
node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=schema)
system_design_doc.content = node.instruct_content.json(ensure_ascii=False)
return system_design_doc

View file

@ -5,47 +5,31 @@
@Author : fisherdeng
@File : detail_mining.py
"""
from metagpt.actions import Action, ActionOutput
from metagpt.actions import Action
from metagpt.actions.action_node import ActionNode
PROMPT_TEMPLATE = """
##TOPIC
CONTEXT_TEMPLATE = """
## TOPIC
{topic}
##RECORD
## RECORD
{record}
##Format example
{format_example}
-----
Task: Refer to the "##TOPIC" (discussion objectives) and "##RECORD" (discussion records) to further inquire about the details that interest you, within a word limit of 150 words.
Special Note 1: Your intention is solely to ask questions without endorsing or negating any individual's viewpoints.
Special Note 2: This output should only include the topic "##OUTPUT". Do not add, remove, or modify the topic. Begin the output with '##OUTPUT', followed by an immediate line break, and then proceed to provide the content in the specified format as outlined in the "##Format example" section.
Special Note 3: The output should be in the same language as the input.
"""
FORMAT_EXAMPLE = """
##
##OUTPUT
...(Please provide the specific details you would like to inquire about here.)
##
##
"""
OUTPUT_MAPPING = {
"OUTPUT": (str, ...),
}
QUESTIONS = ActionNode(
key="Questions",
expected_type=list[str],
instruction="Task: Refer to the context to further inquire about the details that interest you, within a word limit"
" of 150 words. Please provide the specific details you would like to inquire about here",
example=["1. What ...", "2. How ...", "3. ..."],
)
class DetailMining(Action):
"""This class allows LLM to further mine noteworthy details based on specific "##TOPIC"(discussion topic) and "##RECORD" (discussion records), thereby deepening the discussion."""
"""This class allows LLM to further mine noteworthy details based on specific "##TOPIC"(discussion topic) and
"##RECORD" (discussion records), thereby deepening the discussion."""
def __init__(self, name="", context=None, llm=None):
super().__init__(name, context, llm)
async def run(self, topic, record) -> ActionOutput:
prompt = PROMPT_TEMPLATE.format(topic=topic, record=record, format_example=FORMAT_EXAMPLE)
rsp = await self._aask_v1(prompt, "detail_mining", OUTPUT_MAPPING)
async def run(self, topic, record):
context = CONTEXT_TEMPLATE.format(topic=topic, record=record)
rsp = await QUESTIONS.fill(context=context, llm=self.llm)
return rsp

View file

@ -6,35 +6,18 @@
@File : prepare_interview.py
"""
from metagpt.actions import Action
from metagpt.actions.action_node import ActionNode
PROMPT_TEMPLATE = """
# Context
{context}
## Format example
---
Q1: question 1 here
References:
- point 1
- point 2
Q2: question 2 here...
---
-----
Role: You are an interviewer of our company who is well-knonwn in frontend or backend develop;
QUESTIONS = ActionNode(
key="Questions",
expected_type=list[str],
instruction="""Role: You are an interviewer of our company who is well-knonwn in frontend or backend develop;
Requirement: Provide a list of questions for the interviewer to ask the interviewee, by reading the resume of the interviewee in the context.
Attention: Provide as markdown block as the format above, at least 10 questions.
"""
# prepare for a interview
Attention: Provide as markdown block as the format above, at least 10 questions.""",
example=["1. What ...", "2. How ..."],
)
class PrepareInterview(Action):
def __init__(self, name, context=None, llm=None):
super().__init__(name, context, llm)
async def run(self, context):
prompt = PROMPT_TEMPLATE.format(context=context)
question_list = await self._aask_v1(prompt)
return question_list
return await QUESTIONS.fill(context=context, llm=self.llm)

View file

@ -45,7 +45,7 @@ class WriteTasks(Action):
context: Optional[str] = None
llm: BaseGPTAPI = Field(default_factory=LLM)
async def run(self, with_messages, format=CONFIG.prompt_format):
async def run(self, with_messages, schema=CONFIG.prompt_schema):
system_design_file_repo = CONFIG.git_repo.new_file_repository(SYSTEM_DESIGN_FILE_REPO)
changed_system_designs = system_design_file_repo.changed_files
@ -92,16 +92,16 @@ class WriteTasks(Action):
await self._save_pdf(task_doc=task_doc)
return task_doc
async def _run_new_tasks(self, context, format=CONFIG.prompt_format):
node = await PM_NODE.fill(context, self.llm, format)
async def _run_new_tasks(self, context, schema=CONFIG.prompt_schema):
node = await PM_NODE.fill(context, self.llm, schema)
# prompt_template, format_example = get_template(templates, format)
# prompt = prompt_template.format(context=context, format_example=format_example)
# rsp = await self._aask_v1(prompt, "task", OUTPUT_MAPPING, format=format)
return node
async def _merge(self, system_design_doc, task_doc, format=CONFIG.prompt_format) -> Document:
async def _merge(self, system_design_doc, task_doc, schema=CONFIG.prompt_schema) -> Document:
context = NEW_REQ_TEMPLATE.format(context=system_design_doc.content, old_tasks=task_doc.content)
node = await PM_NODE.fill(context, self.llm, format)
node = await PM_NODE.fill(context, self.llm, schema)
task_doc.content = node.instruct_content.json(ensure_ascii=False)
return task_doc

View file

@ -113,7 +113,7 @@ class WritePRD(Action):
# optimization in subsequent steps.
return ActionOutput(content=change_files.json(), instruct_content=change_files)
async def _run_new_requirement(self, requirements, format=CONFIG.prompt_format) -> ActionOutput:
async def _run_new_requirement(self, requirements, schema=CONFIG.prompt_schema) -> ActionOutput:
# sas = SearchAndSummarize()
# # rsp = await sas.run(context=requirements, system_text=SEARCH_AND_SUMMARIZE_SYSTEM_EN_US)
# rsp = ""
@ -123,7 +123,7 @@ class WritePRD(Action):
# logger.info(rsp)
project_name = CONFIG.project_name if CONFIG.project_name else ""
context = CONTEXT_TEMPLATE.format(requirements=requirements, project_name=project_name)
node = await WRITE_PRD_NODE.fill(context=context, llm=self.llm, to=format)
node = await WRITE_PRD_NODE.fill(context=context, llm=self.llm, to=schema)
await self._rename_workspace(node)
return node
@ -132,11 +132,11 @@ class WritePRD(Action):
node = await WP_IS_RELATIVE_NODE.fill(context, self.llm)
return node.get("is_relative") == "YES"
async def _merge(self, new_requirement_doc, prd_doc, format=CONFIG.prompt_format) -> Document:
async def _merge(self, new_requirement_doc, prd_doc, schema=CONFIG.prompt_schema) -> Document:
if not CONFIG.project_name:
CONFIG.project_name = Path(CONFIG.project_path).name
prompt = NEW_REQ_TEMPLATE.format(requirements=new_requirement_doc.content, old_prd=prd_doc.content)
node = await WRITE_PRD_NODE.fill(context=prompt, llm=self.llm, to=format)
node = await WRITE_PRD_NODE.fill(context=prompt, llm=self.llm, to=schema)
prd_doc.content = node.instruct_content.json(ensure_ascii=False)
await self._rename_workspace(node)
return prd_doc

View file

@ -143,7 +143,7 @@ class Config(metaclass=Singleton):
self.pyppeteer_executable_path = self._get("PYPPETEER_EXECUTABLE_PATH", "")
self.repair_llm_output = self._get("REPAIR_LLM_OUTPUT", False)
self.prompt_format = self._get("PROMPT_FORMAT", "json")
self.prompt_schema = self._get("PROMPT_FORMAT", "json")
self.workspace_path = Path(self._get("WORKSPACE_PATH", DEFAULT_WORKSPACE_ROOT))
self._ensure_workspace_exists()

View file

@ -8,10 +8,10 @@
from metagpt.config import CONFIG
def get_template(templates, format=CONFIG.prompt_format):
selected_templates = templates.get(format)
def get_template(templates, schema=CONFIG.prompt_schema):
selected_templates = templates.get(schema)
if selected_templates is None:
raise ValueError(f"Can't find {format} in passed in templates")
raise ValueError(f"Can't find {schema} in passed in templates")
# Extract the selected templates
prompt_template = selected_templates["PROMPT_TEMPLATE"]