From 17999934475b308d7efc54f776db8b0f5be14bd7 Mon Sep 17 00:00:00 2001 From: femto Date: Thu, 7 Sep 2023 21:54:31 +0800 Subject: [PATCH] action json --- metagpt/actions/action.py | 18 +- metagpt/actions/design_api_json.py | 129 +++++++++++++++ metagpt/actions/project_management.py | 2 +- metagpt/actions/write_prd_json.py | 12 +- metagpt/roles/architect.py | 7 +- metagpt/roles/project_manager.py | 3 +- metagpt/utils/custom_decoder.py | 230 ++++++++++++++++++++++++++ requirements.txt | 1 - 8 files changed, 384 insertions(+), 18 deletions(-) create mode 100644 metagpt/actions/design_api_json.py create mode 100644 metagpt/utils/custom_decoder.py diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index aed3824b1..f2dcd86d4 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -5,16 +5,19 @@ @Author : alexanderwu @File : action.py """ +import ast import json from abc import ABC from typing import Optional +import re from tenacity import retry, stop_after_attempt, wait_fixed -import regex + from metagpt.actions.action_output import ActionOutput from metagpt.llm import LLM from metagpt.utils.common import OutputParser +from metagpt.utils.custom_decoder import CustomDecoder from metagpt.logs import logger @@ -77,12 +80,13 @@ class Action(ABC): content = await self.llm.aask(prompt, system_msgs) logger.debug(content) output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) - json_regex = r"\{(?:[^{}]|(?R))*\}" - json = regex.search( - json_regex, content - ).group() - generated_plan = json.loads(json) - parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping) + + pattern = r'\[CONTENT\](.*?)\[/CONTENT\]' + + # Use re.findall to extract content between the tags + extracted_content = re.search(pattern, content, re.DOTALL).group(1) + + parsed_data = CustomDecoder(strict=False).decode(extracted_content) logger.debug(parsed_data) instruct_content = output_class(**parsed_data) return 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time    : 2023/5/11 19:26
@Author  : alexanderwu
@File    : design_api_json.py
"""
import shutil
from pathlib import Path
from typing import List

from metagpt.actions import Action, ActionOutput
from metagpt.const import WORKSPACE_ROOT
from metagpt.logs import logger
from metagpt.utils.common import CodeParser
from metagpt.utils.mermaid import mermaid_to_file

# Prompt sent to the LLM; only {context} and {format_example} are substituted.
PROMPT_TEMPLATE = """
# Context
{context}

## Format example
{format_example}
-----
Role: You are an architect; the goal is to design a SOTA PEP8-compliant python system; make the best use of good open source tools
Requirement: Fill in the following missing information based on the context, note that all sections are response with code form separately
Max Output: 8192 chars or 2048 tokens. Try to use them up.

## Implementation approach: Provide as Plain text. Analyze the difficult points of the requirements, select the appropriate open-source framework.

## Python package name: Provide as Python str with python triple quoto, concise and clear, characters only use a combination of all lowercase and underscores

## File list: Provided as Python list[str], the list of ONLY REQUIRED files needed to write the program(LESS IS MORE!). Only need relative paths, comply with PEP8 standards. ALWAYS write a main.py or app.py here

## Data structures and interface definitions: Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions (with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.

## Program call flow: Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.

## Anything UNCLEAR: Provide as Plain text. Make clear here.

Your job is to create a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like format example
"""

# Example answer shown to the LLM. Multi-line values are single-quoted, which
# is not strict JSON; the reply is therefore expected to be parsed leniently.
FORMAT_EXAMPLE = """
[CONTENT]
{
    "Implementation approach": "We will ...",
    "Python package name": "snake_game",
    "File list": ["main.py"],
    "Data structures and interface definitions": '
    classDiagram
        class Game{
            +int score
        }
        ...
        Game "1" -- "1" Food: has
    ',
    "Program call flow": '
    sequenceDiagram
        participant M as Main
        ...
        G->>M: end game
    ',
    "Anything UNCLEAR": "The requirement is clear to me."
}
[/CONTENT]
"""

# Field name -> (type, default) pairs describing the structured reply.
OUTPUT_MAPPING = {
    "Implementation approach": (str, ...),
    "Python package name": (str, ...),
    "File list": (List[str], ...),
    "Data structures and interface definitions": (str, ...),
    "Program call flow": (str, ...),
    "Anything UNCLEAR": (str, ...),
}


class WriteDesignJson(Action):
    """Action that asks the LLM for a system design in JSON form and saves it.

    The design documents and the rendered mermaid diagrams are written under
    ``WORKSPACE_ROOT / <Python package name>``.
    """

    def __init__(self, name, context=None, llm=None):
        super().__init__(name, context, llm)
        self.desc = (
            "Based on the PRD, think about the system design, and design the corresponding APIs, "
            "data structures, library tables, processes, and paths. Please provide your design, feedback "
            "clearly and in detail."
        )

    def recreate_workspace(self, workspace: Path):
        """Delete ``workspace`` if it exists, then create it empty."""
        try:
            shutil.rmtree(workspace)
        except FileNotFoundError:
            pass  # Folder does not exist, but we don't care
        workspace.mkdir(parents=True, exist_ok=True)

    def _save_prd(self, docs_path, resources_path, context):
        """Write the PRD taken from the last context entry and render its quadrant chart."""
        prd = context[-1]
        prd_file = docs_path / 'prd.md'
        quadrant_chart = prd.instruct_content.dict()['Competitive Quadrant Chart']
        mermaid_to_file(quadrant_chart, resources_path / 'competitive_analysis')
        logger.info(f"Saving PRD to {prd_file}")
        prd_file.write_text(prd.content)

    def _save_system_design(self, docs_path, resources_path, system_design):
        """Render both design diagrams and write ``system_design.md``.

        ``system_design`` is either a structured result exposing ``content``
        and ``instruct_content``, or the raw reply text. ``_save`` accepts both
        forms, so both must be handled here; previously the raw-text form
        raised ``AttributeError`` on ``.instruct_content``.
        """
        if isinstance(system_design, str):
            content = system_design
            # NOTE(review): call shape taken from the code this replaced —
            # confirm against CodeParser.parse_code.
            data_api_design = CodeParser.parse_code(
                block="Data structures and interface definitions", text=content)
            seq_flow = CodeParser.parse_code(block="Program call flow", text=content)
        else:
            content = system_design.content
            fields = system_design.instruct_content.dict()
            data_api_design = fields['Data structures and interface definitions']
            seq_flow = fields['Program call flow']
        mermaid_to_file(data_api_design, resources_path / 'data_api_design')
        mermaid_to_file(seq_flow, resources_path / 'seq_flow')
        system_design_file = docs_path / 'system_design.md'
        logger.info(f"Saving System Designs to {system_design_file}")
        system_design_file.write_text(content)

    def _save(self, context, system_design):
        """Recreate the project workspace and store the PRD and the design in it.

        The workspace is removed and recreated first, so anything previously
        stored under the same package name is discarded.
        """
        if isinstance(system_design, ActionOutput):
            ws_name = system_design.instruct_content.dict()['Python package name']
        else:
            ws_name = CodeParser.parse_str(block="Python package name", text=system_design)
        workspace = WORKSPACE_ROOT / ws_name
        self.recreate_workspace(workspace)
        docs_path = workspace / 'docs'
        resources_path = workspace / 'resources'
        docs_path.mkdir(parents=True, exist_ok=True)
        resources_path.mkdir(parents=True, exist_ok=True)
        self._save_prd(docs_path, resources_path, context)
        self._save_system_design(docs_path, resources_path, system_design)

    async def run(self, context):
        """Build the prompt, query the LLM, save the artifacts and return the design."""
        prompt = PROMPT_TEMPLATE.format(context=context, format_example=FORMAT_EXAMPLE)
        # presumably _aask_json_v1 is provided by Action and returns an
        # ActionOutput built from OUTPUT_MAPPING — verify against action.py
        system_design = await self._aask_json_v1(prompt, "system_design", OUTPUT_MAPPING)
        self._save(context, system_design)
        return system_design
-Your job is to create a properly formatted JSON +Your job is to create a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like format example """ FORMAT_EXAMPLE = """ +[CONTENT] { "Original Requirements": "", "Search Information": "", - "mermaid quadrantChart code": ''' + "mermaid quadrantChart code": ' "title": "Reach and engagement of campaigns", "x-axis": "Low Reach --> High Reach", "y-axis": "Low Engagement --> High Engagement", @@ -82,7 +83,7 @@ FORMAT_EXAMPLE = """ "Campaign E": [0.40, 0.34], "Campaign F": [0.35, 0.78], "Our Target Product": [0.5, 0.6] - ''' + ' , }, @@ -93,10 +94,11 @@ FORMAT_EXAMPLE = """ "Competitive Analysis": [], "Competitive Quadrant Chart": "", "Requirement Analysis": "", - "Requirement Pool": [], + "Requirement Pool": [["P0","P0 requirement"],["P1","P1 requirement"]], "UI Design draft": "", "Anything UNCLEAR": "", } +[/CONTENT] """ OUTPUT_MAPPING = { "Original Requirements": (str, ...), @@ -105,7 +107,7 @@ OUTPUT_MAPPING = { "Competitive Analysis": (List[str], ...), "Competitive Quadrant Chart": (str, ...), "Requirement Analysis": (str, ...), - "Requirement Pool": (List[Tuple[str, str]], ...), + "Requirement Pool": (List[List[str]], ...), "UI Design draft":(str, ...), "Anything UNCLEAR": (str, ...), } diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index d0756672e..bd52665dc 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -6,7 +6,8 @@ @File : architect.py """ -from metagpt.actions import WriteDesign, WritePRD +from metagpt.actions import WriteDesign, WritePRD, WritePRDJson +from metagpt.actions.design_api_json import WriteDesignJson from metagpt.roles import Role @@ -30,9 +31,9 @@ class Architect(Role): super().__init__(name, profile, goal, constraints) # Initialize actions specific to the Architect role - self._init_actions([WriteDesign]) + self._init_actions([WriteDesignJson]) # Set events or actions the Architect should watch or be aware of - self._watch({WritePRD}) + 
"""Lenient JSON decoder that also accepts single-quoted strings and keys.

The functions below are adapted from the pure-Python implementation in the
standard library's ``json.decoder`` / ``json.scanner`` modules. The only
intended difference is that a string or an object key may be delimited by
either ``"`` or ``'``.
"""
import json
import re
from json import scanner, JSONDecodeError
from json.decoder import _decode_uXXXX

NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))

# Characters that may open a string value or an object key. Kept as a tuple
# (not a str) so that the empty string is never reported as a member.
_QUOTE_CHARS = ('"', "'")


def py_make_scanner(context):
    """Build the ``scan_once(string, idx)`` function for a decoder instance.

    ``context`` supplies the parse callbacks and options (``parse_object``,
    ``parse_array``, ``parse_string``, ``strict``, ``memo`` and the hooks).
    The returned function parses one JSON value starting at ``idx`` and
    returns ``(value, end_index)``; it raises ``StopIteration(idx)`` when no
    value starts at that position.
    """
    parse_object = context.parse_object
    parse_array = context.parse_array
    parse_string = context.parse_string
    match_number = NUMBER_RE.match
    strict = context.strict
    parse_float = context.parse_float
    parse_int = context.parse_int
    parse_constant = context.parse_constant
    object_hook = context.object_hook
    object_pairs_hook = context.object_pairs_hook
    memo = context.memo

    def _scan_once(string, idx):
        try:
            nextchar = string[idx]
        except IndexError:
            raise StopIteration(idx) from None

        if nextchar in _QUOTE_CHARS:
            # The opening quote decides which quote closes the string.
            return parse_string(string, idx + 1, strict, delimiter=nextchar)
        elif nextchar == '{':
            return parse_object((string, idx + 1), strict,
                                _scan_once, object_hook, object_pairs_hook, memo)
        elif nextchar == '[':
            return parse_array((string, idx + 1), _scan_once)
        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
            return None, idx + 4
        elif nextchar == 't' and string[idx:idx + 4] == 'true':
            return True, idx + 4
        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
            return False, idx + 5

        m = match_number(string, idx)
        if m is not None:
            integer, frac, exp = m.groups()
            if frac or exp:
                res = parse_float(integer + (frac or '') + (exp or ''))
            else:
                res = parse_int(integer)
            return res, m.end()
        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
            return parse_constant('NaN'), idx + 3
        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
            return parse_constant('Infinity'), idx + 8
        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
            return parse_constant('-Infinity'), idx + 9
        else:
            raise StopIteration(idx)

    def scan_once(string, idx):
        try:
            return _scan_once(string, idx)
        finally:
            # The key memo is only meaningful for the duration of one scan.
            memo.clear()

    return scan_once


FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
STRINGCHUNK_SINGLEQUOTE = re.compile(r'(.*?)([\'\\\x00-\x1f])', FLAGS)
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
    # Needed so a single-quoted string can contain an escaped single quote.
    "'": "'",
}
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose keys may be double- or single-quoted.

    Args:
        s_and_end: ``(s, end)`` where ``end`` is the index just after ``{``.
        strict: forwarded to the string scanner when reading keys.
        scan_once: callable used to parse each value.
        object_hook: optional callable applied to the resulting ``dict``.
        object_pairs_hook: optional callable applied to the list of
            ``(key, value)`` pairs; takes precedence over ``object_hook``.
        memo: optional dict used to share equal key strings.

    Returns:
        ``(obj, end)`` where ``end`` is the index just after the closing ``}``.

    Raises:
        JSONDecodeError: on a missing key, ``:``, value, ``,`` or ``}``.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar to be an opening quote
    if nextchar not in _QUOTE_CHARS:
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar not in _QUOTE_CHARS:
            # Message text kept identical to the standard library's wording.
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        # The key is closed by the same quote character that opened it.
        key, end = scanstring(s, end, strict, delimiter=nextchar)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        # Keys after the first one may be single-quoted as well.
        if nextchar not in _QUOTE_CHARS:
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def py_scanstring(s, end, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match, delimiter='"'):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``delimiter`` is the quote character that terminates the string: ``"``
    selects the double-quote matcher, any other value the single-quote one.
    ``_m`` is kept only for signature compatibility; the matcher is always
    chosen from ``delimiter``.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    begin = end - 1
    if delimiter == '"':
        _m = STRINGCHUNK.match
    else:
        _m = STRINGCHUNK_SINGLEQUOTE.match
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == delimiter:
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, begin) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            uni = _decode_uXXXX(s, end)
            end += 5
            # Combine a UTF-16 surrogate pair into one code point.
            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = chr(uni)
        _append(char)
    return ''.join(chunks), end


scanstring = py_scanstring


class CustomDecoder(json.JSONDecoder):
    """``json.JSONDecoder`` that also accepts single-quoted strings and keys.

    With ``strict=False`` literal control characters such as newlines are
    allowed inside strings.
    """

    def __init__(self, *, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        super().__init__(object_hook=object_hook, parse_float=parse_float,
                         parse_int=parse_int, parse_constant=parse_constant,
                         strict=strict, object_pairs_hook=object_pairs_hook)
        # Swap in the quote-tolerant parsers, then rebuild the scanner so it
        # picks them up; arrays keep the parser installed by the base class.
        self.parse_object = JSONObject
        self.parse_string = py_scanstring
        self.scan_once = py_make_scanner(self)

    def decode(self, s, _w=json.decoder.WHITESPACE.match):
        """Return the Python object encoded by the document ``s``."""
        return super().decode(s, _w)