From c233699275930e15b11ef64633d4383ac1fc6ba4 Mon Sep 17 00:00:00 2001 From: better629 Date: Tue, 21 Nov 2023 20:33:58 +0800 Subject: [PATCH 01/38] add aiohttp encapsulation --- metagpt/utils/ahttp_client.py | 59 ++++++++++++++++++++++++ tests/metagpt/utils/test_ahttp_client.py | 38 +++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 metagpt/utils/ahttp_client.py create mode 100644 tests/metagpt/utils/test_ahttp_client.py diff --git a/metagpt/utils/ahttp_client.py b/metagpt/utils/ahttp_client.py new file mode 100644 index 000000000..d4f9f94e5 --- /dev/null +++ b/metagpt/utils/ahttp_client.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : pure async http_client + +from typing import Optional, Any, Mapping, Union + +from aiohttp.client import DEFAULT_TIMEOUT +import aiohttp + + +async def apost(url: str, + params: Optional[Mapping[str, str]] = None, + json: Any = None, + data: Any = None, + headers: Optional[dict] = None, + as_json: bool = False, + encoding: str = "utf-8", + timeout: int = DEFAULT_TIMEOUT.total) -> Union[str, dict]: + async with aiohttp.ClientSession() as session: + async with session.post( + url=url, + params=params, + json=json, + data=data, + headers=headers, + timeout=timeout + ) as resp: + if as_json: + data = await resp.json() + else: + data = await resp.read() + data = data.decode(encoding) + return data + + +async def apost_stream(url: str, + params: Optional[Mapping[str, str]] = None, + json: Any = None, + data: Any = None, + headers: Optional[dict] = None, + encoding: str = "utf-8", + timeout: int = DEFAULT_TIMEOUT.total) -> Any: + """ + usage: + result = astream(url="xx") + async for line in result: + deal_with(line) + """ + async with aiohttp.ClientSession() as session: + async with session.post( + url=url, + params=params, + json=json, + data=data, + headers=headers, + timeout=timeout + ) as resp: + async for line in resp.content: + yield line.decode(encoding) diff --git a/tests/metagpt/utils/test_ahttp_client.py b/tests/metagpt/utils/test_ahttp_client.py new file mode 100644 index 000000000..15159423a --- /dev/null +++ b/tests/metagpt/utils/test_ahttp_client.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : unittest of ahttp_client + +import pytest + +from metagpt.utils.ahttp_client import apost, apost_stream + + +@pytest.mark.asyncio +async def test_apost(): + result = await apost( + url="https://www.baidu.com/" + ) + assert "百度一下" in result + + result = await apost( + url="http://aider.meizu.com/app/weather/listWeather", + data={"cityIds": "101240101"}, + as_json=True + ) + assert result["code"] == "200" + + +@pytest.mark.asyncio +async def test_apost_stream(): + result = apost_stream( + url="https://www.baidu.com/" + ) + async for line in result: + assert len(line) >= 0 + + result = apost_stream( + url="http://aider.meizu.com/app/weather/listWeather", + data={"cityIds": "101240101"} + ) + async for line in result: + assert len(line) >= 0 From c49b832deecfb9d5ab1455d0db238e03e9300740 Mon Sep 17 00:00:00 2001 From: better629 Date: Tue, 21 Nov 2023 20:34:37 +0800 Subject: [PATCH 02/38] add trigger repair_llm_output for open llm --- metagpt/actions/action.py | 21 +- metagpt/config.py | 1 + metagpt/roles/role.py | 14 +- metagpt/utils/repair_llm_raw_output.py | 246 ++++++++++++++++++ tests/metagpt/utils/test_custom_decoder.py | 45 ++++ .../utils/test_repair_llm_raw_output.py | 203 +++++++++++++++ 6 files changed, 515 insertions(+), 15 deletions(-) create mode 100644 metagpt/utils/repair_llm_raw_output.py create mode 100644 tests/metagpt/utils/test_repair_llm_raw_output.py diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index 790295d55..f9e4f926b 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -16,6 +16,8 @@ from metagpt.llm import LLM from metagpt.logs import logger from metagpt.utils.common import OutputParser from metagpt.utils.custom_decoder import CustomDecoder +from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType,\ + retry_parse_json_text, extract_content_from_output class Action(ABC): @@ -49,7 +51,7 @@ class Action(ABC): system_msgs.append(self.prefix) return await self.llm.aask(prompt, system_msgs) - @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + # @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) async def _aask_v1( self, prompt: str, @@ -65,22 +67,19 @@ class Action(ABC): content = await self.llm.aask(prompt, system_msgs) logger.debug(content) output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) + output_class_fields = list(output_class.schema()["properties"].keys()) # Custom ActionOutput's fields if format == "json": - pattern = r"\[CONTENT\](\s*\{.*?\}\s*)\[/CONTENT\]" - matches = re.findall(pattern, content, re.DOTALL) - - for match in matches: - if match: - content = match - break - - parsed_data = CustomDecoder(strict=False).decode(content) + content = repair_llm_raw_output(content, req_keys=output_class_fields + ["[/CONTENT]"]) + content = extract_content_from_output(content) + content = repair_llm_raw_output(content, req_keys=[None], repair_type=RepairType.JSON) # req_keys mocked + logger.info(f"extracted CONTENT from content:\n{content}") + parsed_data = retry_parse_json_text(content) else: # using markdown parser parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping) - logger.debug(parsed_data) + logger.debug(f"parsed_data:\n{parsed_data}") instruct_content = output_class(**parsed_data) return ActionOutput(content, instruct_content) diff --git a/metagpt/config.py b/metagpt/config.py index 3f9e742bd..a4c43c28a 100644 --- a/metagpt/config.py +++ b/metagpt/config.py @@ -93,6 +93,7 @@ class Config(metaclass=Singleton): self.mermaid_engine = self._get("MERMAID_ENGINE", "nodejs") self.pyppeteer_executable_path = self._get("PYPPETEER_EXECUTABLE_PATH", "") + self.repair_llm_output = self._get("REPAIR_LLM_OUTPUT", False) self.prompt_format = self._get("PROMPT_FORMAT", "markdown") def _init_with_config_files_and_env(self, configs: dict, yaml_file): diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index b96c361c0..140910f0a 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -19,6 +19,8 @@ from metagpt.llm import LLM, HumanProvider from metagpt.logs import logger from metagpt.memory import Memory, LongTermMemory from metagpt.schema import Message +from metagpt.utils.repair_llm_raw_output import extract_state_value_from_output + PREFIX_TEMPLATE = """You are a {profile}, named {name}, your goal is {goal}, and the constraint is {constraints}. """ @@ -49,6 +51,7 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi {name}: {result} """ + class RoleReactMode(str, Enum): REACT = "react" BY_ORDER = "by_order" @@ -58,6 +61,7 @@ class RoleReactMode(str, Enum): def values(cls): return [item.value for item in cls] + class RoleSetting(BaseModel): """Role Settings""" name: str @@ -79,11 +83,11 @@ class RoleContext(BaseModel): env: 'Environment' = Field(default=None) memory: Memory = Field(default_factory=Memory) long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory) - state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None + state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None todo: Action = Field(default=None) watch: set[Type[Action]] = Field(default_factory=set) news: list[Type[Message]] = Field(default=[]) - react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of the following two attributes + react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of the following two attributes max_react_loop: int = 1 class Config: @@ -127,8 +131,9 @@ class Role: i = action("", llm=self._llm) else: if self._setting.is_human and not isinstance(action.llm, HumanProvider): - logger.warning(f"is_human attribute does not take effect," - f"as Role's {str(action)} was initialized using LLM, try passing in Action classes instead of initialized instances") + logger.warning(f"is_human attribute does not take effect, " + f"as Role's {str(action)} was initialized using LLM, " + f"try passing in Action classes instead of initialized instances") i = action i.set_prefix(self._get_prefix(), self.profile) self._actions.append(i) @@ -193,6 +198,7 @@ class Role: n_states=len(self._states) - 1, previous_state=self._rc.state) # print(prompt) next_state = await self._llm.aask(prompt) + next_state = extract_state_value_from_output(next_state) logger.debug(f"{prompt=}") if (not next_state.isdigit() and next_state != "-1") \ or int(next_state) not in range(-1, len(self._states)): diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py new file mode 100644 index 000000000..a65e4be80 --- /dev/null +++ b/metagpt/utils/repair_llm_raw_output.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : repair llm raw output with particular conditions + +import copy +from enum import Enum +from typing import Union +import regex as re + +from metagpt.logs import logger +from metagpt.config import CONFIG +from metagpt.utils.custom_decoder import CustomDecoder + + +class RepairType(Enum): + CS = "case sensitivity" + SCM = "special character missing" # Usually the req_key appear in pairs like `[key] xx [/key]` + RKPM = "required key pair missing" # condition like `[key] xx` which lacks `[/key]` + JSON = "json format" + + +def repair_case_sensitivity(output: str, req_key: str) -> str: + """ + usually, req_key is the key name of expected json or markdown content, it won't appear in the value part. + fix target string `"Shared Knowledge": ""` but `"Shared knowledge": ""` actually + """ + if req_key in output: + return output + + output_lower = output.lower() + req_key_lower = req_key.lower() + if req_key_lower in output_lower: + # find the sub-part index, and replace it with raw req_key + lidx = output_lower.find(req_key_lower) + source = output[lidx: lidx + len(req_key_lower)] + output = output.replace(source, req_key) + logger.info(f"repair_case_sensitivity: {req_key}") + + return output + + +def repair_special_character_missing(output: str, req_key: str) -> str: + """ + fix target string `[CONTENT]xxx[/CONTENT]` lacks [/CONTENT] + """ + sc_arr = ["/"] + + if req_key in output: + return output + + for sc in sc_arr: + req_key_pure = req_key.replace(sc, "") + appear_cnt = output.count(req_key_pure) + if req_key_pure in output and appear_cnt > 1: + # req_key with special_character usually in the tail side + ridx = output.rfind(req_key_pure) + output = f"{output[:ridx]}{req_key}{output[ridx + len(req_key_pure):]}" + logger.info(f"repair_special_character_missing: {req_key}") + + return output + + +def repair_required_key_pair_missing(output: str, req_key: str) -> str: + """ + implement the req_key pair in the begin or end of the content + req_key format + 1. `[req_key]`, and its pair `[/req_key]` + 2. `[/req_key]`, and its pair `[req_key]` + """ + if req_key.startswith("[") and req_key.endswith("]"): + if "/" in req_key: + left_key = req_key.replace("/", "") # `[/req_key]` -> `[req_key]` + right_key = req_key + else: + left_key = req_key + right_key = f"{req_key[0]}/{req_key[1:]}" # `[req_key]` -> `[/req_key]` + + if left_key not in output: + output = left_key + output + if right_key not in output: + output = output + right_key + + return output + + +def repair_json_format(output: str) -> str: + """ + fix extra `[` or `}` in the end + """ + output = output.strip() + + if output.startswith("[{"): + output = output[1:] + logger.info(f"repair_json_format: {'[{'}") + elif output.endswith("}]"): + output = output[:-1] + logger.info(f"repair_json_format: {'}]'}") + elif output.startswith("{") and output.startswith("]"): + output = output[:-1] + "}" + + return output + + +def _repair_llm_raw_output(output: str, req_key: str, repair_type: RepairType = None) -> str: + repair_types = [repair_type] if repair_type else [item for item in RepairType if item not in [RepairType.JSON]] + for repair_type in repair_types: + if repair_type == RepairType.CS: + output = repair_case_sensitivity(output, req_key) + elif repair_type == RepairType.SCM: + output = repair_special_character_missing(output, req_key) + elif repair_type == RepairType.JSON: + output = repair_json_format(output) + elif repair_type == RepairType.RKPM: + output = repair_required_key_pair_missing(output, req_key) + return output + + +def repair_llm_raw_output(output: str, req_keys: list[str], repair_type: RepairType = None) -> str: + """ + in open-source llm model, it usually can't follow the instruction well, the output may be incomplete, + so here we try to repair it and use all repair methods by default. + typical case + 1. case sensitivity + target: "Original Requirements" + output: "Original requirements" + 2. special character missing + target: [/CONTENT] + output: [CONTENT] + 3. json format + target: { xxx } + output: { xxx }] + """ + if not CONFIG.repair_llm_output: + return output + + # do the repairation usually for non-openai models + for req_key in req_keys: + output = _repair_llm_raw_output(output=output, + req_key=req_key, + repair_type=repair_type) + return output + + +def repair_invalid_json(output: str, error: str) -> str: + """ + repair the situation like there are extra chars like + error examples + example 1. json.decoder.JSONDecodeError: Expecting ',' delimiter: line 154 column 1 (char 2765) + example 2. xxx.JSONDecodeError: Expecting property name enclosed in double quotes: line 14 column 1 (char 266) + """ + pattern = r"line ([0-9]+)" + + matches = re.findall(pattern, error, re.DOTALL) + if len(matches) > 0: + line_no = int(matches[0]) - 1 + + # due to CustomDecoder can handle `"": ''` or `'': ""`, so convert `"""` -> `"`, `'''` -> `'` + output = output.replace('"""', '"').replace("'''", '"') + arr = output.split("\n") + line = arr[line_no].strip() + # different general problems + if line.endswith("],"): + # problem, redundant char `]` + line = line.replace("]", "") + elif line.endswith("},"): + # problem, redundant char `}` + line = line.replace("}", "") + elif '",' not in line: + line = f'{line}",' + elif "," not in line: + # problem, miss char `,` at the end. + line = f"{line}," + + arr[line_no] = line + output = "\n".join(arr) + logger.info(f"repair_invalid_json, raw error: {error}") + + return output + + +def retry_parse_json_text(output: str, retry: int = 5) -> Union[list, dict]: + """ + repair the json-text situation like there are extra chars like [']', '}'] + """ + parsed_data = {} + for idx in range(retry): + raw_output = copy.deepcopy(output) + + try: + parsed_data = CustomDecoder(strict=False).decode(output) + break + except Exception as exp: + if not CONFIG.repair_llm_output: + # if repair_llm_output is False, break from the retry loop + break + + logger.warning(f"decode content into json failed, try to fix it. exp: {exp}") + error = str(exp) + output = repair_invalid_json(output, error) + + return parsed_data + + +def extract_content_from_output(content: str, right_key: str = "[/CONTENT]"): + """ extract xxx from [CONTENT](xxx)[/CONTENT] using regex pattern """ + def re_extract_content(cont: str, pattern: str) -> str: + matches = re.findall(pattern, cont, re.DOTALL) + for match in matches: + if match: + cont = match + break + return cont.strip() + + raw_content = copy.deepcopy(content) + pattern = r"\[CONTENT\]([\s\S]*)\[/CONTENT\]" + new_content = re_extract_content(raw_content, pattern) + + if not new_content.startswith("{"): + # TODO find a more general pattern + # # for `[CONTENT]xxx[CONTENT]xxxx[/CONTENT] situation + logger.warning(f"extract_content try another pattern: {pattern}") + raw_content = copy.deepcopy(new_content + right_key) + # # pattern = r"\[CONTENT\](\s*\{.*?\}\s*)\[/CONTENT\]" + new_content = re_extract_content(raw_content, pattern) + else: + if right_key in new_content: + idx = new_content.find(right_key) + new_content = new_content[:idx] + + return new_content + + +def extract_state_value_from_output(content: str) -> str: + """ + For openai models, they will always return state number. But for open llm models, the instruction result maybe a + long text contain target number, so here add a extraction to improve success rate. + + Args: + content (str): llm's output from `Role._think` + """ + content = content.strip() # deal the output cases like " 0", "0\n" and so on. + pattern = r"([0-9])" # TODO find the number using a more proper method not just extract from content using pattern + matches = re.findall(pattern, content, re.DOTALL) + matches = list(set(matches)) + state = matches[0] if len(matches) > 0 else "-1" + return state diff --git a/tests/metagpt/utils/test_custom_decoder.py b/tests/metagpt/utils/test_custom_decoder.py index c7b14ad59..4af7a6cdc 100644 --- a/tests/metagpt/utils/test_custom_decoder.py +++ b/tests/metagpt/utils/test_custom_decoder.py @@ -6,6 +6,7 @@ @File : test_custom_decoder.py """ +import pytest from metagpt.utils.custom_decoder import CustomDecoder @@ -37,6 +38,46 @@ def test_parse_single_quote(): parsed_data = decoder.decode(input_data) assert 'a"\n b' in parsed_data + input_data = """{ + 'a': " + b +" +} +""" + with pytest.raises(Exception): + parsed_data = decoder.decode(input_data) + + input_data = """{ + 'a': ' + b +' +} +""" + with pytest.raises(Exception): + parsed_data = decoder.decode(input_data) + + +def test_parse_double_quote(): + decoder = CustomDecoder(strict=False) + + input_data = """{ + "a": " + b +" +} +""" + parsed_data = decoder.decode(input_data) + assert parsed_data["a"] == "\n b\n" + + input_data = """{ + "a": ' + b +' +} +""" + parsed_data = decoder.decode(input_data) + assert parsed_data["a"] == "\n b\n" + def test_parse_triple_double_quote(): # Create a custom JSON decoder @@ -54,6 +95,10 @@ def test_parse_triple_double_quote(): parsed_data = decoder.decode(input_data) assert parsed_data["a"] == "b" + input_data = "{\"\"\"a\"\"\": '''b'''}" + parsed_data = decoder.decode(input_data) + assert parsed_data["a"] == "b" + def test_parse_triple_single_quote(): # Create a custom JSON decoder diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py new file mode 100644 index 000000000..39a7343e7 --- /dev/null +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : unittest of repair_llm_raw_output + +import pytest + +from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType, repair_invalid_json,\ + extract_content_from_output, retry_parse_json_text + + +def test_repair_case_sensitivity(): + raw_output = """{ + "Original requirements": "Write a 2048 game", + "search Information": "", + "competitive Quadrant charT": "quadrantChart + Campaign A: [0.3, 0.6]", + "requirement analysis": "The 2048 game should be simple to play" +}""" + target_output = """{ + "Original Requirements": "Write a 2048 game", + "Search Information": "", + "Competitive Quadrant Chart": "quadrantChart + Campaign A: [0.3, 0.6]", + "Requirement Analysis": "The 2048 game should be simple to play" +}""" + req_keys = ["Original Requirements", "Search Information", "Competitive Quadrant Chart", "Requirement Analysis"] + output = repair_llm_raw_output(output=raw_output, + req_keys=req_keys) + assert output == target_output + + +def test_repair_special_character_missing(): + raw_output = """[CONTENT] + "Anything UNCLEAR": "No unclear requirements or information." +[CONTENT]""" + + target_output = """[CONTENT] + "Anything UNCLEAR": "No unclear requirements or information." +[/CONTENT]""" + req_keys = ["[/CONTENT]"] + output = repair_llm_raw_output(output=raw_output, + req_keys=req_keys) + assert output == target_output + + +def test_required_key_pair_missing(): + raw_output = "[CONTENT] xxx" + target_output = "[CONTENT] xxx[/CONTENT]" + + output = repair_llm_raw_output(output=raw_output, + req_keys=["[/CONTENT]"]) + assert output == target_output + + raw_output = "xxx[/CONTENT]" + target_output = "[CONTENT]xxx[/CONTENT]" + + output = repair_llm_raw_output(output=raw_output, + req_keys=["[CONTENT]"]) + assert output == target_output + + +def test_repair_json_format(): + raw_output = "{ xxx }]" + target_output = "{ xxx }" + + output = repair_llm_raw_output(output=raw_output, + req_keys=[None], + repair_type=RepairType.JSON) + assert output == target_output + + +def test_retry_parse_json_text(): + invalid_json_text = """{ +"Original Requirements": "Create a 2048 game", +"Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis" +], +"Requirement Analysis": "The requirements are clear and well-defined" +}""" + target_json = { + "Original Requirements": "Create a 2048 game", + "Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis", + "Requirement Analysis": "The requirements are clear and well-defined" + } + output = retry_parse_json_text(invalid_json_text) + assert output == target_json + + invalid_json_text = """{ +"Original Requirements": "Create a 2048 game", +"Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis" +}, +"Requirement Analysis": "The requirements are clear and well-defined" +}""" + target_json = { + "Original Requirements": "Create a 2048 game", + "Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis", + "Requirement Analysis": "The requirements are clear and well-defined" + } + output = retry_parse_json_text(invalid_json_text) + assert output == target_json + + +def test_extract_content_from_output(): + output = 'Sure! Here is the properly formatted JSON output based on the given context:\n\n[CONTENT]\n{\n"' \ + 'Required Python third-party packages": [\n"pygame==2.0.4",\n"pytest"\n],\n"Required Other language ' \ + 'third-party packages": [\n"No third-party packages are required."\n],\n"Full API spec": "\nopenapi: ' \ + '3.0.0\n\ndescription: A JSON object representing the game state.\n\npaths:\n game:\n get:\n ' \ + 'summary: Get the current game state.\n responses:\n 200:\n description: Game state.' \ + '\n\n moves:\n post:\n summary: Make a move.\n requestBody:\n description: Move to be ' \ + 'made.\n content:\n applicationjson:\n schema:\n type: object\n ' \ + ' properties:\n x:\n type: integer\n y:\n ' \ + ' type: integer\n tile:\n type: object\n ' \ + 'properties:\n value:\n type: integer\n x:\n ' \ + ' type: integer\n y:\n type: integer\n\n ' \ + 'undo-move:\n post:\n summary: Undo the last move.\n responses:\n 200:\n ' \ + ' description: Undone move.\n\n end-game:\n post:\n summary: End the game.\n responses:\n ' \ + ' 200:\n description: Game ended.\n\n start-game:\n post:\n summary: Start a new ' \ + 'game.\n responses:\n 200:\n description: Game started.\n\n game-over:\n get:\n ' \ + ' summary: Check if the game is over.\n responses:\n 200:\n description: Game ' \ + 'over.\n 404:\n description: Game not over.\n\n score:\n get:\n summary: Get the ' \ + 'current score.\n responses:\n 200:\n description: Score.\n\n tile:\n get:\n ' \ + 'summary: Get a specific tile.\n parameters:\n tile_id:\n type: integer\n ' \ + 'description: ID of the tile to get.\n responses:\n 200:\n description: Tile.\n\n ' \ + 'tiles:\n get:\n summary: Get all tiles.\n responses:\n 200:\n description: ' \ + 'Tiles.\n\n level:\n get:\n summary: Get the current level.\n responses:\n 200:\n ' \ + ' description: Level.\n\n level-up:\n post:\n summary: Level up.\n responses:\n ' \ + '200:\n description: Level up successful.\n\n level-down:\n post:\n summary: Level ' \ + 'down.\n responses:\n 200:\n description: Level down successful.\n\n restart:\n ' \ + 'post:\n summary: Restart the game.\n responses:\n 200:\n description: Game ' \ + 'restarted.\n\n help:\n get:\n summary: Get help.\n responses:\n 200:\n ' \ + 'description: Help.\n\n version:\n get:\n summary: Get the version of the game.\n ' \ + 'responses:\n 200:\n description: Version.\n\n}\n\n"Logic Analysis": [\n"game.py",' \ + '\n"Contains the game logic."\n],\n"Task list": [\n"game.py",\n"Contains the game logic and should be ' \ + 'done first."\n],\n"Shared Knowledge": "\n\'game.py\' contains the game logic.\n",\n"Anything ' \ + 'UNCLEAR": "How to start the game."\n]\n\n[/CONTENT] Great! Your JSON output is properly formatted ' \ + 'and correctly includes all the required sections. Here\'s a breakdown of what each section ' \ + 'contains:\n\nRequired Python third-party packages:\n\n* pygame==2.0.4\n* pytest\n\nRequired Other ' \ + 'language third-party packages:\n\n* No third-party packages are required.\n\nFull API spec:\n\n* ' \ + 'openapi: 3.0.0\n* description: A JSON object representing the game state.\n* paths:\n + game: ' \ + 'Get the current game state.\n + moves: Make a move.\n + undo-move: Undo the last move.\n + ' \ + 'end-game: End the game.\n + start-game: Start a new game.\n + game-over: Check if the game is ' \ + 'over.\n + score: Get the current score.\n + tile: Get a specific tile.\n + tiles: Get all tiles.\n ' \ + '+ level: Get the current level.\n + level-up: Level up.\n + level-down: Level down.\n + restart: ' \ + 'Restart the game.\n + help: Get help.\n + version: Get the version of the game.\n\nLogic ' \ + 'Analysis:\n\n* game.py contains the game logic.\n\nTask list:\n\n* game.py contains the game logic ' \ + 'and should be done first.\n\nShared Knowledge:\n\n* \'game.py\' contains the game logic.\n\nAnything ' \ + 'UNCLEAR:\n\n* How to start the game.\n\nGreat job! This JSON output should provide a clear and ' \ + 'comprehensive overview of the project\'s requirements and dependencies.' + output = extract_content_from_output(output) + assert output.startswith('{\n"Required Python third-party packages') + + output = 'Sure, I would be happy to help! Here is the information you provided, formatted as a JSON object ' \ + 'inside the [CONTENT] tag:\n\n[CONTENT]\n{\n"Original Requirements": "Create a 2048 game",\n"Search ' \ + 'Information": "Search results for 2048 game",\n"Requirements": [\n"Create a game with the same rules ' \ + 'as the original 2048 game",\n"Implement a user interface that is easy to use and understand",\n"Add a ' \ + 'scoreboard to track the player progress",\n"Allow the player to undo and redo moves",\n"Implement a ' \ + 'game over screen to display the final score"\n],\n"Product Goals": [\n"Create a fun and engaging game ' \ + 'experience for the player",\n"Design a user interface that is visually appealing and easy to use",\n"' \ + 'Optimize the game for performance and responsiveness"\n],\n"User Stories": [\n"As a player, I want to ' \ + 'be able to move tiles around the board to combine numbers",\n"As a player, I want to be able to undo ' \ + 'and redo moves to correct mistakes",\n"As a player, I want to see the final score and game over screen' \ + ' when I win"\n],\n"Competitive Analysis": [\n"Competitor A: 2048 game with a simple user interface and' \ + ' basic graphics",\n"Competitor B: 2048 game with a more complex user interface and better graphics",' \ + '\n"Competitor C: 2048 game with a unique twist on the rules and a more challenging gameplay experience"' \ + '\n],\n"Competitive Quadrant Chart": "quadrantChart\\n\ttitle Reach and engagement of campaigns\\n\t\t' \ + 'x-axis Low Reach --> High Reach\\n\t\ty-axis Low Engagement --> High Engagement\\n\tquadrant-1 We ' \ + 'should expand\\n\tquadrant-2 Need to promote\\n\tquadrant-3 Re-evaluate\\n\tquadrant-4 May be ' \ + 'improved\\n\tCampaign A: [0.3, 0.6]\\n\tCampaign B: [0.45, 0.23]\\n\tCampaign C: [0.57, 0.69]\\n\t' \ + 'Campaign D: [0.78, 0.34]\\n\tCampaign E: [0.40, 0.34]\\n\tCampaign F: [0.35, 0.78]"\n],\n"Requirement ' \ + 'Analysis": "The requirements are clear and well-defined, but there may be some ambiguity around the ' \ + 'specific implementation details",\n"Requirement Pool": [\n["P0", "Implement a game with the same ' \ + 'rules as the original 2048 game"],\n["P1", "Add a scoreboard to track the player progress"],\n["P2", ' \ + '"Allow the player to undo and redo moves"]\n],\n"UI Design draft": "The UI should be simple and easy ' \ + 'to use, with a clean and visually appealing design. The game board should be the main focus of the ' \ + 'UI, with clear and concise buttons for the player to interact with.",\n"Anything UNCLEAR": ""\n}\n' \ + '[/CONTENT]\n\nI hope this helps! Let me know if you have any further questions or if there anything ' \ + 'else I can do to assist you.' + output = extract_content_from_output(output) + assert output.startswith('{\n"Original Requirements"') + + output = """ Sure, I'd be happy to help! Here's the JSON output for the given context:\n\n[CONTENT]\n{ +"Implementation approach": "We will use the open-source framework PyGame to create a 2D game engine, which will + provide us with a robust and efficient way to handle game logic and rendering. PyGame is widely used in the game + development community and has a large number of resources and tutorials available online.",\n"Python package name": + "pygame_2048",\n"File list": ["main.py", "game.py", "constants.py", "ui.py"],\n"Data structures and interface + definitions": '\nclassDiagram\n class Game{\n +int score\n +list tiles\n +function + move_tile(tile, int dx, int dy)\n +function undo_move()\n +function get_highest_score()\n }\n + class Tile{\n +int value\n +int x\n +int y\n }\n ...\n Game "1" -- "1" Food: has\n', +\n"Program call flow": '\nsequenceDiagram\n participant M as Main\n participant G as Game\n ...\n G->>M: + end game\n',\n"Anything UNCLEAR": "The requirement is clear to me."\n}\n[/CONTENT] Here's the JSON output for the + given context, wrapped inside the [CONTENT][/CONTENT] format:\n\n[CONTENT]\n{\n"Implementation approach": "We will + use the open-source framework PyGame to create a 2D game engine, which will provide us with a robust and efficient + way to handle game logic and rendering. PyGame is widely used in the game development community and has a large + number of resources and tutorials available online.",\n"Python package name": "pygame_2048",\n"File list": + ["main.py", "game.py", "constants.py", "ui.py"],\n"Data structures and interface definitions": '\nclassDiagram\n + class Game{\n +int score\n +list tiles\n +function move_tile(tile, int dx, int dy)\n ++function undo_move()\n +function get_highest_score()\n }\n class Tile{\n +int value\n +int x\n + +int y\n }\n ...\n Game "1" -- "1" Food: has\n',\n"Program call flow": '\nsequenceDiagram\n participant + M as Main\n participant G as Game\n ...\n G->>M: end game\n',\n"Anything UNCLEAR": "The requirement is + clear to me."\n}\n[/CONTENT] Great! Your JSON output is well-formatted and provides all the necessary + information for a developer to understand the design and implementation of the 2048 game. +""" + output = extract_content_from_output(output) + assert output.startswith('{\n"Implementation approach"') and "[/CONTENT]" not in output From fc4ec5a9449bd644e0e13c30f2de4867f0b9685a Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 22 Nov 2023 13:01:16 +0800 Subject: [PATCH 03/38] update retry_parse_json_text --- config/config.yaml | 5 + metagpt/actions/action.py | 17 +-- metagpt/utils/repair_llm_raw_output.py | 110 +++++++++++++----- .../utils/test_repair_llm_raw_output.py | 66 ++++++++++- 4 files changed, 158 insertions(+), 40 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index bed67083c..72d2c0b19 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -94,4 +94,9 @@ MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k ### browser path for pyppeteer engine, support Chrome, Chromium,MS Edge #PYPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable" +### for repair non-openai LLM's output when parse json-text if PROMPT_FORMAT=json +### due to non-openai LLM's output will not always follow the instruction, so here activate a post-process +### repair operation on the content extracted from LLM's raw output. Warning, it improves the result but not fix all cases. +# REPAIR_LLM_OUTPUT: false + PROMPT_FORMAT: json #json or markdown \ No newline at end of file diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index f9e4f926b..7433c3857 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -5,17 +5,16 @@ @Author : alexanderwu @File : action.py """ -import re + from abc import ABC from typing import Optional -from tenacity import retry, stop_after_attempt, wait_fixed +from tenacity import retry, stop_after_attempt, wait_fixed, after_log from metagpt.actions.action_output import ActionOutput from metagpt.llm import LLM from metagpt.logs import logger from metagpt.utils.common import OutputParser -from metagpt.utils.custom_decoder import CustomDecoder from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType,\ retry_parse_json_text, extract_content_from_output @@ -51,7 +50,11 @@ class Action(ABC): system_msgs.append(self.prefix) return await self.llm.aask(prompt, system_msgs) - # @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + @retry( + stop=stop_after_attempt(3), + wait=wait_fixed(1), + after=after_log(logger, logger.level("ERROR").name), + ) async def _aask_v1( self, prompt: str, @@ -65,7 +68,7 @@ class Action(ABC): system_msgs = [] system_msgs.append(self.prefix) content = await self.llm.aask(prompt, system_msgs) - logger.debug(content) + logger.debug(f"llm raw output:\n{content}") output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) output_class_fields = list(output_class.schema()["properties"].keys()) # Custom ActionOutput's fields @@ -73,8 +76,8 @@ class Action(ABC): content = repair_llm_raw_output(content, req_keys=output_class_fields + ["[/CONTENT]"]) content = extract_content_from_output(content) content = repair_llm_raw_output(content, req_keys=[None], repair_type=RepairType.JSON) # req_keys mocked - logger.info(f"extracted CONTENT from content:\n{content}") - parsed_data = retry_parse_json_text(content) + logger.info(f"extracted json CONTENT from output:\n{content}") + parsed_data = retry_parse_json_text(output=content) # should use output=content else: # using markdown parser parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index a65e4be80..c26dc838d 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -4,8 +4,9 @@ import copy from enum import Enum -from typing import Union +from typing import Union, Callable import regex as re +from tenacity import retry, stop_after_attempt, wait_fixed, after_log, RetryCallState from metagpt.logs import logger from metagpt.config import CONFIG @@ -14,8 +15,8 @@ from metagpt.utils.custom_decoder import CustomDecoder class RepairType(Enum): CS = "case sensitivity" - SCM = "special character missing" # Usually the req_key appear in pairs like `[key] xx [/key]` RKPM = "required key pair missing" # condition like `[key] xx` which lacks `[/key]` + SCM = "special character missing" # Usually the req_key appear in pairs like `[key] xx [/key]` JSON = "json format" @@ -39,9 +40,11 @@ def repair_case_sensitivity(output: str, req_key: str) -> str: return output -def repair_special_character_missing(output: str, req_key: str) -> str: +def repair_special_character_missing(output: str, req_key: str = "[/CONTENT]") -> str: """ - fix target string `[CONTENT]xxx[/CONTENT]` lacks [/CONTENT] + fix + 1. target string `[CONTENT] xx [CONTENT] xxx [CONTENT]` lacks `/` in the last `[CONTENT]` + 2. target string `xx [CONTENT] xxx [CONTENT] xxxx` lacks `/` in the last `[CONTENT]` """ sc_arr = ["/"] @@ -55,30 +58,48 @@ def repair_special_character_missing(output: str, req_key: str) -> str: # req_key with special_character usually in the tail side ridx = output.rfind(req_key_pure) output = f"{output[:ridx]}{req_key}{output[ridx + len(req_key_pure):]}" - logger.info(f"repair_special_character_missing: {req_key}") + logger.info(f"repair_special_character_missing: {sc} in {req_key_pure} as position {ridx}") return output -def repair_required_key_pair_missing(output: str, req_key: str) -> str: +def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") -> str: """ implement the req_key pair in the begin or end of the content req_key format 1. `[req_key]`, and its pair `[/req_key]` 2. `[/req_key]`, and its pair `[req_key]` """ + sc = "/" # special char if req_key.startswith("[") and req_key.endswith("]"): - if "/" in req_key: - left_key = req_key.replace("/", "") # `[/req_key]` -> `[req_key]` + if sc in req_key: + left_key = req_key.replace(sc, "") # `[/req_key]` -> `[req_key]` right_key = req_key else: left_key = req_key - right_key = f"{req_key[0]}/{req_key[1:]}" # `[req_key]` -> `[/req_key]` + right_key = f"{req_key[0]}{sc}{req_key[1:]}" # `[req_key]` -> `[/req_key]` if left_key not in output: - output = left_key + output + output = left_key + "\n" + output if right_key not in output: - output = output + right_key + def judge_potential_json(routput: str, left_key: str) -> Union[str, bool]: + routput = copy.deepcopy(routput) + ridx = routput.rfind(left_key) + if ridx < 0: + return None + sub_output = routput[ridx:] + idx1 = sub_output.rfind("}") + idx2 = sub_output.rindex("]") + idx = idx1 if idx1 >= idx2 else idx2 + sub_output = sub_output[: idx] + return sub_output + + if output.strip().endswith("}") or (output.strip().endswith("]") and not output.strip().endswith(left_key)): + # # avoid [req_key]xx[req_key] case to append [/req_key] + output = output + "\n" + right_key + elif judge_potential_json(output, left_key): + sub_content = judge_potential_json(output, left_key) + output = sub_content + "\n" + right_key return output @@ -106,12 +127,12 @@ def _repair_llm_raw_output(output: str, req_key: str, repair_type: RepairType = for repair_type in repair_types: if repair_type == RepairType.CS: output = repair_case_sensitivity(output, req_key) + elif repair_type == RepairType.RKPM: + output = repair_required_key_pair_missing(output, req_key) elif repair_type == RepairType.SCM: output = repair_special_character_missing(output, req_key) elif repair_type == RepairType.JSON: output = repair_json_format(output) - elif repair_type == RepairType.RKPM: - output = repair_required_key_pair_missing(output, req_key) return output @@ -178,25 +199,58 @@ def repair_invalid_json(output: str, error: str) -> str: return output -def retry_parse_json_text(output: str, retry: int = 5) -> Union[list, dict]: +def run_after_exp_and_passon_next_retry(logger: "loguru.Logger") -> Callable[["RetryCallState"], None]: + def run_and_passon(retry_state: RetryCallState) -> None: + """ + RetryCallState example + { + "start_time":143.098322024, + "retry_object":")>", + "fn":"", + "args":"(\"tag:[/CONTENT]\",)", # function input args + "kwargs":{}, # function input kwargs + "attempt_number":1, # retry number + "outcome":"", # type(outcome.result()) = "str", type(outcome.exception()) = "class" + "outcome_timestamp":143.098416904, + "idle_for":0, + "next_action":"None" + } + """ + if retry_state.outcome.failed: + if len(retry_state.args) > 0: + # # can't used as args=retry_state.args + func_param_output = retry_state.args[0] + elif len(retry_state.kwargs) > 0: + func_param_output = retry_state.kwargs.get("output", "") + # import pdb; pdb.set_trace() + exp_str = str(retry_state.outcome.exception()) + logger.warning(f"parse json from content inside [CONTENT][/CONTENT] failed at retry " + f"{retry_state.attempt_number}, try to fix it, exp: {exp_str}") + + repaired_output = repair_invalid_json(func_param_output, exp_str) + retry_state.kwargs["output"] = repaired_output + + return run_and_passon + + +@retry( + stop=stop_after_attempt(3 if CONFIG.repair_llm_output else 0), + wait=wait_fixed(1), + after=run_after_exp_and_passon_next_retry(logger), +) +def retry_parse_json_text(output: str) -> Union[list, dict]: """ repair the json-text situation like there are extra chars like [']', '}'] + + Warning + if CONFIG.repair_llm_output is False, retry _aask_v1 {x=3} times, and the retry_parse_json_text's retry not work + if CONFIG.repair_llm_output is True, the _aask_v1 and the retry_parse_json_text will loop for {x=3*3} times. + it's a two-layer retry cycle """ - parsed_data = {} - for idx in range(retry): - raw_output = copy.deepcopy(output) + logger.debug(f"output to json decode:\n{output}") - try: - parsed_data = CustomDecoder(strict=False).decode(output) - break - except Exception as exp: - if not CONFIG.repair_llm_output: - # if repair_llm_output is False, break from the retry loop - break - - logger.warning(f"decode content into json failed, try to fix it. exp: {exp}") - error = str(exp) - output = repair_invalid_json(output, error) + # if CONFIG.repair_llm_output is True, it will try to fix output until the retry break + parsed_data = CustomDecoder(strict=False).decode(output) return parsed_data diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index 39a7343e7..dfcf60ad5 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -42,20 +42,69 @@ def test_repair_special_character_missing(): req_keys=req_keys) assert output == target_output + raw_output = """[CONTENT] tag +[CONTENT] +{ +"Anything UNCLEAR": "No unclear requirements or information." +} +[CONTENT]""" + target_output = """[CONTENT] tag +[CONTENT] +{ +"Anything UNCLEAR": "No unclear requirements or information." +} +[/CONTENT]""" + output = repair_llm_raw_output(output=raw_output, + req_keys=req_keys) + assert output == target_output + + raw_output = '[CONTENT] {"a": "b"} [CONTENT]' + target_output = '[CONTENT] {"a": "b"} [/CONTENT]' + + output = repair_llm_raw_output(output=raw_output, + req_keys=["[/CONTENT]"]) + print("output\n", output) + assert output == target_output + def test_required_key_pair_missing(): - raw_output = "[CONTENT] xxx" - target_output = "[CONTENT] xxx[/CONTENT]" + raw_output = '[CONTENT] {"a": "b"}' + target_output = '[CONTENT] {"a": "b"}\n[/CONTENT]' output = repair_llm_raw_output(output=raw_output, req_keys=["[/CONTENT]"]) assert output == target_output - raw_output = "xxx[/CONTENT]" - target_output = "[CONTENT]xxx[/CONTENT]" + raw_output = '''[CONTENT] +{ + "a": "b" +]''' + target_output = '''[CONTENT] +{ + "a": "b" +] +[/CONTENT]''' output = repair_llm_raw_output(output=raw_output, - req_keys=["[CONTENT]"]) + req_keys=["[/CONTENT]"]) + assert output == target_output + + raw_output = '''[CONTENT] tag +[CONTENT] +{ + "a": "b" +} +xxx +''' + target_output = '''[CONTENT] tag +[CONTENT] +{ + "a": "b" +} +[/CONTENT] +''' + output = repair_llm_raw_output(output=raw_output, + req_keys=["[/CONTENT]"]) assert output == target_output @@ -100,6 +149,13 @@ def test_retry_parse_json_text(): def test_extract_content_from_output(): + """ + cases + xxx [CONTENT] xxxx [/CONTENT] + xxx [CONTENT] xxx [CONTENT] xxxx [/CONTENT] + xxx [CONTENT] xxxx [/CONTENT] xxx [CONTENT][/CONTENT] xxx [CONTENT][/CONTENT] # target pair is the last one + """ + output = 'Sure! Here is the properly formatted JSON output based on the given context:\n\n[CONTENT]\n{\n"' \ 'Required Python third-party packages": [\n"pygame==2.0.4",\n"pytest"\n],\n"Required Other language ' \ 'third-party packages": [\n"No third-party packages are required."\n],\n"Full API spec": "\nopenapi: ' \ From fc5c01e21943edc7b84376f62e5d6c9ef5634203 Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 22 Nov 2023 13:56:49 +0800 Subject: [PATCH 04/38] fix --- metagpt/utils/repair_llm_raw_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index c26dc838d..a12a36fcc 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -82,7 +82,7 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - if left_key not in output: output = left_key + "\n" + output if right_key not in output: - def judge_potential_json(routput: str, left_key: str) -> Union[str, bool]: + def judge_potential_json(routput: str, left_key: str) -> Union[str]: routput = copy.deepcopy(routput) ridx = routput.rfind(left_key) if ridx < 0: From 642335317b6a11f67da5b39fc84deca11249d331 Mon Sep 17 00:00:00 2001 From: better629 Date: Thu, 23 Nov 2023 01:46:14 +0800 Subject: [PATCH 05/38] add independent openllm and fireworks config fields, add llm output postprecess plugin --- config/config.yaml | 9 +++ metagpt/actions/action.py | 27 ++++--- metagpt/config.py | 15 +++- metagpt/llm.py | 7 +- metagpt/provider/fireworks_api.py | 24 +++++++ metagpt/provider/open_llm_api.py | 47 ++++++++++++ .../postprecess/base_postprecess_plugin.py | 72 +++++++++++++++++++ .../postprecess/llm_output_postprecess.py | 23 ++++++ metagpt/utils/repair_llm_raw_output.py | 14 ++-- .../utils/test_repair_llm_raw_output.py | 34 ++++++--- 10 files changed, 243 insertions(+), 29 deletions(-) create mode 100644 metagpt/provider/fireworks_api.py create mode 100644 metagpt/provider/open_llm_api.py create mode 100644 metagpt/provider/postprecess/base_postprecess_plugin.py create mode 100644 metagpt/provider/postprecess/llm_output_postprecess.py diff --git a/config/config.yaml b/config/config.yaml index 72d2c0b19..080de4000 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -34,6 +34,15 @@ RPM: 10 #### if zhipuai from `https://open.bigmodel.cn`. You can set here or export API_KEY="YOUR_API_KEY" # ZHIPUAI_API_KEY: "YOUR_API_KEY" +#### if use self-host open llm model with openai-compatible interface +#OPEN_LLM_API_BASE: "http://127.0.0.1:8000/v1" +#OPEN_LLM_API_MODEL: "llama2-13b" +# +##### if use Fireworks api +#FIREWORKS_API_KEY: "YOUR_API_KEY" +#FIREWORKS_API_BASE: "https://api.fireworks.ai/inference/v1" +#FIREWORKS_API_MODEL: "YOUR_LLM_MODEL" # example, accounts/fireworks/models/llama-v2-13b-chat + #### for Search ## Supported values: serpapi/google/serper/ddg diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index 7433c3857..cb5bd9ce1 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -6,17 +6,29 @@ @File : action.py """ +import typing from abc import ABC from typing import Optional -from tenacity import retry, stop_after_attempt, wait_fixed, after_log +from tenacity import retry, stop_after_attempt, wait_fixed, after_log, _utils from metagpt.actions.action_output import ActionOutput from metagpt.llm import LLM from metagpt.logs import logger from metagpt.utils.common import OutputParser -from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType,\ - retry_parse_json_text, extract_content_from_output +from metagpt.provider.postprecess.llm_output_postprecess import llm_output_postprecess + + +def action_after_log(logger: "loguru.Logger", sec_format: str = "%0.3f") -> typing.Callable[["RetryCallState"], None]: + def log_it(retry_state: "RetryCallState") -> None: + if retry_state.fn is None: + fn_name = "" + else: + fn_name = _utils.get_callback_name(retry_state.fn) + logger.error(f"Finished call to '{fn_name}' after {sec_format % retry_state.seconds_since_start}(s), " + f"this was the {_utils.to_ordinal(retry_state.attempt_number)} time calling it. " + f"exp: {retry_state.outcome.exception()}") + return log_it class Action(ABC): @@ -53,7 +65,7 @@ class Action(ABC): @retry( stop=stop_after_attempt(3), wait=wait_fixed(1), - after=after_log(logger, logger.level("ERROR").name), + after=action_after_log(logger), ) async def _aask_v1( self, @@ -70,14 +82,9 @@ class Action(ABC): content = await self.llm.aask(prompt, system_msgs) logger.debug(f"llm raw output:\n{content}") output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) - output_class_fields = list(output_class.schema()["properties"].keys()) # Custom ActionOutput's fields if format == "json": - content = repair_llm_raw_output(content, req_keys=output_class_fields + ["[/CONTENT]"]) - content = extract_content_from_output(content) - content = repair_llm_raw_output(content, req_keys=[None], repair_type=RepairType.JSON) # req_keys mocked - logger.info(f"extracted json CONTENT from output:\n{content}") - parsed_data = retry_parse_json_text(output=content) # should use output=content + parsed_data = llm_output_postprecess(output=content, schema=output_class.schema(), req_key="[/CONTENT]") else: # using markdown parser parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping) diff --git a/metagpt/config.py b/metagpt/config.py index a4c43c28a..2ce75b013 100644 --- a/metagpt/config.py +++ b/metagpt/config.py @@ -46,10 +46,18 @@ class Config(metaclass=Singleton): self.openai_api_key = self._get("OPENAI_API_KEY") self.anthropic_api_key = self._get("Anthropic_API_KEY") self.zhipuai_api_key = self._get("ZHIPUAI_API_KEY") + + self.open_llm_api_base = self._get("OPEN_LLM_API_BASE") + self.open_llm_api_model = self._get("OPEN_LLM_API_MODEL") + + self.fireworks_api_key = self._get("FIREWORKS_API_KEY") if (not self.openai_api_key or "YOUR_API_KEY" == self.openai_api_key) and \ (not self.anthropic_api_key or "YOUR_API_KEY" == self.anthropic_api_key) and \ - (not self.zhipuai_api_key or "YOUR_API_KEY" == self.zhipuai_api_key): - raise NotConfiguredException("Set OPENAI_API_KEY or Anthropic_API_KEY or ZHIPUAI_API_KEY first") + (not self.zhipuai_api_key or "YOUR_API_KEY" == self.zhipuai_api_key) and \ + (not self.open_llm_api_base) and \ + (not self.fireworks_api_key or "YOUR_API_KEY" == self.fireworks_api_key): + raise NotConfiguredException("Set OPENAI_API_KEY or Anthropic_API_KEY or ZHIPUAI_API_KEY first " + "or FIREWORKS_API_KEY or OPEN_LLM_API_BASE") self.openai_api_base = self._get("OPENAI_API_BASE") openai_proxy = self._get("OPENAI_PROXY") or self.global_proxy if openai_proxy: @@ -69,6 +77,9 @@ class Config(metaclass=Singleton): self.domain = self._get("DOMAIN") self.spark_url = self._get("SPARK_URL") + self.fireworks_api_base = self._get("FIREWORKS_API_BASE") + self.fireworks_api_model = self._get("FIREWORKS_API_MODEL") + self.claude_api_key = self._get("Anthropic_API_KEY") self.serpapi_api_key = self._get("SERPAPI_API_KEY") self.serper_api_key = self._get("SERPER_API_KEY") diff --git a/metagpt/llm.py b/metagpt/llm.py index 4edcd7a83..1f7d1b4c9 100644 --- a/metagpt/llm.py +++ b/metagpt/llm.py @@ -6,12 +6,13 @@ @File : llm.py """ -from metagpt.logs import logger from metagpt.config import CONFIG from metagpt.provider.anthropic_api import Claude2 as Claude from metagpt.provider.openai_api import OpenAIGPTAPI from metagpt.provider.zhipuai_api import ZhiPuAIGPTAPI from metagpt.provider.spark_api import SparkAPI +from metagpt.provider.open_llm_api import OpenLLMGPTAPI +from metagpt.provider.fireworks_api import FireWorksGPTAPI from metagpt.provider.human_provider import HumanProvider @@ -26,6 +27,10 @@ def LLM() -> "BaseGPTAPI": llm = SparkAPI() elif CONFIG.zhipuai_api_key: llm = ZhiPuAIGPTAPI() + elif CONFIG.open_llm_api_base: + llm = OpenLLMGPTAPI() + elif CONFIG.fireworks_api_key: + llm = FireWorksGPTAPI() else: raise RuntimeError("You should config a LLM configuration first") diff --git a/metagpt/provider/fireworks_api.py b/metagpt/provider/fireworks_api.py new file mode 100644 index 000000000..23126af2d --- /dev/null +++ b/metagpt/provider/fireworks_api.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : fireworks.ai's api + +import openai + +from metagpt.config import CONFIG +from metagpt.provider.openai_api import OpenAIGPTAPI, CostManager, RateLimiter + + +class FireWorksGPTAPI(OpenAIGPTAPI): + + def __init__(self): + self.__init_fireworks(CONFIG) + self.llm = openai + self.model = CONFIG.fireworks_api_model + self.auto_max_tokens = False + self._cost_manager = CostManager() + RateLimiter.__init__(self, rpm=self.rpm) + + def __init_fireworks(self, config: "Config"): + openai.api_key = config.fireworks_api_key + openai.api_base = config.fireworks_api_base + self.rpm = int(config.get("RPM", 10)) diff --git a/metagpt/provider/open_llm_api.py b/metagpt/provider/open_llm_api.py new file mode 100644 index 000000000..a6820b42b --- /dev/null +++ b/metagpt/provider/open_llm_api.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : self-host open llm model with openai-compatible interface + +import openai + +from metagpt.logs import logger +from metagpt.config import CONFIG +from metagpt.provider.openai_api import OpenAIGPTAPI, CostManager, RateLimiter + + +class OpenLLMCostManager(CostManager): + """ open llm model is self-host, it's free and without cost""" + + def update_cost(self, prompt_tokens, completion_tokens, model): + """ + Update the total cost, prompt tokens, and completion tokens. + + Args: + prompt_tokens (int): The number of tokens used in the prompt. + completion_tokens (int): The number of tokens used in the completion. + model (str): The model used for the API call. + """ + self.total_prompt_tokens += prompt_tokens + self.total_completion_tokens += completion_tokens + + logger.info( + f"Max budget: ${CONFIG.max_budget:.3f} | " + f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}" + ) + CONFIG.total_cost = self.total_cost + + +class OpenLLMGPTAPI(OpenAIGPTAPI): + + def __init__(self): + self.__init_openllm(CONFIG) + self.llm = openai + self.model = CONFIG.open_llm_api_model + self.auto_max_tokens = False + self._cost_manager = OpenLLMCostManager() + RateLimiter.__init__(self, rpm=self.rpm) + + def __init_openllm(self, config: "Config"): + openai.api_key = "sk-xx" # self-host api doesn't need api-key, use the default value + openai.api_base = config.open_llm_api_base + self.rpm = int(config.get("RPM", 10)) diff --git a/metagpt/provider/postprecess/base_postprecess_plugin.py b/metagpt/provider/postprecess/base_postprecess_plugin.py new file mode 100644 index 000000000..702a03194 --- /dev/null +++ b/metagpt/provider/postprecess/base_postprecess_plugin.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : base llm postprocess plugin to do the operations like repair the raw llm output + +from typing import Union + +from metagpt.logs import logger +from metagpt.utils.repair_llm_raw_output import RepairType +from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, extract_content_from_output, \ + retry_parse_json_text + + +class BasePostPrecessPlugin(object): + + model = None # the plugin of the `model`, use to judge in `llm_postprecess` + + def run_repair_llm_output(self, output: str, schema: dict, req_key: str = "[/CONTENT]") -> Union[dict, list]: + """ + repair steps + 1. repair the case sensitive problem using the schema's fields + 2. extract the content from the req_key pair( xx[REQ_KEY]xxx[/REQ_KEY]xx ) + 3. repair the invalid json text in the content + 4. parse the json text and repair it according to the exception with retry loop + """ + output_class_fields = list(schema["properties"].keys()) # Custom ActionOutput's fields + + content = self.run_repair_llm_raw_output(output, req_keys=output_class_fields + [req_key]) + content = self.run_extract_content_from_output(content, right_key=req_key) + # # req_keys mocked + content = self.run_repair_llm_raw_output(content, req_keys=[None], repair_type=RepairType.JSON) + parsed_data = self.run_retry_parse_json_text(content) + + return parsed_data + + def run_repair_llm_raw_output(self, content: str, req_keys: list[str], repair_type: str = None) -> str: + """ inherited class can re-implement the function""" + return repair_llm_raw_output(content, req_keys=req_keys, repair_type=repair_type) + + def run_extract_content_from_output(self, content: str, right_key: str) -> str: + """ inherited class can re-implement the function""" + return extract_content_from_output(content, right_key=right_key) + + def run_retry_parse_json_text(self, content: str) -> Union[dict, list]: + """ inherited class can re-implement the function""" + logger.info(f"extracted json CONTENT from output:\n{content}") + parsed_data = retry_parse_json_text(output=content) # should use output=content + return parsed_data + + def run(self, output: str, schema: dict, req_key: str = "[/CONTENT]") -> Union[dict, list]: + """ + this is used for prompt with a json-format output requirement and outer pair key, like + [REQ_KEY] + { + "Key": "value" + } + [/REQ_KEY] + + Args + outer (str): llm raw output + schema: output json schema + req_key: outer pair right key, usually in `[/REQ_KEY]` format + """ + assert len(schema.get("properties")) > 0 + assert "/" in req_key + + # current, postprocess only deal the repair_llm_raw_output + new_output = self.run_repair_llm_output( + output=output, + schema=schema, + req_key=req_key + ) + return new_output diff --git a/metagpt/provider/postprecess/llm_output_postprecess.py b/metagpt/provider/postprecess/llm_output_postprecess.py new file mode 100644 index 000000000..4b5955061 --- /dev/null +++ b/metagpt/provider/postprecess/llm_output_postprecess.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : the entry of choosing which PostProcessPlugin to deal particular LLM model's output + +from typing import Union + +from metagpt.provider.postprecess.base_postprecess_plugin import BasePostPrecessPlugin + + +def llm_output_postprecess(output: str, schema: dict, req_key: str = "[/CONTENT]", + model_name: str = None) -> Union[dict, str]: + """ + default use BasePostPrecessPlugin if there is not matched plugin. + """ + # TODO choose different model's plugin according to the model_name + postprecess_plugin = BasePostPrecessPlugin() + + result = postprecess_plugin.run( + output=output, + schema=schema, + req_key=req_key + ) + return result diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index a12a36fcc..4a632b80c 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -91,13 +91,13 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - idx1 = sub_output.rfind("}") idx2 = sub_output.rindex("]") idx = idx1 if idx1 >= idx2 else idx2 - sub_output = sub_output[: idx] + sub_output = sub_output[: idx+1] return sub_output if output.strip().endswith("}") or (output.strip().endswith("]") and not output.strip().endswith(left_key)): # # avoid [req_key]xx[req_key] case to append [/req_key] output = output + "\n" + right_key - elif judge_potential_json(output, left_key): + elif judge_potential_json(output, left_key) and (not output.strip().endswith(left_key)): sub_content = judge_potential_json(output, left_key) output = sub_content + "\n" + right_key @@ -116,7 +116,7 @@ def repair_json_format(output: str) -> str: elif output.endswith("}]"): output = output[:-1] logger.info(f"repair_json_format: {'}]'}") - elif output.startswith("{") and output.startswith("]"): + elif output.startswith("{") and output.endswith("]"): output = output[:-1] + "}" return output @@ -183,9 +183,11 @@ def repair_invalid_json(output: str, error: str) -> str: if line.endswith("],"): # problem, redundant char `]` line = line.replace("]", "") - elif line.endswith("},"): + elif line.endswith("},") and not output.endswith("},"): # problem, redundant char `}` line = line.replace("}", "") + elif line.endswith("},") and output.endswith("},"): + line = line[:-1] elif '",' not in line: line = f'{line}",' elif "," not in line: @@ -218,11 +220,10 @@ def run_after_exp_and_passon_next_retry(logger: "loguru.Logger") -> Callable[["R """ if retry_state.outcome.failed: if len(retry_state.args) > 0: - # # can't used as args=retry_state.args + # # can't be used as args=retry_state.args func_param_output = retry_state.args[0] elif len(retry_state.kwargs) > 0: func_param_output = retry_state.kwargs.get("output", "") - # import pdb; pdb.set_trace() exp_str = str(retry_state.outcome.exception()) logger.warning(f"parse json from content inside [CONTENT][/CONTENT] failed at retry " f"{retry_state.attempt_number}, try to fix it, exp: {exp_str}") @@ -265,6 +266,7 @@ def extract_content_from_output(content: str, right_key: str = "[/CONTENT]"): break return cont.strip() + # TODO construct the extract pattern with the `right_key` raw_content = copy.deepcopy(content) pattern = r"\[CONTENT\]([\s\S]*)\[/CONTENT\]" new_content = re_extract_content(raw_content, pattern) diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index dfcf60ad5..8779c965c 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -77,11 +77,11 @@ def test_required_key_pair_missing(): raw_output = '''[CONTENT] { - "a": "b" + "key": "value" ]''' target_output = '''[CONTENT] { - "a": "b" + "key": "value" ] [/CONTENT]''' @@ -92,17 +92,15 @@ def test_required_key_pair_missing(): raw_output = '''[CONTENT] tag [CONTENT] { - "a": "b" + "key": "value" } xxx ''' - target_output = '''[CONTENT] tag -[CONTENT] + target_output = '''[CONTENT] { - "a": "b" + "key": "value" } -[/CONTENT] -''' +[/CONTENT]''' output = repair_llm_raw_output(output=raw_output, req_keys=["[/CONTENT]"]) assert output == target_output @@ -117,6 +115,22 @@ def test_repair_json_format(): repair_type=RepairType.JSON) assert output == target_output + raw_output = "[{ xxx }" + target_output = "{ xxx }" + + output = repair_llm_raw_output(output=raw_output, + req_keys=[None], + repair_type=RepairType.JSON) + assert output == target_output + + raw_output = "{ xxx ]" + target_output = "{ xxx }" + + output = repair_llm_raw_output(output=raw_output, + req_keys=[None], + repair_type=RepairType.JSON) + assert output == target_output + def test_retry_parse_json_text(): invalid_json_text = """{ @@ -130,7 +144,7 @@ def test_retry_parse_json_text(): "Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis", "Requirement Analysis": "The requirements are clear and well-defined" } - output = retry_parse_json_text(invalid_json_text) + output = retry_parse_json_text(output=invalid_json_text) assert output == target_json invalid_json_text = """{ @@ -144,7 +158,7 @@ def test_retry_parse_json_text(): "Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis", "Requirement Analysis": "The requirements are clear and well-defined" } - output = retry_parse_json_text(invalid_json_text) + output = retry_parse_json_text(output=invalid_json_text) assert output == target_json From 9a2ac792fe1100c2f86783b547bfafcdeae2c95f Mon Sep 17 00:00:00 2001 From: better629 Date: Thu, 23 Nov 2023 01:55:56 +0800 Subject: [PATCH 06/38] add __init__ --- metagpt/provider/postprecess/__init__.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 metagpt/provider/postprecess/__init__.py diff --git a/metagpt/provider/postprecess/__init__.py b/metagpt/provider/postprecess/__init__.py new file mode 100644 index 000000000..2bcf8efd0 --- /dev/null +++ b/metagpt/provider/postprecess/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : From 1a67878b7e8ae3c07dc9306659ff551b18fe00b0 Mon Sep 17 00:00:00 2001 From: better629 Date: Thu, 23 Nov 2023 09:29:40 +0800 Subject: [PATCH 07/38] move after_log as general one --- metagpt/actions/action.py | 16 ++-------------- metagpt/utils/utils.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 14 deletions(-) create mode 100644 metagpt/utils/utils.py diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index cb5bd9ce1..0a7a1656d 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -6,7 +6,6 @@ @File : action.py """ -import typing from abc import ABC from typing import Optional @@ -16,21 +15,10 @@ from metagpt.actions.action_output import ActionOutput from metagpt.llm import LLM from metagpt.logs import logger from metagpt.utils.common import OutputParser +from metagpt.utils.utils import general_after_log from metagpt.provider.postprecess.llm_output_postprecess import llm_output_postprecess -def action_after_log(logger: "loguru.Logger", sec_format: str = "%0.3f") -> typing.Callable[["RetryCallState"], None]: - def log_it(retry_state: "RetryCallState") -> None: - if retry_state.fn is None: - fn_name = "" - else: - fn_name = _utils.get_callback_name(retry_state.fn) - logger.error(f"Finished call to '{fn_name}' after {sec_format % retry_state.seconds_since_start}(s), " - f"this was the {_utils.to_ordinal(retry_state.attempt_number)} time calling it. " - f"exp: {retry_state.outcome.exception()}") - return log_it - - class Action(ABC): def __init__(self, name: str = "", context=None, llm: LLM = None): self.name: str = name @@ -65,7 +53,7 @@ class Action(ABC): @retry( stop=stop_after_attempt(3), wait=wait_fixed(1), - after=action_after_log(logger), + after=general_after_log(logger), ) async def _aask_v1( self, diff --git a/metagpt/utils/utils.py b/metagpt/utils/utils.py new file mode 100644 index 000000000..f479ec3b8 --- /dev/null +++ b/metagpt/utils/utils.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : + +import typing + +from tenacity import after_log, _utils + + +def general_after_log(logger: "loguru.Logger", sec_format: str = "%0.3f") -> typing.Callable[["RetryCallState"], None]: + def log_it(retry_state: "RetryCallState") -> None: + if retry_state.fn is None: + fn_name = "" + else: + fn_name = _utils.get_callback_name(retry_state.fn) + logger.error(f"Finished call to '{fn_name}' after {sec_format % retry_state.seconds_since_start}(s), " + f"this was the {_utils.to_ordinal(retry_state.attempt_number)} time calling it. " + f"exp: {retry_state.outcome.exception()}") + return log_it From 502bb2c4498b1a672a2f890a8fda479f812165c2 Mon Sep 17 00:00:00 2001 From: better629 Date: Thu, 23 Nov 2023 11:21:25 +0800 Subject: [PATCH 08/38] fix extract_content_from_output --- metagpt/utils/repair_llm_raw_output.py | 4 +++- tests/metagpt/utils/test_repair_llm_raw_output.py | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 4a632b80c..0b521a7b0 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -275,13 +275,15 @@ def extract_content_from_output(content: str, right_key: str = "[/CONTENT]"): # TODO find a more general pattern # # for `[CONTENT]xxx[CONTENT]xxxx[/CONTENT] situation logger.warning(f"extract_content try another pattern: {pattern}") - raw_content = copy.deepcopy(new_content + right_key) + if right_key not in new_content: + raw_content = copy.deepcopy(new_content + "\n" + right_key) # # pattern = r"\[CONTENT\](\s*\{.*?\}\s*)\[/CONTENT\]" new_content = re_extract_content(raw_content, pattern) else: if right_key in new_content: idx = new_content.find(right_key) new_content = new_content[:idx] + new_content = new_content.strip() return new_content diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index 8779c965c..553b57625 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -216,7 +216,8 @@ def test_extract_content_from_output(): 'UNCLEAR:\n\n* How to start the game.\n\nGreat job! This JSON output should provide a clear and ' \ 'comprehensive overview of the project\'s requirements and dependencies.' output = extract_content_from_output(output) - assert output.startswith('{\n"Required Python third-party packages') + assert output.startswith('{\n"Required Python third-party packages') and \ + output.endswith('UNCLEAR": "How to start the game."\n]') output = 'Sure, I would be happy to help! Here is the information you provided, formatted as a JSON object ' \ 'inside the [CONTENT] tag:\n\n[CONTENT]\n{\n"Original Requirements": "Create a 2048 game",\n"Search ' \ @@ -245,7 +246,8 @@ def test_extract_content_from_output(): '[/CONTENT]\n\nI hope this helps! Let me know if you have any further questions or if there anything ' \ 'else I can do to assist you.' output = extract_content_from_output(output) - assert output.startswith('{\n"Original Requirements"') + assert output.startswith('{\n"Original Requirements"') and \ + output.endswith('"Anything UNCLEAR": ""\n}') output = """ Sure, I'd be happy to help! Here's the JSON output for the given context:\n\n[CONTENT]\n{ "Implementation approach": "We will use the open-source framework PyGame to create a 2D game engine, which will @@ -270,4 +272,5 @@ def test_extract_content_from_output(): information for a developer to understand the design and implementation of the 2048 game. """ output = extract_content_from_output(output) - assert output.startswith('{\n"Implementation approach"') and "[/CONTENT]" not in output + assert output.startswith('{\n"Implementation approach"') and \ + output.endswith('"Anything UNCLEAR": "The requirement is clear to me."\n}') From c25d5a73d16c4977e4dac630279d650d5e34db53 Mon Sep 17 00:00:00 2001 From: better629 Date: Thu, 23 Nov 2023 11:53:11 +0800 Subject: [PATCH 09/38] add ut test_repair_invalid_json --- metagpt/utils/repair_llm_raw_output.py | 18 ++++---- .../utils/test_repair_llm_raw_output.py | 42 +++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 0b521a7b0..f9e6d020d 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -182,19 +182,23 @@ def repair_invalid_json(output: str, error: str) -> str: # different general problems if line.endswith("],"): # problem, redundant char `]` - line = line.replace("]", "") + new_line = line.replace("]", "") elif line.endswith("},") and not output.endswith("},"): # problem, redundant char `}` - line = line.replace("}", "") + new_line = line.replace("}", "") elif line.endswith("},") and output.endswith("},"): - line = line[:-1] - elif '",' not in line: - line = f'{line}",' + new_line = line[:-1] + elif '",' not in line and ',' not in line: + new_line = f'{line}",' elif "," not in line: # problem, miss char `,` at the end. - line = f"{line}," + new_line = f"{line}," + elif "," in line and len(line) == 1: + new_line = f'"{line}' + elif '",' in line: + new_line = line[:-2] + "'," - arr[line_no] = line + arr[line_no] = new_line output = "\n".join(arr) logger.info(f"repair_invalid_json, raw error: {error}") diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index 553b57625..acacb3af3 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -132,6 +132,48 @@ def test_repair_json_format(): assert output == target_output +def test_repair_invalid_json(): + raw_output = """{ + "key": "value" + }, +}""" + target_output = """{ + "key": "value" +, +}""" + output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 3 column 1") + assert output == target_output + + raw_output = """{ + "key": " +value + }, +}""" + target_output = """{ + "key": " +value +", +}""" + output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 4 column 1") + output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1") + assert output == target_output + + raw_output = """{ + "key": ' +value + }, +}""" + target_output = """{ + "key": ' +value +', +}""" + output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 4 column 1") + output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1") + output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1") + assert output == target_output + + def test_retry_parse_json_text(): invalid_json_text = """{ "Original Requirements": "Create a 2048 game", From 56b75b34ddd74013187a50630b92fe7bede0a40d Mon Sep 17 00:00:00 2001 From: better629 Date: Thu, 23 Nov 2023 13:38:33 +0800 Subject: [PATCH 10/38] rm not fully ready claude_api --- metagpt/llm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/metagpt/llm.py b/metagpt/llm.py index 1f7d1b4c9..7b490ec4a 100644 --- a/metagpt/llm.py +++ b/metagpt/llm.py @@ -21,8 +21,6 @@ def LLM() -> "BaseGPTAPI": # TODO a little trick, can use registry to initialize LLM instance further if CONFIG.openai_api_key: llm = OpenAIGPTAPI() - elif CONFIG.claude_api_key: - llm = Claude() elif CONFIG.spark_api_key: llm = SparkAPI() elif CONFIG.zhipuai_api_key: From f1e01c5ba8b2246e763da3e7f850f0f4f8a30675 Mon Sep 17 00:00:00 2001 From: better629 Date: Mon, 4 Dec 2023 11:12:13 +0800 Subject: [PATCH 11/38] set config value not relay on key.yaml --- tests/metagpt/utils/test_repair_llm_raw_output.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index acacb3af3..a2dd18516 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -2,7 +2,9 @@ # -*- coding: utf-8 -*- # @Desc : unittest of repair_llm_raw_output -import pytest + +from metagpt.config import CONFIG +CONFIG.repair_llm_output = True from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType, repair_invalid_json,\ extract_content_from_output, retry_parse_json_text From fb69c107feaf866f704299b003072baa3f760ef7 Mon Sep 17 00:00:00 2001 From: better629 Date: Mon, 4 Dec 2023 21:56:43 +0800 Subject: [PATCH 12/38] rm useless deepcopy --- metagpt/utils/repair_llm_raw_output.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index f9e6d020d..124bcba89 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -82,8 +82,7 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - if left_key not in output: output = left_key + "\n" + output if right_key not in output: - def judge_potential_json(routput: str, left_key: str) -> Union[str]: - routput = copy.deepcopy(routput) + def judge_potential_json(routput: str, left_key: str) -> Union[str, None]: ridx = routput.rfind(left_key) if ridx < 0: return None From 7833e5767305153b4f2b0fb602f57ab72e6cf035 Mon Sep 17 00:00:00 2001 From: zeeland Date: Tue, 5 Dec 2023 16:30:46 +0800 Subject: [PATCH 13/38] pref: optimize log --- metagpt/logs.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/metagpt/logs.py b/metagpt/logs.py index b2052e9b8..471d57fe9 100644 --- a/metagpt/logs.py +++ b/metagpt/logs.py @@ -7,18 +7,24 @@ """ import sys +from datetime import datetime from loguru import logger as _logger from metagpt.const import PROJECT_ROOT + def define_log_level(print_level="INFO", logfile_level="DEBUG"): """调整日志级别到level之上 Adjust the log level to above level """ + current_date = datetime.now() + formatted_date = current_date.strftime("%Y%m%d") + _logger.remove() _logger.add(sys.stderr, level=print_level) - _logger.add(PROJECT_ROOT / 'logs/log.txt', level=logfile_level) + _logger.add(PROJECT_ROOT / f"logs/{formatted_date}.log", level=logfile_level) return _logger + logger = define_log_level() From d6cc0165fcc0862130388301d7ba04b74f937257 Mon Sep 17 00:00:00 2001 From: mo Date: Mon, 11 Dec 2023 09:21:55 +0800 Subject: [PATCH 14/38] fix prompts --- metagpt/prompts/generate_skill.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/prompts/generate_skill.md b/metagpt/prompts/generate_skill.md index 74948cd15..e96f8181a 100644 --- a/metagpt/prompts/generate_skill.md +++ b/metagpt/prompts/generate_skill.md @@ -10,7 +10,7 @@ from typing import Optional from abc import ABC from metagpt.llm import LLM # Large language model, similar to GPT -n + class Action(ABC): def __init__(self, name='', context=None, llm: LLM = LLM()): self.name = name From 12208154ee0fc5f913fe104ec722cae074c95d42 Mon Sep 17 00:00:00 2001 From: better629 Date: Tue, 12 Dec 2023 15:06:20 +0800 Subject: [PATCH 15/38] simplify code --- metagpt/utils/repair_llm_raw_output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 124bcba89..0a461d360 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -222,10 +222,10 @@ def run_after_exp_and_passon_next_retry(logger: "loguru.Logger") -> Callable[["R } """ if retry_state.outcome.failed: - if len(retry_state.args) > 0: + if retry_state.args: # # can't be used as args=retry_state.args func_param_output = retry_state.args[0] - elif len(retry_state.kwargs) > 0: + elif retry_state.kwargs: func_param_output = retry_state.kwargs.get("output", "") exp_str = str(retry_state.outcome.exception()) logger.warning(f"parse json from content inside [CONTENT][/CONTENT] failed at retry " From 97cd9cd98d1a53481307c6a7014b675e1c0321af Mon Sep 17 00:00:00 2001 From: 0aaryan Date: Tue, 12 Dec 2023 15:20:29 +0530 Subject: [PATCH 16/38] Fix: Spelling errors in words (quoto -> quote) #521 --- metagpt/actions/debug_error.py | 2 +- metagpt/actions/design_api.py | 4 ++-- metagpt/actions/write_code.py | 2 +- metagpt/actions/write_prd.py | 4 ++-- metagpt/actions/write_test.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/metagpt/actions/debug_error.py b/metagpt/actions/debug_error.py index d69a22dba..101cc2025 100644 --- a/metagpt/actions/debug_error.py +++ b/metagpt/actions/debug_error.py @@ -22,7 +22,7 @@ The message is as follows: {context} --- Now you should start rewriting the code: -## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS IN ONLY ONE FILE. +## file name of the code to rewrite: Write code with triple quote. Do your best to implement THIS IN ONLY ONE FILE. """ class DebugError(Action): def __init__(self, name="DebugError", context=None, llm=None): diff --git a/metagpt/actions/design_api.py b/metagpt/actions/design_api.py index 75df8b909..fc3be602b 100644 --- a/metagpt/actions/design_api.py +++ b/metagpt/actions/design_api.py @@ -33,7 +33,7 @@ Max Output: 8192 chars or 2048 tokens. Try to use them up. ## Implementation approach: Provide as Plain text. Analyze the difficult points of the requirements, select the appropriate open-source framework. -## Python package name: Provide as Python str with python triple quoto, concise and clear, characters only use a combination of all lowercase and underscores +## Python package name: Provide as Python str with python triple quote, concise and clear, characters only use a combination of all lowercase and underscores ## File list: Provided as Python list[str], the list of ONLY REQUIRED files needed to write the program(LESS IS MORE!). Only need relative paths, comply with PEP8 standards. ALWAYS write a main.py or app.py here @@ -86,7 +86,7 @@ Attention: Use '##' to split sections, not '#', and '## ' SHOULD W ## Implementation approach: Provide as Plain text. Analyze the difficult points of the requirements, select the appropriate open-source framework. -## Python package name: Provide as Python str with python triple quoto, concise and clear, characters only use a combination of all lowercase and underscores +## Python package name: Provide as Python str with python triple quote, concise and clear, characters only use a combination of all lowercase and underscores ## File list: Provided as Python list[str], the list of ONLY REQUIRED files needed to write the program(LESS IS MORE!). Only need relative paths, comply with PEP8 standards. ALWAYS write a main.py or app.py here diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index a5dc8e059..a89bce60f 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -18,7 +18,7 @@ NOTICE Role: You are a professional engineer; the main goal is to write PEP8 compliant, elegant, modular, easy to read and maintain Python 3.9 code (but you can also use other programming language) ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenced "Format example". -## Code: {filename} Write code with triple quoto, based on the following list and context. +## Code: {filename} Write code with triple quote, based on the following list and context. 1. Do your best to implement THIS ONLY ONE FILE. ONLY USE EXISTING API. IF NO API, IMPLEMENT IT. 2. Requirement: Based on the context, implement one following code file, note to return only in code form, your code will be part of the entire project, so please implement complete, reliable, reusable code snippets 3. Attention1: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. diff --git a/metagpt/actions/write_prd.py b/metagpt/actions/write_prd.py index bd04ca79e..52a99dafc 100644 --- a/metagpt/actions/write_prd.py +++ b/metagpt/actions/write_prd.py @@ -23,7 +23,7 @@ templates = { ## Search Information {search_information} -## mermaid quadrantChart code syntax example. DONT USE QUOTO IN CODE DUE TO INVALID SYNTAX. Replace the with REAL COMPETITOR NAME +## mermaid quadrantChart code syntax example. DONT USE QUOTE IN CODE DUE TO INVALID SYNTAX. Replace the with REAL COMPETITOR NAME ```mermaid quadrantChart title Reach and engagement of campaigns @@ -108,7 +108,7 @@ and only output the json inside this tag, nothing else ## Search Information {search_information} -## mermaid quadrantChart code syntax example. DONT USE QUOTO IN CODE DUE TO INVALID SYNTAX. Replace the with REAL COMPETITOR NAME +## mermaid quadrantChart code syntax example. DONT USE QUOTE IN CODE DUE TO INVALID SYNTAX. Replace the with REAL COMPETITOR NAME ```mermaid quadrantChart title Reach and engagement of campaigns diff --git a/metagpt/actions/write_test.py b/metagpt/actions/write_test.py index 35ff36dc2..e2352b641 100644 --- a/metagpt/actions/write_test.py +++ b/metagpt/actions/write_test.py @@ -26,7 +26,7 @@ Attention: Use '##' to split sections, not '#', and '## ' SHOULD W ``` Note that the code to test is at {source_file_path}, we will put your test code at {workspace}/tests/{test_file_name}, and run your test code from {workspace}, you should correctly import the necessary classes based on these file locations! -## {test_file_name}: Write test code with triple quoto. Do your best to implement THIS ONLY ONE FILE. +## {test_file_name}: Write test code with triple quote. Do your best to implement THIS ONLY ONE FILE. """ From 88bbc75d565a8549ed790c78d95fdd6759630085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Wed, 13 Dec 2023 22:19:55 +0800 Subject: [PATCH 17/38] fixbug: gitignore error after project renamed --- metagpt/utils/git_repository.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metagpt/utils/git_repository.py b/metagpt/utils/git_repository.py index d372fd22e..9827b8252 100644 --- a/metagpt/utils/git_repository.py +++ b/metagpt/utils/git_repository.py @@ -204,6 +204,7 @@ class GitRepository: logger.warning(f"Move {str(self.workdir)} to {str(new_path)} error: {e}") logger.info(f"Rename directory {str(self.workdir)} to {str(new_path)}") self._repository = Repo(new_path) + self._gitignore_rules = parse_gitignore(full_path=str(new_path / ".gitignore")) def get_files(self, relative_path: Path | str, root_relative_path: Path | str = None, filter_ignored=True) -> List: """ From ad0e5a6da83d6ded26f9b7f36b834c0bba78b8b9 Mon Sep 17 00:00:00 2001 From: geekan Date: Tue, 12 Dec 2023 16:49:41 +0800 Subject: [PATCH 18/38] action_node: make it work at first step. --- metagpt/actions/action_node.py | 258 +++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 metagpt/actions/action_node.py diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py new file mode 100644 index 000000000..4fbd3ce7f --- /dev/null +++ b/metagpt/actions/action_node.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/12/11 18:45 +@Author : alexanderwu +@File : action_node.py +""" +from typing import Dict, Type, List, Any +import json + +from pydantic import BaseModel, create_model, root_validator, validator +# , model_validator, field_validator + +from metagpt.logs import logger + + +def dict_to_markdown(d, prefix="##", postfix="\n\n"): + markdown_str = "" + for key, value in d.items(): + markdown_str += f"{prefix} {key}: {value}{postfix}" + return markdown_str + + +class ActionNode: + """ActionNode is a tree of nodes.""" + + # Action Inputs + key: str # Product Requirement / File list / Code + expected_type: Type # such as str / int / float etc. + # context: str # everything in the history. + instruction: str # the instructions should be followed. + example: str # example for In Context-Learning. + + # Action Outputs + content: str + instruct_content: BaseModel + children: dict[str, "ActionNode"] + + def __init__(self, key, expected_type, instruction, example, content="", + children=None): + self.key = key + self.expected_type = expected_type + self.instruction = instruction + self.example = example + self.content = content + self.children = children if children is not None else {} + + def __str__(self): + return f"{self.key}, {self.expected_type}, {self.instruction}, {self.example}" \ + f", {self.content}, {self.children}" + + def __repr__(self): + return self.__str__() + + def add_child(self, node: "ActionNode"): + """增加子ActionNode""" + self.children[node.key] = node + + def add_childs(self, nodes: List["ActionNode"]): + """批量增加子ActionNode""" + for node in nodes: + self.add_child(node) + + def get_children_mapping(self) -> Dict[str, Type]: + """获得子ActionNode的字典,以key索引""" + return {k: v.expected_type for k, v in self.children.items()} + + @classmethod + def create_model_class(cls, class_name: str, mapping: Dict[str, Type]): + """基于pydantic v1的模型动态生成,用来检验结果类型正确性""" + new_class = create_model(class_name, **mapping) + + @validator("*", allow_reuse=True) + def check_name(v, field): + if field.name not in mapping.keys(): + raise ValueError(f"Unrecognized block: {field.name}") + return v + + @root_validator(pre=True, allow_reuse=True) + def check_missing_fields(values): + required_fields = set(mapping.keys()) + missing_fields = required_fields - set(values.keys()) + if missing_fields: + raise ValueError(f"Missing fields: {missing_fields}") + return values + + new_class.__validator_check_name = classmethod(check_name) + new_class.__root_validator_check_missing_fields = classmethod(check_missing_fields) + return new_class + + @classmethod + def create_model_class_v2(cls, class_name: str, mapping: Dict[str, Type]): + """基于pydantic v2的模型动态生成,用来检验结果类型正确性,待验证""" + new_class = create_model(class_name, **mapping) + + @model_validator(mode='before') + def check_missing_fields(data): + required_fields = set(mapping.keys()) + missing_fields = required_fields - set(data.keys()) + if missing_fields: + raise ValueError(f"Missing fields: {missing_fields}") + return data + + @field_validator('*') + def check_name(v: Any, field: str) -> Any: + if field not in mapping.keys(): + raise ValueError(f"Unrecognized block: {field}") + return v + + new_class.__model_validator_check_missing_fields = classmethod(check_missing_fields) + new_class.__field_validator_check_name = classmethod(check_name) + return new_class + + def create_children_class(self): + """使用object内有的字段直接生成model_class""" + class_name = f"{self.key}_AN" + mapping = self.get_children_mapping() + return self.create_model_class(class_name, mapping) + + def to_dict(self, format_func=None, mode="all") -> Dict: + # 如果没有提供格式化函数,使用默认的格式化方式 + if format_func is None: + format_func = lambda node: f"{node.instruction}" + + # 使用提供的格式化函数来格式化当前节点的值 + formatted_value = format_func(self) + + # 创建当前节点的键值对 + if mode == "children": + node_dict = {} + else: + node_dict = {self.key: formatted_value} + + if mode == "root": + return node_dict + + # 遍历子节点并递归调用 to_dict 方法 + for child_key, child_node in self.children.items(): + node_dict.update(child_node.to_dict(format_func)) + + return node_dict + + def compile_to(self, i: Dict, to="raw") -> str: + if to == "json": + return json.dumps(i, indent=4) + elif to == "markdown": + return dict_to_markdown(i) + else: + return str(i) + + def compile_instruction(self, to="raw", mode="children") -> str: + """compile to raw/json/markdown template with all/root/children nodes""" + format_func = lambda i: f"{i.expected_type} # {i.instruction}" + nodes = self.to_dict(format_func=format_func, mode=mode) + return self.compile_to(nodes, to) + + def compile_example(self, to="raw", mode="all") -> str: + """compile to raw/json/markdown examples with all/root/children nodes""" + format_func = lambda i: f"{i.example}" + nodes = self.to_dict(format_func=format_func, mode=mode) + return self.compile_to(nodes, to) + + def compile(self, to="raw", mode="all") -> str: + pass + + def run(self): + """运行这个ActionNode,可以采用不同策略,比如只运行子节点""" + pass + + +IMPLEMENTATION_APPROACH = ActionNode( + key="implementation_approach", + expected_type=str, + instruction="Analyze the difficult points of the requirements, select the appropriate open-source framework", + example="We will ..." +) + +PROJECT_NAME = ActionNode( + key="project_name", + expected_type=str, + instruction="The project name with underline", + example="game_2048" +) + +FILE_LIST = ActionNode( + key="file_list", + expected_type=List[str], + instruction="Only need relative paths. ALWAYS write a main.py or app.py here", + example="['main.py', 'const.py', 'utils.py']" +) + +DATA_STRUCTURES_AND_INTERFACES = ActionNode( + key="data_structures_and_interfaces", + expected_type=str, + instruction="Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions " + "(with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " + "The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.", + example="""classDiagram +class Game{{ + +int score +}} +... +Game "1" -- "1" Food: has""" +) + +PROGRAM_CALL_FLOW = ActionNode( + key="program_call_flow", + expected_type=str, + instruction="Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE " + "accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.", + example="""sequenceDiagram +participant M as Main +... +G->>M: end game""" +) + +ANYTHING_UNCLEAR = ActionNode( + key="anything_unclear", + expected_type=str, + instruction="Mention unclear project aspects, then try to clarify it.", + example="Clarification needed on third-party API integration, ..." +) + + +ACTION_NODES = [ + IMPLEMENTATION_APPROACH, + PROJECT_NAME, + FILE_LIST, + DATA_STRUCTURES_AND_INTERFACES, + PROGRAM_CALL_FLOW, + ANYTHING_UNCLEAR +] + + +def action_node_from_tuple_example(): + # 示例:列表中包含元组 + list_of_tuples = [ + ("key1", str, "Instruction 1", "Example 1", "Content 1", {"child1": ...}), + ("key2", int, "Instruction 2", "Example 2", "Content 2"), + ("key3", int, "Instruction 3", "Example 3") + ] + + # 从列表中创建 ActionNode 实例 + nodes = [ActionNode(*data) for data in list_of_tuples] + for i in nodes: + logger.info(i) + + +def main(): + write_design_node = ActionNode("WriteDesign", str, "", "") + write_design_node.add_childs(ACTION_NODES) + instruction = write_design_node.compile_instruction(to="markdown") + logger.info(instruction) + logger.info(write_design_node.compile_example()) + + +if __name__ == '__main__': + main() From bfdb8415adc0c23ef7654402c862bf8302d34f92 Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 13 Dec 2023 17:47:09 +0800 Subject: [PATCH 19/38] tuning action node code --- metagpt/actions/action.py | 6 +- metagpt/actions/action_node.py | 140 ++++++++++++------------------- metagpt/actions/design_api.py | 12 ++- metagpt/actions/write_prd.py | 4 +- metagpt/config.py | 2 +- metagpt/environment.py | 2 +- metagpt/utils/file_repository.py | 2 +- 7 files changed, 72 insertions(+), 96 deletions(-) diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index dc96699a9..40faaad41 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -25,9 +25,9 @@ class Action(ABC): llm = LLM() self.llm = llm self.context = context - self.prefix = "" - self.profile = "" - self.desc = "" + self.prefix = "" # aask*时会加上prefix,作为system_message + self.profile = "" # FIXME: USELESS + self.desc = "" # FIXME: USELESS self.content = "" self.instruct_content = None self.env = None diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 4fbd3ce7f..35912446d 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -5,7 +5,7 @@ @Author : alexanderwu @File : action_node.py """ -from typing import Dict, Type, List, Any +from typing import Dict, Type, List, Any, Tuple import json from pydantic import BaseModel, create_model, root_validator, validator @@ -14,7 +14,16 @@ from pydantic import BaseModel, create_model, root_validator, validator from metagpt.logs import logger -def dict_to_markdown(d, prefix="##", postfix="\n\n"): +SIMPLE_TEMPLATE = """ +## example +{example} + +## instruction +{instruction} +""" + + +def dict_to_markdown(d, prefix="###", postfix="\n"): markdown_str = "" for key, value in d.items(): markdown_str += f"{prefix} {key}: {value}{postfix}" @@ -23,13 +32,17 @@ def dict_to_markdown(d, prefix="##", postfix="\n\n"): class ActionNode: """ActionNode is a tree of nodes.""" + # 应该是定义子任务,收集子任务结果,并且父任务同时执行吗? + # 初期只提供两种模式,一种是用父任务compile,一种是用子任务逐个执行 + # 1. context、example、instruction-nodes、instruction-action + # 2. context、example # Action Inputs key: str # Product Requirement / File list / Code expected_type: Type # such as str / int / float etc. # context: str # everything in the history. instruction: str # the instructions should be followed. - example: str # example for In Context-Learning. + example: Any # example for In Context-Learning. # Action Outputs content: str @@ -56,7 +69,7 @@ class ActionNode: """增加子ActionNode""" self.children[node.key] = node - def add_childs(self, nodes: List["ActionNode"]): + def add_children(self, nodes: List["ActionNode"]): """批量增加子ActionNode""" for node in nodes: self.add_child(node) @@ -140,7 +153,7 @@ class ActionNode: return node_dict - def compile_to(self, i: Dict, to="raw") -> str: + def compile_to(self, i: Dict, to) -> str: if to == "json": return json.dumps(i, indent=4) elif to == "markdown": @@ -148,88 +161,49 @@ class ActionNode: else: return str(i) - def compile_instruction(self, to="raw", mode="children") -> str: + def tagging(self, text, to, tag="") -> str: + if not tag: + return text + if to == "json": + return f"[{tag}]\n" + "{" + text + "}" + f"\n[/{tag}]" + else: + return f"[{tag}]\n" + text + f"\n[/{tag}]" + + def _compile_f(self, to, mode, tag, format_func) -> str: + nodes = self.to_dict(format_func=format_func, mode=mode) + text = self.compile_to(nodes, to) + return self.tagging(text, to, tag) + + def compile_instruction(self, to="raw", mode="children", tag="") -> str: """compile to raw/json/markdown template with all/root/children nodes""" format_func = lambda i: f"{i.expected_type} # {i.instruction}" - nodes = self.to_dict(format_func=format_func, mode=mode) - return self.compile_to(nodes, to) + return self._compile_f(to, mode, tag, format_func) - def compile_example(self, to="raw", mode="all") -> str: + def compile_example(self, to="raw", mode="children", tag="") -> str: """compile to raw/json/markdown examples with all/root/children nodes""" - format_func = lambda i: f"{i.example}" - nodes = self.to_dict(format_func=format_func, mode=mode) - return self.compile_to(nodes, to) - def compile(self, to="raw", mode="all") -> str: - pass + # 这里不能使用f-string,因为转译为str后再json.dumps会额外加上引号,无法作为有效的example + # 错误示例:"File list": "['main.py', 'const.py', 'game.py']", 注意这里值不是list,而是str + format_func = lambda i: i.example + return self._compile_f(to, mode, tag, format_func) + + def compile(self, mode="children") -> Tuple[str, str]: + """ + mode: all/root/children + mode="children": 编译所有子节点为一个统一模板,包括instruction与example + mode="all": NotImplemented + mode="root": NotImplemented + """ + self.instruction = self.compile_instruction(to="json", mode=mode) + self.example = self.compile_example(to="json", tag="CONTENT", mode=mode) + # prompt = template.format(example=self.example, instruction=self.instruction) + return self.instruction, self.example def run(self): """运行这个ActionNode,可以采用不同策略,比如只运行子节点""" - pass - -IMPLEMENTATION_APPROACH = ActionNode( - key="implementation_approach", - expected_type=str, - instruction="Analyze the difficult points of the requirements, select the appropriate open-source framework", - example="We will ..." -) - -PROJECT_NAME = ActionNode( - key="project_name", - expected_type=str, - instruction="The project name with underline", - example="game_2048" -) - -FILE_LIST = ActionNode( - key="file_list", - expected_type=List[str], - instruction="Only need relative paths. ALWAYS write a main.py or app.py here", - example="['main.py', 'const.py', 'utils.py']" -) - -DATA_STRUCTURES_AND_INTERFACES = ActionNode( - key="data_structures_and_interfaces", - expected_type=str, - instruction="Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions " - "(with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " - "The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.", - example="""classDiagram -class Game{{ - +int score -}} -... -Game "1" -- "1" Food: has""" -) - -PROGRAM_CALL_FLOW = ActionNode( - key="program_call_flow", - expected_type=str, - instruction="Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE " - "accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.", - example="""sequenceDiagram -participant M as Main -... -G->>M: end game""" -) - -ANYTHING_UNCLEAR = ActionNode( - key="anything_unclear", - expected_type=str, - instruction="Mention unclear project aspects, then try to clarify it.", - example="Clarification needed on third-party API integration, ..." -) - - -ACTION_NODES = [ - IMPLEMENTATION_APPROACH, - PROJECT_NAME, - FILE_LIST, - DATA_STRUCTURES_AND_INTERFACES, - PROGRAM_CALL_FLOW, - ANYTHING_UNCLEAR -] + # 需要传入llm,并且实际在ActionNode中执行。需要规划好具体的执行方法 + raise NotImplementedError def action_node_from_tuple_example(): @@ -246,13 +220,5 @@ def action_node_from_tuple_example(): logger.info(i) -def main(): - write_design_node = ActionNode("WriteDesign", str, "", "") - write_design_node.add_childs(ACTION_NODES) - instruction = write_design_node.compile_instruction(to="markdown") - logger.info(instruction) - logger.info(write_design_node.compile_example()) - - if __name__ == '__main__': - main() + action_node_from_tuple_example() diff --git a/metagpt/actions/design_api.py b/metagpt/actions/design_api.py index 557ebcbbd..a6d559a4c 100644 --- a/metagpt/actions/design_api.py +++ b/metagpt/actions/design_api.py @@ -14,6 +14,7 @@ from pathlib import Path from typing import List from metagpt.actions import Action, ActionOutput +from metagpt.actions.design_api_an import DESIGN_API_NODE, SIMPLE_TEMPLATE from metagpt.config import CONFIG from metagpt.const import ( DATA_API_DESIGN_FILE_REPO, @@ -227,13 +228,22 @@ class WriteDesign(Action): # leaving room for global optimization in subsequent steps. return ActionOutput(content=changed_files.json(), instruct_content=changed_files) - async def _new_system_design(self, context, format=CONFIG.prompt_format): + async def _new_system_design_bakup(self, context, format=CONFIG.prompt_format): prompt_template, format_example = get_template(templates, format) format_example = format_example.format(project_name=CONFIG.project_name) prompt = prompt_template.format(context=context, format_example=format_example) system_design = await self._aask_v1(prompt, "system_design", OUTPUT_MAPPING, format=format) return system_design + async def _new_system_design(self, context, format=CONFIG.prompt_format): + instruction, example = DESIGN_API_NODE.compile() + prompt = SIMPLE_TEMPLATE.format(context=context, example=example, instruction=instruction) + # prompt_template, format_example = get_template(templates, format) + # format_example = format_example.format(project_name=CONFIG.project_name) + # prompt = prompt_template.format(context=context, format_example=format_example) + system_design = await self._aask_v1(prompt, "system_design", OUTPUT_MAPPING, format=format) + return system_design + async def _merge(self, prd_doc, system_design_doc, format=CONFIG.prompt_format): prompt = MERGE_PROMPT.format( old_design=system_design_doc.content, context=prd_doc.content, project_name=CONFIG.project_name diff --git a/metagpt/actions/write_prd.py b/metagpt/actions/write_prd.py index aad2422ef..0594d116e 100644 --- a/metagpt/actions/write_prd.py +++ b/metagpt/actions/write_prd.py @@ -361,7 +361,7 @@ class WritePRD(Action): ) if prd_doc: change_files.docs[prd_doc.filename] = prd_doc - logger.info(f"NEW PRD:{prd_doc.filename}") + logger.debug(f"new prd: {prd_doc.filename}") # Once all files under 'docs/prds/' have been compared with the newly added requirements, trigger the # 'publish' message to transition the workflow to the next stage. This design allows room for global # optimization in subsequent steps. @@ -394,7 +394,7 @@ class WritePRD(Action): async def _is_relative_to(self, new_requirement_doc, old_prd_doc) -> bool: prompt = IS_RELATIVE_PROMPT.format(old_prd=old_prd_doc.content, requirements=new_requirement_doc.content) res = await self._aask(prompt=prompt) - logger.info(f"REQ-RELATIVE:[{new_requirement_doc.root_relative_path}, {old_prd_doc.root_relative_path}]: {res}") + logger.info(f"REQ-RELATIVE: [{new_requirement_doc.root_relative_path}, {old_prd_doc.root_relative_path}]: {res}") if "YES" in res: return True return False diff --git a/metagpt/config.py b/metagpt/config.py index d04ae7291..d2390f704 100644 --- a/metagpt/config.py +++ b/metagpt/config.py @@ -108,7 +108,7 @@ class Config(metaclass=Singleton): def _ensure_workspace_exists(self): self.workspace_path.mkdir(parents=True, exist_ok=True) - logger.info(f"WORKSPACE_PATH set to {self.workspace_path}") + logger.debug(f"WORKSPACE_PATH set to {self.workspace_path}") def _init_with_config_files_and_env(self, yaml_file): """Load from config/key.yaml, config/config.yaml, and env in decreasing order of priority""" diff --git a/metagpt/environment.py b/metagpt/environment.py index 02eb3d340..7d1e307f3 100644 --- a/metagpt/environment.py +++ b/metagpt/environment.py @@ -58,7 +58,7 @@ class Environment(BaseModel): route the message to the message recipient is a problem addressed by the transport framework designed in RFC 113. """ - logger.info(f"publish_message: {message.dump()}") + logger.debug(f"publish_message: {message.dump()}") found = False # According to the routing feature plan in Chapter 2.2.3.2 of RFC 113 for role, subscription in self.members.items(): diff --git a/metagpt/utils/file_repository.py b/metagpt/utils/file_repository.py index 2cace7232..2eca799a8 100644 --- a/metagpt/utils/file_repository.py +++ b/metagpt/utils/file_repository.py @@ -205,7 +205,7 @@ class FileRepository: m = json.loads(doc.content) filename = Path(doc.filename).with_suffix(with_suffix) if with_suffix is not None else Path(doc.filename) await self.save(filename=str(filename), content=json_to_markdown(m), dependencies=dependencies) - logger.info(f"File Saved: {str(filename)}") + logger.debug(f"File Saved: {str(filename)}") @staticmethod async def get_file(filename: Path | str, relative_path: Path | str = ".") -> Document | None: From 5d7c228539be3d50e1e97d8927cef34852117f82 Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 13 Dec 2023 17:47:19 +0800 Subject: [PATCH 20/38] tuning action node code --- metagpt/actions/design_api_an.py | 146 +++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 metagpt/actions/design_api_an.py diff --git a/metagpt/actions/design_api_an.py b/metagpt/actions/design_api_an.py new file mode 100644 index 000000000..b4bd54849 --- /dev/null +++ b/metagpt/actions/design_api_an.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/12/12 22:24 +@Author : alexanderwu +@File : design_api_an.py +""" +from metagpt.actions.action_node import ActionNode +from metagpt.logs import logger + +IMPLEMENTATION_APPROACH = ActionNode( + key="Implementation approach", + expected_type=str, + instruction="Analyze the difficult points of the requirements, select the appropriate open-source framework", + example="We will ..." +) + +PROJECT_NAME = ActionNode( + key="Project name", + expected_type=str, + instruction="The project name with underline", + example="game_2048" +) + +FILE_LIST = ActionNode( + key="File list", + expected_type=list[str], + instruction="Only need relative paths. ALWAYS write a main.py or app.py here", + example=['main.py', 'game.py'] +) + +DATA_STRUCTURES_AND_INTERFACES = ActionNode( + key="Data structures and interfaces", + expected_type=str, + instruction="Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions " + "(with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " + "The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.", + example=""" classDiagram + class User { + +int id + +str username + +str email + +str password + __init__(id: int, username: str, email: str, password: str) + follow(user: User): void + like(content: Content): void + comment(content: Content, text: str): Comment + } + class Content { + +int id + +User author + +str title + +str body + +datetime created_at + +list likes + +list comments + __init__(id: int, author: User, title: str, body: str) + get_likes(): list + get_comments(): list + } + class Comment { + +int id + +User author + +str text + +datetime created_at + __init__(id: int, author: User, text: str) + } + class Leaderboard { + +list top_contents + update(): void + } + class SearchEngine { + +str query + search(): list + } + class RecommendationEngine { + +User user + recommend(): list + } + class TaskQueue { + +str task_name + enqueue(task: function): void + } + User "1" -- "*" Content: creates + Content "1" -- "*" Comment: includes + User "1" -- "*" Comment: writes + User "1" -- "*" User: follows + Content "1" -- "*" User: liked_by""" +) + +PROGRAM_CALL_FLOW = ActionNode( + key="Program call flow", + expected_type=str, + instruction="Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE " + "accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.", + example="""sequenceDiagram +participant M as Main +... +G->>M: end game""" +) + +ANYTHING_UNCLEAR = ActionNode( + key="Anything UNCLEAR", + expected_type=str, + instruction="Mention unclear project aspects, then try to clarify it.", + example="Clarification needed on third-party API integration, ..." +) + +ACTION_NODES = [ + IMPLEMENTATION_APPROACH, + PROJECT_NAME, + FILE_LIST, + DATA_STRUCTURES_AND_INTERFACES, + PROGRAM_CALL_FLOW, + ANYTHING_UNCLEAR +] + +DESIGN_API_NODE = ActionNode("DesignAPI", str, "", "") +DESIGN_API_NODE.add_children(ACTION_NODES) + +SIMPLE_TEMPLATE = """ +## context +{context} + +## example +{example} + +## instruction-nodes: ": # " +{instruction} + +## instruction-action +Role: You are an architect; the goal is to design a SOTA software system +Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. +Requirement: Fill in the above missing instruction-nodes based on the context +now, output wrapped inside [CONTENT][/CONTENT] as example, nothing else. +""" + + +def main(): + instruction, example = DESIGN_API_NODE.compile() + text = SIMPLE_TEMPLATE.format(context="", example=example, instruction=instruction) + logger.info(text) + + +if __name__ == '__main__': + main() From c0bcf57caf134008ea5c8bd9a2df3cbdb3465759 Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 15:58:05 +0800 Subject: [PATCH 21/38] Transfer Action usage to ActionNode for subsequent structured reasoning opportunities - Modifided actions: project_management / design_api / write_prd --- metagpt/actions/action.py | 20 +- metagpt/actions/action_node.py | 122 ++++++-- metagpt/actions/design_api.py | 189 +----------- metagpt/actions/design_api_an.py | 91 +----- metagpt/actions/project_management.py | 206 +------------ metagpt/actions/project_management_an.py | 82 +++++ metagpt/actions/write_prd.py | 366 +++-------------------- metagpt/actions/write_prd_an.py | 153 ++++++++++ metagpt/environment.py | 2 +- metagpt/llm.py | 3 +- metagpt/roles/architect.py | 4 +- metagpt/roles/engineer.py | 9 +- metagpt/roles/project_manager.py | 3 +- metagpt/roles/role.py | 5 +- metagpt/roles/searcher.py | 3 +- 15 files changed, 438 insertions(+), 820 deletions(-) create mode 100644 metagpt/actions/project_management_an.py create mode 100644 metagpt/actions/write_prd_an.py diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index 40faaad41..2fd130cf5 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -27,18 +27,22 @@ class Action(ABC): self.context = context self.prefix = "" # aask*时会加上prefix,作为system_message self.profile = "" # FIXME: USELESS - self.desc = "" # FIXME: USELESS - self.content = "" - self.instruct_content = None - self.env = None + self.desc = "" # for skill manager + self.nodes = ... - def set_env(self, env): - self.env = env + # Output, useless + # self.content = "" + # self.instruct_content = None + # self.env = None + + # def set_env(self, env): + # self.env = env def set_prefix(self, prefix, profile): """Set prefix for later usage""" self.prefix = prefix self.profile = profile + self.llm.system_prompt = prefix def __str__(self): return self.__class__.__name__ @@ -62,10 +66,6 @@ class Action(ABC): system_msgs: Optional[list[str]] = None, format="markdown", # compatible to original format ) -> ActionOutput: - """Append default prefix""" - if not system_msgs: - system_msgs = [] - system_msgs.append(self.prefix) content = await self.llm.aask(prompt, system_msgs) logger.debug(content) output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 35912446d..178986ebe 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -5,25 +5,44 @@ @Author : alexanderwu @File : action_node.py """ -from typing import Dict, Type, List, Any, Tuple +import re +from typing import Dict, Type, List, Any, Tuple, Optional import json from pydantic import BaseModel, create_model, root_validator, validator # , model_validator, field_validator +from tenacity import wait_random_exponential, stop_after_attempt, retry +from metagpt.actions import ActionOutput +from metagpt.llm import BaseGPTAPI from metagpt.logs import logger +from metagpt.utils.common import OutputParser +from metagpt.utils.custom_decoder import CustomDecoder +CONSTRAINT = """ +- Language: Please use the same language as the user input. +- Format: output wrapped inside [CONTENT][/CONTENT] as format example, nothing else. +""" SIMPLE_TEMPLATE = """ -## example +## context +{context} + +## format example {example} -## instruction +## nodes: ": # " {instruction} + +## constraint +{constraint} + +## action +Fill in the above nodes based on the context. Answer in format example. """ -def dict_to_markdown(d, prefix="###", postfix="\n"): +def dict_to_markdown(d, prefix="-", postfix="\n"): markdown_str = "" for key, value in d.items(): markdown_str += f"{prefix} {key}: {value}{postfix}" @@ -32,22 +51,26 @@ def dict_to_markdown(d, prefix="###", postfix="\n"): class ActionNode: """ActionNode is a tree of nodes.""" - # 应该是定义子任务,收集子任务结果,并且父任务同时执行吗? - # 初期只提供两种模式,一种是用父任务compile,一种是用子任务逐个执行 - # 1. context、example、instruction-nodes、instruction-action - # 2. context、example + # Action Strgy + # - sop: 仅使用一级SOP + # - complex: 使用一级SOP+自定义策略填槽 + mode: str - # Action Inputs + # Action Context + context: str # all the context, including all necessary info + llm: BaseGPTAPI # LLM with aask interface + children: dict[str, "ActionNode"] + + # Action Input key: str # Product Requirement / File list / Code expected_type: Type # such as str / int / float etc. # context: str # everything in the history. instruction: str # the instructions should be followed. example: Any # example for In Context-Learning. - # Action Outputs + # Action Output content: str instruct_content: BaseModel - children: dict[str, "ActionNode"] def __init__(self, key, expected_type, instruction, example, content="", children=None): @@ -74,9 +97,16 @@ class ActionNode: for node in nodes: self.add_child(node) + @classmethod + def from_children(cls, key, nodes: List["ActionNode"]): + """直接从一系列的子nodes初始化""" + obj = cls(key, str, "", "") + obj.add_children(nodes) + return obj + def get_children_mapping(self) -> Dict[str, Type]: """获得子ActionNode的字典,以key索引""" - return {k: v.expected_type for k, v in self.children.items()} + return {k: (v.expected_type, ...) for k, v in self.children.items()} @classmethod def create_model_class(cls, class_name: str, mapping: Dict[str, Type]): @@ -131,6 +161,8 @@ class ActionNode: return self.create_model_class(class_name, mapping) def to_dict(self, format_func=None, mode="all") -> Dict: + """将当前节点与子节点都按照node: format的格式组织称字典""" + # 如果没有提供格式化函数,使用默认的格式化方式 if format_func is None: format_func = lambda node: f"{node.instruction}" @@ -165,7 +197,7 @@ class ActionNode: if not tag: return text if to == "json": - return f"[{tag}]\n" + "{" + text + "}" + f"\n[/{tag}]" + return f"[{tag}]\n" + text + f"\n[/{tag}]" else: return f"[{tag}]\n" + text + f"\n[/{tag}]" @@ -187,31 +219,73 @@ class ActionNode: format_func = lambda i: i.example return self._compile_f(to, mode, tag, format_func) - def compile(self, mode="children") -> Tuple[str, str]: + def compile(self, context, to="json", mode="children", template=SIMPLE_TEMPLATE) -> str: """ mode: all/root/children mode="children": 编译所有子节点为一个统一模板,包括instruction与example mode="all": NotImplemented mode="root": NotImplemented """ - self.instruction = self.compile_instruction(to="json", mode=mode) - self.example = self.compile_example(to="json", tag="CONTENT", mode=mode) - # prompt = template.format(example=self.example, instruction=self.instruction) - return self.instruction, self.example - def run(self): - """运行这个ActionNode,可以采用不同策略,比如只运行子节点""" + # FIXME: json instruction会带来 "Project name": "web_2048 # 项目名称使用下划线", + self.instruction = self.compile_instruction(to="markdown", mode=mode) + self.example = self.compile_example(to=to, tag="CONTENT", mode=mode) + prompt = template.format(context=context, example=self.example, instruction=self.instruction, + constraint=CONSTRAINT) + return prompt + @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) + async def _aask_v1( + self, + prompt: str, + output_class_name: str, + output_data_mapping: dict, + system_msgs: Optional[list[str]] = None, + format="markdown", # compatible to original format + ) -> ActionOutput: + content = await self.llm.aask(prompt, system_msgs) + logger.debug(content) + output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) + + if format == "json": + pattern = r"\[CONTENT\](\s*\{.*?\}\s*)\[/CONTENT\]" + matches = re.findall(pattern, content, re.DOTALL) + + for match in matches: + if match: + content = match + break + + parsed_data = CustomDecoder(strict=False).decode(content) + + else: # using markdown parser + parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping) + + logger.debug(parsed_data) + instruct_content = output_class(**parsed_data) + return ActionOutput(content, instruct_content) + + def get(self, key): + return self.instruct_content.dict()[key] + + async def fill(self, context, llm, to="json"): + """运行这个ActionNode,并且填槽,可以采用不同策略,比如只运行子节点""" + self.llm = llm + prompt = self.compile(context=context, to=to) + mapping = self.get_children_mapping() + + class_name = f"{self.key}_AN" # 需要传入llm,并且实际在ActionNode中执行。需要规划好具体的执行方法 - raise NotImplementedError + output = await self._aask_v1(prompt, class_name, mapping, format=to) + self.content = output.content + self.instruct_content = output.instruct_content + return self def action_node_from_tuple_example(): # 示例:列表中包含元组 list_of_tuples = [ - ("key1", str, "Instruction 1", "Example 1", "Content 1", {"child1": ...}), - ("key2", int, "Instruction 2", "Example 2", "Content 2"), - ("key3", int, "Instruction 3", "Example 3") + ("key1", int, "Instruction 1", "Example 1") ] # 从列表中创建 ActionNode 实例 diff --git a/metagpt/actions/design_api.py b/metagpt/actions/design_api.py index a6d559a4c..fd58e0ca8 100644 --- a/metagpt/actions/design_api.py +++ b/metagpt/actions/design_api.py @@ -11,10 +11,10 @@ """ import json from pathlib import Path -from typing import List +# from typing import List from metagpt.actions import Action, ActionOutput -from metagpt.actions.design_api_an import DESIGN_API_NODE, SIMPLE_TEMPLATE +from metagpt.actions.design_api_an import DESIGN_API_NODE from metagpt.config import CONFIG from metagpt.const import ( DATA_API_DESIGN_FILE_REPO, @@ -26,166 +26,15 @@ from metagpt.const import ( from metagpt.logs import logger from metagpt.schema import Document, Documents from metagpt.utils.file_repository import FileRepository -from metagpt.utils.get_template import get_template +# from metagpt.utils.get_template import get_template from metagpt.utils.mermaid import mermaid_to_file -templates = { - "json": { - "PROMPT_TEMPLATE": """ -# Context -{context} - -## Format example -{format_example} ------ -Role: You are an architect; the goal is to design a SOTA PEP8-compliant python system -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirement: Fill in the following missing information based on the context, each section name is a key in json - -## Implementation approach: Provide as Plain text. Analyze the difficult points of the requirements, select appropriate open-source frameworks. - -## Project name: Constant text. - -## File list: Provided as Python list[str], the list of files needed (including HTML & CSS IF NEEDED) to write the program. Only need relative paths. ALWAYS write a main.py or app.py here - -## Data structures and interfaces: Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions (with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design. - -## Program call flow: Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT. - -## Anything UNCLEAR: Provide as Plain text. Try to clarify it. - -output a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like format example, -and only output the json inside this tag, nothing else -""", - "FORMAT_EXAMPLE": """ -[CONTENT] -{{ - "Implementation approach": "We will ...", - "Project name": "{project_name}", - "File list": ["main.py"], - "Data structures and interfaces": ' - classDiagram - class Game{{ - +int score - }} - ... - Game "1" -- "1" Food: has - ', - "Program call flow": ' - sequenceDiagram - participant M as Main - ... - G->>M: end game - ', - "Anything UNCLEAR": "The requirement is clear to me." -}} -[/CONTENT] -""", - }, - "markdown": { - "PROMPT_TEMPLATE": """ -# Context -{context} - -## Format example -{format_example} ------ -Role: You are an architect; the goal is to design a SOTA PEP8-compliant python system; make the best use of good open source tools -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirement: Fill in the following missing information based on the context, note that all sections are response with code form separately -ATTENTION: Output carefully referenced "Format example" in format. - -## Implementation approach: Provide as Plain text. Analyze the difficult points of the requirements, select the appropriate open-source framework. - -## Project name: Constant text. - -## File list: Provided as Python list[str], the list of code files (including HTML & CSS IF NEEDED) to write the program. Only need relative paths. ALWAYS write a main.py or app.py here - -## Data structures and interfaces: Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions (with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design. - -## Program call flow: Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT. - -## Anything UNCLEAR: Provide as Plain text. Try to clarify it. - -""", - "FORMAT_EXAMPLE": """ ---- -## Implementation approach -We will ... - -## Project name -```python -"{project_name}" -``` - -## File list -```python -[ - "main.py", -] -``` - -## Data structures and interfaces -```mermaid -classDiagram - class Game{ - +int score - } - ... - Game "1" -- "1" Food: has -``` - -## Program call flow -```mermaid -sequenceDiagram - participant M as Main - ... - G->>M: end game -``` - -## Anything UNCLEAR -The requirement is clear to me. ---- -""", - }, -} - -OUTPUT_MAPPING = { - "Implementation approach": (str, ...), - "Project name": (str, ...), - "File list": (List[str], ...), - "Data structures and interfaces": (str, ...), - "Program call flow": (str, ...), - "Anything UNCLEAR": (str, ...), -} - -MERGE_PROMPT = """ -## Old Design +NEW_REQ_TEMPLATE = """ +### Legacy Content {old_design} -## Context +### New Requirements {context} - ------ -Role: You are an architect; The goal is to incrementally update the "Old Design" based on the information provided by the "Context," aiming to design a SOTA PEP8-compliant python system; make the best use of good open source tools -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirement: Fill in the following missing information based on the context, note that all sections are response with code form separately -ATTENTION: Output carefully referenced "Old Design" in format. - -## Implementation approach: Provide as Plain text. Analyze the difficult points of the requirements, select the appropriate open-source framework. - -## Project name: Constant text "{project_name}". - -## File list: Provided as Python list[str], the list of code files (including HTML & CSS IF NEEDED) to write the program. Only need relative paths. ALWAYS write a main.py or app.py here - -## Data structures and interfaces: Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions (with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design. - -## Program call flow: Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT. - -## Anything UNCLEAR: Provide as Plain text. Try to clarify it. - -output a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like "Old Design" format, -and only output the json inside this tag, nothing else """ @@ -228,30 +77,16 @@ class WriteDesign(Action): # leaving room for global optimization in subsequent steps. return ActionOutput(content=changed_files.json(), instruct_content=changed_files) - async def _new_system_design_bakup(self, context, format=CONFIG.prompt_format): - prompt_template, format_example = get_template(templates, format) - format_example = format_example.format(project_name=CONFIG.project_name) - prompt = prompt_template.format(context=context, format_example=format_example) - system_design = await self._aask_v1(prompt, "system_design", OUTPUT_MAPPING, format=format) - return system_design - async def _new_system_design(self, context, format=CONFIG.prompt_format): - instruction, example = DESIGN_API_NODE.compile() - prompt = SIMPLE_TEMPLATE.format(context=context, example=example, instruction=instruction) - # prompt_template, format_example = get_template(templates, format) - # format_example = format_example.format(project_name=CONFIG.project_name) - # prompt = prompt_template.format(context=context, format_example=format_example) - system_design = await self._aask_v1(prompt, "system_design", OUTPUT_MAPPING, format=format) - return system_design + node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=format) + return node async def _merge(self, prd_doc, system_design_doc, format=CONFIG.prompt_format): - prompt = MERGE_PROMPT.format( - old_design=system_design_doc.content, context=prd_doc.content, project_name=CONFIG.project_name + context = NEW_REQ_TEMPLATE.format( + old_design=system_design_doc.content, context=prd_doc.content ) - system_design = await self._aask_v1(prompt, "system_design", OUTPUT_MAPPING, format=format) - # fix Python package name, we can't system_design.instruct_content.python_package_name = "xxx" since "Python - # package name" contain space, have to use setattr - system_design_doc.content = system_design.instruct_content.json(ensure_ascii=False) + node = await DESIGN_API_NODE.fill(context=context, llm=self.llm, to=format) + system_design_doc.content = node.instruct_content.json(ensure_ascii=False) return system_design_doc async def _update_system_design(self, filename, prds_file_repo, system_design_file_repo) -> Document: diff --git a/metagpt/actions/design_api_an.py b/metagpt/actions/design_api_an.py index b4bd54849..2db203606 100644 --- a/metagpt/actions/design_api_an.py +++ b/metagpt/actions/design_api_an.py @@ -6,6 +6,7 @@ @File : design_api_an.py """ from metagpt.actions.action_node import ActionNode +from metagpt.utils.mermaid import MMC1, MMC2 from metagpt.logs import logger IMPLEMENTATION_APPROACH = ActionNode( @@ -32,60 +33,10 @@ FILE_LIST = ActionNode( DATA_STRUCTURES_AND_INTERFACES = ActionNode( key="Data structures and interfaces", expected_type=str, - instruction="Use mermaid classDiagram code syntax, including classes (INCLUDING __init__ method) and functions " - "(with type annotations), CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " + instruction="Use mermaid classDiagram code syntax, including classes, method(__init__ etc.) and functions with type" + " annotations, CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " "The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.", - example=""" classDiagram - class User { - +int id - +str username - +str email - +str password - __init__(id: int, username: str, email: str, password: str) - follow(user: User): void - like(content: Content): void - comment(content: Content, text: str): Comment - } - class Content { - +int id - +User author - +str title - +str body - +datetime created_at - +list likes - +list comments - __init__(id: int, author: User, title: str, body: str) - get_likes(): list - get_comments(): list - } - class Comment { - +int id - +User author - +str text - +datetime created_at - __init__(id: int, author: User, text: str) - } - class Leaderboard { - +list top_contents - update(): void - } - class SearchEngine { - +str query - search(): list - } - class RecommendationEngine { - +User user - recommend(): list - } - class TaskQueue { - +str task_name - enqueue(task: function): void - } - User "1" -- "*" Content: creates - Content "1" -- "*" Comment: includes - User "1" -- "*" Comment: writes - User "1" -- "*" User: follows - Content "1" -- "*" User: liked_by""" + example=MMC1 ) PROGRAM_CALL_FLOW = ActionNode( @@ -93,10 +44,7 @@ PROGRAM_CALL_FLOW = ActionNode( expected_type=str, instruction="Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE " "accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.", - example="""sequenceDiagram -participant M as Main -... -G->>M: end game""" + example=MMC2 ) ANYTHING_UNCLEAR = ActionNode( @@ -106,40 +54,21 @@ ANYTHING_UNCLEAR = ActionNode( example="Clarification needed on third-party API integration, ..." ) -ACTION_NODES = [ +NODES = [ IMPLEMENTATION_APPROACH, - PROJECT_NAME, + # PROJECT_NAME, FILE_LIST, DATA_STRUCTURES_AND_INTERFACES, PROGRAM_CALL_FLOW, ANYTHING_UNCLEAR ] -DESIGN_API_NODE = ActionNode("DesignAPI", str, "", "") -DESIGN_API_NODE.add_children(ACTION_NODES) - -SIMPLE_TEMPLATE = """ -## context -{context} - -## example -{example} - -## instruction-nodes: ": # " -{instruction} - -## instruction-action -Role: You are an architect; the goal is to design a SOTA software system -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirement: Fill in the above missing instruction-nodes based on the context -now, output wrapped inside [CONTENT][/CONTENT] as example, nothing else. -""" +DESIGN_API_NODE = ActionNode.from_children("DesignAPI", NODES) def main(): - instruction, example = DESIGN_API_NODE.compile() - text = SIMPLE_TEMPLATE.format(context="", example=example, instruction=instruction) - logger.info(text) + prompt = DESIGN_API_NODE.compile(context="") + logger.info(prompt) if __name__ == '__main__': diff --git a/metagpt/actions/project_management.py b/metagpt/actions/project_management.py index 95da0d65a..29e3bed3e 100644 --- a/metagpt/actions/project_management.py +++ b/metagpt/actions/project_management.py @@ -10,10 +10,11 @@ 3. According to the design in Section 2.2.3.5.4 of RFC 135, add incremental iteration functionality. """ import json -from typing import List +# from typing import List from metagpt.actions import ActionOutput from metagpt.actions.action import Action +from metagpt.actions.project_management_an import PM_NODE from metagpt.config import CONFIG from metagpt.const import ( PACKAGE_REQUIREMENTS_FILENAME, @@ -24,189 +25,14 @@ from metagpt.const import ( from metagpt.logs import logger from metagpt.schema import Document, Documents from metagpt.utils.file_repository import FileRepository -from metagpt.utils.get_template import get_template +# from metagpt.utils.get_template import get_template -templates = { - "json": { - "PROMPT_TEMPLATE": """ -# Context -{context} - -## Format example -{format_example} ------ -Role: You are a project manager; the goal is to break down tasks according to PRD/technical design, give a task list, and analyze task dependencies to start with the prerequisite modules -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirements: Based on the context, fill in the following missing information, each section name is a key in json. Here the granularity of the task is a file, if there are any missing files, you can supplement them -ATTENTION: Output carefully referenced "Format example" in format. - -## Required Python third-party packages: Provide Python list[str] in requirements.txt format - -## Required Other language third-party packages: Provide Python list[str] in requirements.txt format - -## Logic Analysis: Provided as a Python list[list[str]. the first is filename, the second is class/method/function should be implemented in this file. Analyze the dependencies between the files, which work should be done first - -## Task list: Provided as Python list[str]. Each str is a filename, the more at the beginning, the more it is a prerequisite dependency, should be done first - -## Full API spec: Use OpenAPI 3.0. Describe all APIs that may be used by both frontend and backend. - -## Shared Knowledge: Anything that should be public like utils' functions, config's variables details that should make clear first. - -## Anything UNCLEAR: Provide as Plain text. Try to clarify it. For example, don't forget a main entry. don't forget to init 3rd party libs. - -output a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like format example, -and only output the json inside this tag, nothing else -""", - "FORMAT_EXAMPLE": ''' -{ - "Required Python third-party packages": [ - "flask==1.1.2", - "bcrypt==3.2.0" - ], - "Required Other language third-party packages": [ - "No third-party ..." - ], - "Logic Analysis": [ - ["game.py", "Contains..."] - ], - "Task list": [ - "game.py" - ], - "Full API spec": """ - openapi: 3.0.0 - ... - description: A JSON object ... - """, - "Shared Knowledge": """ - 'game.py' contains ... - """, - "Anything UNCLEAR": "We need ... how to start." -} -''', - }, - "markdown": { - "PROMPT_TEMPLATE": """ -# Context -{context} - -## Format example -{format_example} ------ -Role: You are a project manager; the goal is to break down tasks according to PRD/technical design, give a task list, and analyze task dependencies to start with the prerequisite modules -Requirements: Based on the context, fill in the following missing information, note that all sections are returned in Python code triple quote form seperatedly. Here the granularity of the task is a file, if there are any missing files, you can supplement them -Attention: Use '##' to split sections, not '#', and '## ' SHOULD WRITE BEFORE the code and triple quote. - -## Required Python third-party packages: Provided in requirements.txt format - -## Required Other language third-party packages: Provided in requirements.txt format - -## Logic Analysis: Provided as a Python list[list[str]. the first is filename, the second is class/method/function should be implemented in this file. Analyze the dependencies between the files, which work should be done first - -## Task list: Provided as Python list[str]. Each str is a filename, the more at the beginning, the more it is a prerequisite dependency, should be done first - -## Full API spec: Use OpenAPI 3.0. Describe all APIs that may be used by both frontend and backend. - -## Shared Knowledge: Anything that should be public like utils' functions, config's variables details that should make clear first. - -## Anything UNCLEAR: Provide as Plain text. Try to clarify it. For example, don't forget a main entry. don't forget to init 3rd party libs. - -""", - "FORMAT_EXAMPLE": ''' ---- -## Required Python third-party packages -```python -""" -flask==1.1.2 -bcrypt==3.2.0 -""" -``` - -## Required Other language third-party packages -```python -""" -No third-party ... -""" -``` - -## Full API spec -```python -""" -openapi: 3.0.0 -... -description: A JSON object ... -""" -``` - -## Logic Analysis -```python -[ - ["index.js", "Contains ..."], - ["main.py", "Contains ..."], -] -``` - -## Task list -```python -[ - "index.js", - "main.py", -] -``` - -## Shared Knowledge -```python -""" -'game.py' contains ... -""" -``` - -## Anything UNCLEAR -We need ... how to start. ---- -''', - }, -} -OUTPUT_MAPPING = { - "Required Python third-party packages": (List[str], ...), - "Required Other language third-party packages": (List[str], ...), - "Full API spec": (str, ...), - "Logic Analysis": (List[List[str]], ...), - "Task list": (List[str], ...), - "Shared Knowledge": (str, ...), - "Anything UNCLEAR": (str, ...), -} - -MERGE_PROMPT = """ -# Context -{context} - -## Old Tasks +NEW_REQ_TEMPLATE = """ +### Legacy Content {old_tasks} ------ -## Format example -{format_example} ------ -Role: You are a project manager; The goal is to merge the new PRD/technical design content from 'Context' into 'Old Tasks.' Based on this merged result, break down tasks, give a task list, and analyze task dependencies to start with the prerequisite modules. -Requirements: Based on the context, fill in the following missing information, each section name is a key in json. Here the granularity of the task is a file, if there are any missing files, you can supplement them -Attention: Use '##' to split sections, not '#', and '## ' SHOULD WRITE BEFORE the code and triple quote. - -## Required Python third-party packages: Provided in requirements.txt format - -## Required Other language third-party packages: Provided in requirements.txt format - -## Full API spec: Use OpenAPI 3.0. Describe all APIs that may be used by both frontend and backend. - -## Logic Analysis: Provided as a Python list[list[str]. the first is filename, the second is class/method/function should be implemented in this file. Analyze the dependencies between the files, which work should be done first - -## Task list: Provided as Python list[str]. Each str is a filename, the more at the beginning, the more it is a prerequisite dependency, should be done first - -## Shared Knowledge: Anything that should be public like utils' functions, config's variables details that should make clear first. - -## Anything UNCLEAR: Provide as Plain text. Make clear here. For example, don't forget a main entry. don't forget to init 3rd party libs. - -output a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like "Format example" format, -and only output the json inside this tag, nothing else +### New Requirements +{context} """ @@ -262,18 +88,16 @@ class WriteTasks(Action): return task_doc async def _run_new_tasks(self, context, format=CONFIG.prompt_format): - prompt_template, format_example = get_template(templates, format) - prompt = prompt_template.format(context=context, format_example=format_example) - rsp = await self._aask_v1(prompt, "task", OUTPUT_MAPPING, format=format) - return rsp + node = await PM_NODE.fill(context, self.llm, format) + # prompt_template, format_example = get_template(templates, format) + # prompt = prompt_template.format(context=context, format_example=format_example) + # rsp = await self._aask_v1(prompt, "task", OUTPUT_MAPPING, format=format) + return node async def _merge(self, system_design_doc, task_doc, format=CONFIG.prompt_format) -> Document: - _, format_example = get_template(templates, format) - prompt = MERGE_PROMPT.format(context=system_design_doc.content, old_tasks=task_doc.content, - format_example=format_example) - rsp = await self._aask_v1(prompt, "task", OUTPUT_MAPPING, format=format) - task_doc.content = rsp.instruct_content.json(ensure_ascii=False) - return task_doc + context = NEW_REQ_TEMPLATE.format(context=system_design_doc.content, old_tasks=task_doc.content) + node = await PM_NODE.fill(context, self.llm, format) + return node @staticmethod async def _update_requirements(doc): diff --git a/metagpt/actions/project_management_an.py b/metagpt/actions/project_management_an.py new file mode 100644 index 000000000..aa7cdcde2 --- /dev/null +++ b/metagpt/actions/project_management_an.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/12/14 15:28 +@Author : alexanderwu +@File : project_management_an.py +""" +from metagpt.actions.action_node import ActionNode +from metagpt.logs import logger + +REQUIRED_PYTHON_PACKAGES = ActionNode( + key="Required Python packages", + expected_type=list[str], + instruction="Provide required Python packages in requirements.txt format.", + example=["flask==1.1.2", "bcrypt==3.2.0"] +) + +REQUIRED_OTHER_LANGUAGE_PACKAGES = ActionNode( + key="Required Other language third-party packages", + expected_type=list[str], + instruction="List down the required packages for languages other than Python.", + example=["No third-party dependencies required"] +) + +LOGIC_ANALYSIS = ActionNode( + key="Logic Analysis", + expected_type=list[list[str]], + instruction="Provide a list of files with the classes/methods/functions to be implemented, " + "including dependency analysis and imports.", + example=[["game.py", "Contains Game class and ... functions"], + ["main.py", "Contains main function, from game import Game"]] +) + +TASK_LIST = ActionNode( + key="Task list", + expected_type=list[str], + instruction="Break down the tasks into a list of filenames, prioritized by dependency order.", + example=["game.py", "main.py"] +) + +FULL_API_SPEC = ActionNode( + key="Full API spec", + expected_type=str, + instruction="Describe all APIs using OpenAPI 3.0 spec that may be used by both frontend and backend.", + example="openapi: 3.0.0 ..." +) + +SHARED_KNOWLEDGE = ActionNode( + key="Shared Knowledge", + expected_type=str, + instruction="Detail any shared knowledge, like common utility functions or configuration variables.", + example="'game.py' contains functions shared across the project." +) + +ANYTHING_UNCLEAR_PM = ActionNode( + key="Anything UNCLEAR", + expected_type=str, + instruction="Mention any unclear aspects in the project management context and try to clarify them.", + example="Clarification needed on how to start and initialize third-party libraries." +) + +NODES = [ + REQUIRED_PYTHON_PACKAGES, + REQUIRED_OTHER_LANGUAGE_PACKAGES, + LOGIC_ANALYSIS, + TASK_LIST, + FULL_API_SPEC, + SHARED_KNOWLEDGE, + ANYTHING_UNCLEAR_PM +] + + +PM_NODE = ActionNode.from_children("PM_NODE", NODES) + + +def main(): + prompt = PM_NODE.compile(context="") + logger.info(prompt) + + +if __name__ == '__main__': + main() diff --git a/metagpt/actions/write_prd.py b/metagpt/actions/write_prd.py index 0594d116e..e61743e7f 100644 --- a/metagpt/actions/write_prd.py +++ b/metagpt/actions/write_prd.py @@ -14,9 +14,11 @@ from __future__ import annotations import json from pathlib import Path -from typing import List +# from typing import List from metagpt.actions import Action, ActionOutput +from metagpt.actions.action_node import ActionNode +from metagpt.actions.write_prd_an import WRITE_PRD_NODE, WP_ISSUE_TYPE_NODE, WP_IS_RELATIVE_NODE from metagpt.actions.fix_bug import FixBug from metagpt.actions.search_and_summarize import SearchAndSummarize from metagpt.config import CONFIG @@ -31,293 +33,26 @@ from metagpt.logs import logger from metagpt.schema import Document, Documents, Message, BugFixContext from metagpt.utils.common import CodeParser from metagpt.utils.file_repository import FileRepository -from metagpt.utils.get_template import get_template +# from metagpt.utils.get_template import get_template from metagpt.utils.mermaid import mermaid_to_file -templates = { - "json": { - "PROMPT_TEMPLATE": """ -# Context -{{ - "Original Requirements": "{requirements}", - "Search Information": "" -}} +CONTEXT_TEMPLATE = """ +### Project Name +{project_name} -## Format example -{format_example} ------ -Role: You are a professional product manager; the goal is to design a concise, usable, efficient product -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirements: According to the context, fill in the following missing information, note that each sections are returned in Python code triple quote form seperatedly. -ATTENTION: Output carefully referenced "Format example" in format. - -## YOU NEED TO FULFILL THE BELOW JSON DOC - -{{ - "Language": "", # str, use the same language as the user requirement. en_us / zh_cn etc. - "Original Requirements": "", # str, place the polished complete original requirements here - "Project Name": "{project_name}", # str, if it's empty, name it with snake case style, like game_2048 / web_2048 / simple_crm etc. - "Search Information": "", - "Requirements": "", - "Product Goals": [], # Provided as Python list[str], up to 3 clear, orthogonal product goals. - "User Stories": [], # Provided as Python list[str], up to 5 scenario-based user stories - "Competitive Analysis": [], # Provided as Python list[str], up to 8 competitive product analyses - # Use mermaid quadrantChart code syntax. up to 14 competitive products. Translation: Distribute these competitor scores evenly between 0 and 1, trying to conform to a normal distribution centered around 0.5 as much as possible. - "Competitive Quadrant Chart": "quadrantChart - title Reach and engagement of campaigns - x-axis Low Reach --> High Reach - y-axis Low Engagement --> High Engagement - quadrant-1 We should expand - quadrant-2 Need to promote - quadrant-3 Re-evaluate - quadrant-4 May be improved - Campaign A: [0.3, 0.6] - Campaign B: [0.45, 0.23] - Campaign C: [0.57, 0.69] - Campaign D: [0.78, 0.34] - Campaign E: [0.40, 0.34] - Campaign F: [0.35, 0.78]", - "Requirement Analysis": "", # Provide as Plain text. - "Requirement Pool": [["P0","P0 requirement"],["P1","P1 requirement"]], # Provided as Python list[list[str], the parameters are requirement description, priority(P0/P1/P2), respectively, comply with PEP standards - "UI Design draft": "", # Provide as Plain text. Be simple. Describe the elements and functions, also provide a simple style description and layout description. - "Anything UNCLEAR": "", # Provide as Plain text. Try to clarify it. -}} - -output a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like format example, -and only output the json inside this tag, nothing else -""", - "FORMAT_EXAMPLE": """ -[CONTENT] -{{ - "Language": "", - "Original Requirements": "", - "Project Name": "{project_name}", - "Search Information": "", - "Requirements": "", - "Product Goals": [], - "User Stories": [], - "Competitive Analysis": [], - "Competitive Quadrant Chart": "quadrantChart - title Reach and engagement of campaigns - x-axis Low Reach --> High Reach - y-axis Low Engagement --> High Engagement - quadrant-1 We should expand - quadrant-2 Need to promote - quadrant-3 Re-evaluate - quadrant-4 May be improved - Campaign A: [0.3, 0.6] - Campaign B: [0.45, 0.23] - Campaign C: [0.57, 0.69] - Campaign D: [0.78, 0.34] - Campaign E: [0.40, 0.34] - Campaign F: [0.35, 0.78]", - "Requirement Analysis": "", - "Requirement Pool": [["P0","P0 requirement"],["P1","P1 requirement"]], - "UI Design draft": "", - "Anything UNCLEAR": "", -}} -[/CONTENT] -""", - }, - "markdown": { - "PROMPT_TEMPLATE": """ -# Context -## Original Requirements +### Original Requirements {requirements} -## Search Information -{search_information} - -## mermaid quadrantChart code syntax example. DONT USE QUOTO IN CODE DUE TO INVALID SYNTAX. Replace the with REAL COMPETITOR NAME -```mermaid -quadrantChart - title Reach and engagement of campaigns - x-axis Low Reach --> High Reach - y-axis Low Engagement --> High Engagement - quadrant-1 We should expand - quadrant-2 Need to promote - quadrant-3 Re-evaluate - quadrant-4 May be improved - "Campaign: A": [0.3, 0.6] - "Campaign B": [0.45, 0.23] - "Campaign C": [0.57, 0.69] - "Campaign D": [0.78, 0.34] - "Campaign E": [0.40, 0.34] - "Campaign F": [0.35, 0.78] - "Our Target Product": [0.5, 0.6] -``` - -## Format example -{format_example} ------ -Role: You are a professional product manager; the goal is to design a concise, usable, efficient product -Language: Please use the same language as the user requirement to answer, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirements: According to the context, fill in the following missing information, note that each sections are returned in Python code triple quote form seperatedly. -ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. AND '## ' SHOULD WRITE BEFORE the code and triple quote. Output carefully referenced "Format example" in format. - -## Language: Provide as Plain text, use the same language as the user requirement. - -## Original Requirements: Provide as Plain text, place the polished complete original requirements here - -## Product Goals: Provided as Python list[str], up to 3 clear, orthogonal product goals. - -## User Stories: Provided as Python list[str], up to 5 scenario-based user stories - -## Competitive Analysis: Provided as Python list[str], up to 7 competitive product analyses, consider as similar competitors as possible - -## Competitive Quadrant Chart: Use mermaid quadrantChart code syntax. up to 14 competitive products. Translation: Distribute these competitor scores evenly between 0 and 1, trying to conform to a normal distribution centered around 0.5 as much as possible. - -## Requirement Analysis: Provide as Plain text. - -## Requirement Pool: Provided as Python list[list[str], the parameters are requirement description, priority(P0/P1/P2), respectively, comply with PEP standards - -## UI Design draft: Provide as Plain text. Be simple. Describe the elements and functions, also provide a simple style description and layout description. -## Anything UNCLEAR: Provide as Plain text. Try to clarify it. -""", - "FORMAT_EXAMPLE": """ ---- -## Original Requirements -The user ... - -## Product Goals -```python -[ - "Create a ...", -] -``` - -## User Stories -```python -[ - "As a user, ...", -] -``` - -## Competitive Analysis -```python -[ - "Python Snake Game: ...", -] -``` - -## Competitive Quadrant Chart -```mermaid -quadrantChart - title Reach and engagement of campaigns - ... - "Our Target Product": [0.6, 0.7] -``` - -## Requirement Analysis -The product should be a ... - -## Requirement Pool -```python -[ - ["End game ...", "P0"] -] -``` - -## UI Design draft -Give a basic function description, and a draft - -## Anything UNCLEAR -There are no unclear points. ---- -""", - }, -} - -OUTPUT_MAPPING = { - "Language": (str, ...), - "Original Requirements": (str, ...), - "Project Name": (str, ...), - "Product Goals": (List[str], ...), - "User Stories": (List[str], ...), - "Competitive Analysis": (List[str], ...), - "Competitive Quadrant Chart": (str, ...), - "Requirement Analysis": (str, ...), - "Requirement Pool": (List[List[str]], ...), - "UI Design draft": (str, ...), - "Anything UNCLEAR": (str, ...), -} - -IS_RELATIVE_PROMPT = """ -## PRD: -{old_prd} - -## New Requirement: -{requirements} - -___ -You are a professional product manager; You need to assess whether the new requirements are relevant to the existing PRD to determine whether to merge the new requirements into this PRD. -Is the newly added requirement in "New Requirement" related to the PRD? -Respond with `YES` if it is related, `NO` if it is not, and provide the reasons. Return the response in JSON format. +### Search Information +- """ -MERGE_PROMPT = """ -# Context -## Original Requirements -{requirements} - - -## Old PRD +NEW_REQ_TEMPLATE = """ +### Legacy Content {old_prd} ------ -Role: You are a professional product manager; The goal is to incorporate the newly added requirements from the "Original Requirements" into the existing Product Requirements Document (PRD) in the "Old PRD" in order to design a concise, usable, and efficient product. -Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. -Requirements: According to the context, fill in the following missing information, each section name is a key in json ,If the requirements are unclear, ensure minimum viability and avoid excessive design -ATTENTION: Output carefully referenced "Old PRD" in format. -## YOU NEED TO FULFILL THE BELOW JSON DOC - -{{ - "Language": "", # str, use the same language as the user requirement. en_us / zh_cn etc. - "Original Requirements": "", # str, place the polished complete original requirements here - "Project Name": "{project_name}", # str, if it's empty, name it with snake case style, like game_2048 / web_2048 / simple_crm etc. - "Search Information": "", - "Requirements": "", - "Product Goals": [], # Provided as Python list[str], up to 3 clear, orthogonal product goals. - "User Stories": [], # Provided as Python list[str], up to 5 scenario-based user stories - "Competitive Analysis": [], # Provided as Python list[str], up to 8 competitive product analyses - # Use mermaid quadrantChart code syntax. up to 14 competitive products. Translation: Distribute these competitor scores evenly between 0 and 1, trying to conform to a normal distribution centered around 0.5 as much as possible. - "Competitive Quadrant Chart": "quadrantChart - title Reach and engagement of campaigns - x-axis Low Reach --> High Reach - y-axis Low Engagement --> High Engagement - quadrant-1 We should expand - quadrant-2 Need to promote - quadrant-3 Re-evaluate - quadrant-4 May be improved - Campaign A: [0.3, 0.6] - Campaign B: [0.45, 0.23] - Campaign C: [0.57, 0.69] - Campaign D: [0.78, 0.34] - Campaign E: [0.40, 0.34] - Campaign F: [0.35, 0.78]", - "Requirement Analysis": "", # Provide as Plain text. - "Requirement Pool": [["P0","P0 requirement"],["P1","P1 requirement"]], # Provided as Python list[list[str], the parameters are requirement description, priority(P0/P1/P2), respectively, comply with PEP standards - "UI Design draft": "", # Provide as Plain text. Be simple. Describe the elements and functions, also provide a simple style description and layout description. - "Anything UNCLEAR": "", # Provide as Plain text. Try to clarify it. -}} - -output a properly formatted JSON, wrapped inside [CONTENT][/CONTENT] like "Old PRD" format, -and only output the json inside this tag, nothing else -""" - -IS_BUGFIX_PROMPT = """ -{content} - -___ -You are a professional product manager; You need to determine whether the above content describes a requirement or provides feedback about a bug. -Respond with `YES` if it is a feedback about a bug, `NO` if it is not, and provide the reasons. Return the response in JSON format like below: - -```json -{{ - "is_bugfix": ..., # `YES` or `NO` - "reason": ..., # reason string -}} -``` +### New Requirements +{requirements} """ @@ -335,7 +70,7 @@ class WritePRD(Action): await docs_file_repo.save(filename=REQUIREMENT_FILENAME, content="") bug_fix = BugFixContext(filename=BUGFIX_FILENAME) return Message(content=bug_fix.json(), instruct_content=bug_fix, - role=self.profile, + role="", cause_by=FixBug, sent_from=self, send_to="Alex", # the name of Engineer @@ -353,7 +88,7 @@ class WritePRD(Action): if not prd_doc: continue change_files.docs[prd_doc.filename] = prd_doc - logger.info(f"REWRITE PRD:{prd_doc.filename}") + logger.info(f"rewrite prd: {prd_doc.filename}") # If there is no existing PRD, generate one using 'docs/requirement.txt'. if not change_files.docs: prd_doc = await self._update_prd( @@ -367,47 +102,32 @@ class WritePRD(Action): # optimization in subsequent steps. return ActionOutput(content=change_files.json(), instruct_content=change_files) - async def _run_new_requirement(self, requirements, format=CONFIG.prompt_format, *args, **kwargs) -> ActionOutput: - sas = SearchAndSummarize() - # rsp = await sas.run(context=requirements, system_text=SEARCH_AND_SUMMARIZE_SYSTEM_EN_US) - rsp = "" - info = f"### Search Results\n{sas.result}\n\n### Search Summary\n{rsp}" - if sas.result: - logger.info(sas.result) - logger.info(rsp) - - # logger.info(format) - prompt_template, format_example = get_template(templates, format) + async def _run_new_requirement(self, requirements, format=CONFIG.prompt_format) -> ActionOutput: + # sas = SearchAndSummarize() + # # rsp = await sas.run(context=requirements, system_text=SEARCH_AND_SUMMARIZE_SYSTEM_EN_US) + # rsp = "" + # info = f"### Search Results\n{sas.result}\n\n### Search Summary\n{rsp}" + # if sas.result: + # logger.info(sas.result) + # logger.info(rsp) project_name = CONFIG.project_name if CONFIG.project_name else "" - format_example = format_example.format(project_name=project_name) - # logger.info(prompt_template) - # logger.info(format_example) - prompt = prompt_template.format( - requirements=requirements, search_information=info, format_example=format_example, project_name=project_name - ) - # logger.info(prompt) - # prd = await self._aask_v1(prompt, "prd", OUTPUT_MAPPING) - prd = await self._aask_v1(prompt, "prd", OUTPUT_MAPPING, format=format) - await self._rename_workspace(prd) - return prd + context = CONTEXT_TEMPLATE.format(requirements=requirements, project_name=project_name) + node = await WRITE_PRD_NODE.fill(context=context, llm=self.llm, to=format) + await self._rename_workspace(node) + return node - async def _is_relative_to(self, new_requirement_doc, old_prd_doc) -> bool: - prompt = IS_RELATIVE_PROMPT.format(old_prd=old_prd_doc.content, requirements=new_requirement_doc.content) - res = await self._aask(prompt=prompt) - logger.info(f"REQ-RELATIVE: [{new_requirement_doc.root_relative_path}, {old_prd_doc.root_relative_path}]: {res}") - if "YES" in res: - return True - return False + async def _is_relative(self, new_requirement_doc, old_prd_doc) -> bool: + context = NEW_REQ_TEMPLATE.format(old_prd=old_prd_doc.content, requirements=new_requirement_doc.content) + node = await WP_IS_RELATIVE_NODE.fill(context, self.llm) + return node.get("is_relative") == "YES" async def _merge(self, new_requirement_doc, prd_doc, format=CONFIG.prompt_format) -> Document: if not CONFIG.project_name: CONFIG.project_name = Path(CONFIG.project_path).name - prompt = MERGE_PROMPT.format( - requirements=new_requirement_doc.content, old_prd=prd_doc.content, project_name=CONFIG.project_name - ) - prd = await self._aask_v1(prompt, "prd", OUTPUT_MAPPING, format=format) - prd_doc.content = prd.instruct_content.json(ensure_ascii=False) - await self._rename_workspace(prd) + prompt = NEW_REQ_TEMPLATE.format(requirements=new_requirement_doc.content, old_prd=prd_doc.content) + node = await WRITE_PRD_NODE.fill(context=prompt, llm=self.llm, to=format) + prd_doc.content = node.instruct_content.json(ensure_ascii=False) + await self._rename_workspace(node) return prd_doc async def _update_prd(self, requirement_doc, prd_doc, prds_file_repo, *args, **kwargs) -> Document | None: @@ -418,7 +138,7 @@ class WritePRD(Action): filename=FileRepository.new_filename() + ".json", content=prd.instruct_content.json(ensure_ascii=False), ) - elif await self._is_relative_to(requirement_doc, prd_doc): + elif await self._is_relative(requirement_doc, prd_doc): new_prd_doc = await self._merge(requirement_doc, prd_doc) else: return None @@ -453,17 +173,13 @@ class WritePRD(Action): return if not CONFIG.project_name: - if isinstance(prd, ActionOutput): + if isinstance(prd, ActionOutput) or isinstance(prd, ActionNode): ws_name = prd.instruct_content.dict()["Project Name"] else: ws_name = CodeParser.parse_str(block="Project Name", text=prd) CONFIG.project_name = ws_name CONFIG.git_repo.rename_root(CONFIG.project_name) - async def _is_bugfix(self, content): - prompt = IS_BUGFIX_PROMPT.format(content=content) - res = await self._aask(prompt=prompt) - logger.info(f"IS_BUGFIX:{res}") - if "YES" in res: - return True - return False + async def _is_bugfix(self, context) -> bool: + node = await WP_ISSUE_TYPE_NODE.fill(context, self.llm) + return node.get("issue_type") == "BUG" diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py new file mode 100644 index 000000000..7368621ea --- /dev/null +++ b/metagpt/actions/write_prd_an.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/12/14 11:40 +@Author : alexanderwu +@File : write_prd_an.py +""" + +from metagpt.actions.action_node import ActionNode +from metagpt.logs import logger + +LANGUAGE = ActionNode( + key="Language", + expected_type=str, + instruction="Provide the language used in the project, typically matching the user's requirement language.", + example="en_us" +) + +ORIGINAL_REQUIREMENTS = ActionNode( + key="Original Requirements", + expected_type=str, + instruction="Place the polished, complete original requirements here.", + example="The game should have a leaderboard and multiple difficulty levels." +) + +PROJECT_NAME = ActionNode( + key="Project Name", + expected_type=str, + instruction="Name the project using snake case style, like 'game_2048' or 'simple_crm'.", + example="game_2048" +) + +PRODUCT_GOALS = ActionNode( + key="Product Goals", + expected_type=list[str], + instruction="Provide up to three clear, orthogonal product goals.", + example=["Create an engaging user experience", + "Ensure high performance", + "Provide customizable features"] +) + +USER_STORIES = ActionNode( + key="User Stories", + expected_type=list[str], + instruction="Provide up to five scenario-based user stories.", + example=["As a user, I want to be able to choose difficulty levels", + "As a player, I want to see my score after each game"] +) + +COMPETITIVE_ANALYSIS = ActionNode( + key="Competitive Analysis", + expected_type=list[str], + instruction="Provide analyses for up to seven competitive products.", + example=["Python Snake Game: Simple interface, lacks advanced features"] +) + +COMPETITIVE_QUADRANT_CHART = ActionNode( + key="Competitive Quadrant Chart", + expected_type=str, + instruction="Use mermaid quadrantChart syntax. Distribute scores evenly between 0 and 1", + example="""quadrantChart + title Reach and engagement of campaigns + x-axis Low Reach --> High Reach + y-axis Low Engagement --> High Engagement + quadrant-1 We should expand + quadrant-2 Need to promote + quadrant-3 Re-evaluate + quadrant-4 May be improved + "Campaign: A": [0.3, 0.6] + "Campaign B": [0.45, 0.23] + "Campaign C": [0.57, 0.69] + "Campaign D": [0.78, 0.34] + "Campaign E": [0.40, 0.34] + "Campaign F": [0.35, 0.78] + "Our Target Product": [0.5, 0.6]""" +) + +REQUIREMENT_ANALYSIS = ActionNode( + key="Requirement Analysis", + expected_type=str, + instruction="Provide a detailed analysis of the requirements.", + example="The product should be user-friendly and performance-optimized." +) + +REQUIREMENT_POOL = ActionNode( + key="Requirement Pool", + expected_type=list[list[str]], + instruction="List down the requirements with their priority (P0, P1, P2).", + example=[["P0", "High priority requirement"], ["P1", "Medium priority requirement"]] +) + +UI_DESIGN_DRAFT = ActionNode( + key="UI Design draft", + expected_type=str, + instruction="Provide a simple description of UI elements, functions, style, and layout.", + example="Basic function description with a simple style and layout." +) + +ANYTHING_UNCLEAR = ActionNode( + key="Anything UNCLEAR", + expected_type=str, + instruction="Mention any aspects of the project that are unclear and try to clarify them.", + example="..." +) + +ISSUE_TYPE = ActionNode( + key="issue_type", + expected_type=str, + instruction="Answer BUG/REQUIREMENT. If it is a bugfix, answer Bug, otherwise answer Requirement", + example="BUG" +) + +IS_RELATIVE = ActionNode( + key="is_relative", + expected_type=str, + instruction="Answer YES/NO. If the requirement is related to the old PRD, answer YES, otherwise NO", + example="YES" +) + +REASON = ActionNode( + key="reason", + expected_type=str, + instruction="Explain the reasoning process from question to answer", + example="..." +) + + +NODES = [ + LANGUAGE, + ORIGINAL_REQUIREMENTS, + PROJECT_NAME, + PRODUCT_GOALS, + USER_STORIES, + COMPETITIVE_ANALYSIS, + COMPETITIVE_QUADRANT_CHART, + REQUIREMENT_ANALYSIS, + REQUIREMENT_POOL, + UI_DESIGN_DRAFT, + ANYTHING_UNCLEAR +] + +WRITE_PRD_NODE = ActionNode.from_children("WritePRD", NODES) +WP_ISSUE_TYPE_NODE = ActionNode.from_children("WP_ISSUE_TYPE", [ISSUE_TYPE, REASON]) +WP_IS_RELATIVE_NODE = ActionNode.from_children("WP_IS_RELATIVE", [IS_RELATIVE, REASON]) + + +def main(): + prompt = WRITE_PRD_NODE.compile(context="") + logger.info(prompt) + + +if __name__ == '__main__': + main() diff --git a/metagpt/environment.py b/metagpt/environment.py index 7d1e307f3..89b6f9d46 100644 --- a/metagpt/environment.py +++ b/metagpt/environment.py @@ -82,7 +82,7 @@ class Environment(BaseModel): futures.append(future) await asyncio.gather(*futures) - logger.info(f"is idle: {self.is_idle}") + logger.debug(f"is idle: {self.is_idle}") def get_roles(self) -> dict[str, Role]: """获得环境内的所有角色 diff --git a/metagpt/llm.py b/metagpt/llm.py index d8d06c0a1..eaa4880a5 100644 --- a/metagpt/llm.py +++ b/metagpt/llm.py @@ -12,11 +12,12 @@ from metagpt.provider.human_provider import HumanProvider from metagpt.provider.openai_api import OpenAIGPTAPI from metagpt.provider.spark_api import SparkAPI from metagpt.provider.zhipuai_api import ZhiPuAIGPTAPI +from metagpt.provider.base_gpt_api import BaseGPTAPI _ = HumanProvider() # Avoid pre-commit error -def LLM() -> "BaseGPTAPI": +def LLM() -> BaseGPTAPI: """initialize different LLM instance according to the key field existence""" # TODO a little trick, can use registry to initialize LLM instance further if CONFIG.openai_api_key: diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index 15d5fe5b1..b80ef85be 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -26,8 +26,8 @@ class Architect(Role): self, name: str = "Bob", profile: str = "Architect", - goal: str = "Design a concise, usable, complete python system", - constraints: str = "Try to specify good open source tools as much as possible", + goal: str = "design a concise, usable, complete software system", + constraints: str = "make sure the architecture is simple enough and use appropriate open source libraries" ) -> None: """Initializes the Architect with given attributes.""" super().__init__(name, profile, goal, constraints) diff --git a/metagpt/roles/engineer.py b/metagpt/roles/engineer.py index cedd2101f..844f3589d 100644 --- a/metagpt/roles/engineer.py +++ b/metagpt/roles/engineer.py @@ -71,14 +71,15 @@ class Engineer(Role): self, name: str = "Alex", profile: str = "Engineer", - goal: str = "Write elegant, readable, extensible, efficient code", - constraints: str = "The code should conform to standards like PEP8 and be modular and maintainable", + goal: str = "write elegant, readable, extensible, efficient code", + constraints: str = "the code should conform to standards like PEP8 and be modular and maintainable", n_borg: int = 1, use_code_review: bool = False, ) -> None: """Initializes the Engineer role with given attributes.""" super().__init__(name, profile, goal, constraints) self.use_code_review = use_code_review + self._init_actions([WriteCode]) self._watch([WriteTasks, SummarizeCode, WriteCode, WriteCodeReview, FixBug]) self.code_todos = [] self.summarize_todos = [] @@ -198,11 +199,11 @@ class Engineer(Role): return None msg = self._rc.news[0] if msg.cause_by in write_code_filters: - logger.info(f"TODO WriteCode:{msg.json()}") + logger.debug(f"TODO WriteCode:{msg.json()}") await self._new_code_actions(bug_fix=msg.cause_by == any_to_str(FixBug)) return self._rc.todo if msg.cause_by in summarize_code_filters and msg.sent_from == any_to_str(self): - logger.info(f"TODO SummarizeCode:{msg.json()}") + logger.debug(f"TODO SummarizeCode:{msg.json()}") await self._new_summarize_actions() return self._rc.todo return None diff --git a/metagpt/roles/project_manager.py b/metagpt/roles/project_manager.py index 7e7c5699d..37090b24f 100644 --- a/metagpt/roles/project_manager.py +++ b/metagpt/roles/project_manager.py @@ -25,7 +25,8 @@ class ProjectManager(Role): self, name: str = "Eve", profile: str = "Project Manager", - goal: str = "Improve team efficiency and deliver with quality and quantity", + goal: str = "break down tasks according to PRD/technical design, generate a task list, and analyze task " + "dependencies to start with the prerequisite modules", constraints: str = "", ) -> None: """ diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 52ac3cf28..7c9341adb 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -26,6 +26,7 @@ from typing import Iterable, Set, Type from pydantic import BaseModel, Field from metagpt.actions import Action, ActionOutput +from metagpt.actions.action_node import ActionNode from metagpt.config import CONFIG from metagpt.llm import LLM, HumanProvider from metagpt.logs import logger @@ -156,7 +157,7 @@ class Role: f"as Role's {str(action)} was initialized using LLM, try passing in Action classes instead of initialized instances" ) i = action - i.set_env(self._rc.env) + # i.set_env(self._rc.env) i.set_prefix(self._get_prefix(), self.profile) self._actions.append(i) self._states.append(f"{idx}. {action}") @@ -278,7 +279,7 @@ class Role: async def _act(self) -> Message: logger.info(f"{self._setting}: ready to {self._rc.todo}") response = await self._rc.todo.run(self._rc.important_memory) - if isinstance(response, ActionOutput): + if isinstance(response, ActionOutput) or isinstance(response, ActionNode): msg = Message( content=response.content, instruct_content=response.instruct_content, diff --git a/metagpt/roles/searcher.py b/metagpt/roles/searcher.py index bee8d3986..5760202ff 100644 --- a/metagpt/roles/searcher.py +++ b/metagpt/roles/searcher.py @@ -8,6 +8,7 @@ the `cause_by` value in the `Message` to a string to support the new message distribution feature. """ from metagpt.actions import ActionOutput, SearchAndSummarize +from metagpt.actions.action_node import ActionNode from metagpt.logs import logger from metagpt.roles import Role from metagpt.schema import Message @@ -58,7 +59,7 @@ class Searcher(Role): logger.info(f"{self._setting}: ready to {self._rc.todo}") response = await self._rc.todo.run(self._rc.memory.get(k=0)) - if isinstance(response, ActionOutput): + if isinstance(response, ActionOutput) or isinstance(response, ActionNode): msg = Message( content=response.content, instruct_content=response.instruct_content, From 39cb66359505edef07b9e1fb5f5c1f341372bcec Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 16:21:56 +0800 Subject: [PATCH 22/38] fix typo --- metagpt/actions/action_node.py | 2 +- metagpt/actions/write_prd_an.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 178986ebe..96c175ccb 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -285,7 +285,7 @@ class ActionNode: def action_node_from_tuple_example(): # 示例:列表中包含元组 list_of_tuples = [ - ("key1", int, "Instruction 1", "Example 1") + ("key1", str, "Instruction 1", "Example 1") ] # 从列表中创建 ActionNode 实例 diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py index 7368621ea..0781760ba 100644 --- a/metagpt/actions/write_prd_an.py +++ b/metagpt/actions/write_prd_an.py @@ -106,7 +106,7 @@ ANYTHING_UNCLEAR = ActionNode( ISSUE_TYPE = ActionNode( key="issue_type", expected_type=str, - instruction="Answer BUG/REQUIREMENT. If it is a bugfix, answer Bug, otherwise answer Requirement", + instruction="Answer BUG/REQUIREMENT. If it is a bugfix, answer BUG, otherwise answer Requirement", example="BUG" ) From 609d75a07eba441dcba4c3c2ea0644f9836f6d5a Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 18:06:43 +0800 Subject: [PATCH 23/38] add programming language as input, add complex strgy to ActionNode.fill method, fix quadrantChart in chinese etc. --- metagpt/actions/action_node.py | 74 ++++++++++++++++++++---- metagpt/actions/project_management_an.py | 2 +- metagpt/actions/write_prd_an.py | 24 +++++--- 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 96c175ccb..b1fbdaae9 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -28,6 +28,8 @@ SIMPLE_TEMPLATE = """ ## context {context} +----- + ## format example {example} @@ -38,7 +40,7 @@ SIMPLE_TEMPLATE = """ {constraint} ## action -Fill in the above nodes based on the context. Answer in format example. +Fill in the above nodes based on the format example. """ @@ -108,6 +110,16 @@ class ActionNode: """获得子ActionNode的字典,以key索引""" return {k: (v.expected_type, ...) for k, v in self.children.items()} + def get_self_mapping(self) -> Dict[str, Type]: + """get self key: type mapping""" + return {self.key: (self.expected_type, ...)} + + def get_mapping(self, mode="children") -> Dict[str, Type]: + """get key: type mapping under mode""" + if mode == "children" or (mode=="auto" and self.children): + return self.get_children_mapping() + return self.get_self_mapping() + @classmethod def create_model_class(cls, class_name: str, mapping: Dict[str, Type]): """基于pydantic v1的模型动态生成,用来检验结果类型正确性""" @@ -160,8 +172,8 @@ class ActionNode: mapping = self.get_children_mapping() return self.create_model_class(class_name, mapping) - def to_dict(self, format_func=None, mode="all") -> Dict: - """将当前节点与子节点都按照node: format的格式组织称字典""" + def to_dict(self, format_func=None, mode="auto") -> Dict: + """将当前节点与子节点都按照node: format的格式组织成字典""" # 如果没有提供格式化函数,使用默认的格式化方式 if format_func is None: @@ -171,7 +183,7 @@ class ActionNode: formatted_value = format_func(self) # 创建当前节点的键值对 - if mode == "children": + if mode == "children" or (mode == "auto" and self.children): node_dict = {} else: node_dict = {self.key: formatted_value} @@ -227,7 +239,7 @@ class ActionNode: mode="root": NotImplemented """ - # FIXME: json instruction会带来 "Project name": "web_2048 # 项目名称使用下划线", + # FIXME: json instruction会带来格式问题,如:"Project name": "web_2048 # 项目名称使用下划线", self.instruction = self.compile_instruction(to="markdown", mode=mode) self.example = self.compile_example(to=to, tag="CONTENT", mode=mode) prompt = template.format(context=context, example=self.example, instruction=self.instruction, @@ -268,19 +280,59 @@ class ActionNode: def get(self, key): return self.instruct_content.dict()[key] - async def fill(self, context, llm, to="json"): - """运行这个ActionNode,并且填槽,可以采用不同策略,比如只运行子节点""" - self.llm = llm - prompt = self.compile(context=context, to=to) - mapping = self.get_children_mapping() + def set_recursive(self, name, value): + setattr(self, name, value) + for _, i in self.children.items(): + i.set_recursive(name, value) + + def set_llm(self, llm): + self.set_recursive("llm", llm) + + def set_context(self, context): + self.set_recursive("context", context) + + async def simple_fill(self, to, mode): + prompt = self.compile(context=self.context, to=to, mode=mode) + mapping = self.get_mapping(mode) class_name = f"{self.key}_AN" - # 需要传入llm,并且实际在ActionNode中执行。需要规划好具体的执行方法 output = await self._aask_v1(prompt, class_name, mapping, format=to) self.content = output.content self.instruct_content = output.instruct_content return self + async def fill(self, context, llm, to="json", mode="auto", strgy="simple"): + """ Fill the node(s) with mode. + + :param context: Everything we should know when filling node. + :param llm: Large Language Model with pre-defined system message. + :param to: json/markdown, determine example and output format. + - json: it's easy to open source LLM with json format + - markdown: when generating code, markdown is always better + :param mode: auto/children/root + - auto: automated fill children's nodes and gather outputs, if no children, fill itself + - children: fill children's nodes and gather outputs + - root: fill root's node and gather output + :param strgy: simple/complex + - simple: run only once + - complex: run each node + :return: self + """ + self.set_llm(llm) + self.set_context(context) + + if strgy == "simple": + return await self.simple_fill(to, mode) + elif strgy == "complex": + # 这里隐式假设了拥有children + tmp = {} + for _, i in self.children.items(): + child = await i.simple_fill(to, mode) + tmp.update(child.instruct_content.dict()) + cls = self.create_children_class() + self.instruct_content = cls(**tmp) + return self + def action_node_from_tuple_example(): # 示例:列表中包含元组 diff --git a/metagpt/actions/project_management_an.py b/metagpt/actions/project_management_an.py index aa7cdcde2..9849cb7b3 100644 --- a/metagpt/actions/project_management_an.py +++ b/metagpt/actions/project_management_an.py @@ -28,7 +28,7 @@ LOGIC_ANALYSIS = ActionNode( instruction="Provide a list of files with the classes/methods/functions to be implemented, " "including dependency analysis and imports.", example=[["game.py", "Contains Game class and ... functions"], - ["main.py", "Contains main function, from game import Game"]] + ["main.py", "Contains main function, depends on game.py"]] ) TASK_LIST = ActionNode( diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py index 0781760ba..cbcf920b9 100644 --- a/metagpt/actions/write_prd_an.py +++ b/metagpt/actions/write_prd_an.py @@ -16,6 +16,13 @@ LANGUAGE = ActionNode( example="en_us" ) +PROGRAMMING_LANGUAGE = ActionNode( + key="Programming Language", + expected_type=str, + instruction="Python/JavaScript or other mainstream programming language.", + example="Python" +) + ORIGINAL_REQUIREMENTS = ActionNode( key="Original Requirements", expected_type=str, @@ -59,14 +66,14 @@ COMPETITIVE_QUADRANT_CHART = ActionNode( expected_type=str, instruction="Use mermaid quadrantChart syntax. Distribute scores evenly between 0 and 1", example="""quadrantChart - title Reach and engagement of campaigns - x-axis Low Reach --> High Reach - y-axis Low Engagement --> High Engagement - quadrant-1 We should expand - quadrant-2 Need to promote - quadrant-3 Re-evaluate - quadrant-4 May be improved - "Campaign: A": [0.3, 0.6] + title "Reach and engagement of campaigns" + x-axis "Low Reach" --> "High Reach" + y-axis "Low Engagement" --> "High Engagement" + quadrant-1 "We should expand" + quadrant-2 "Need to promote" + quadrant-3 "Re-evaluate" + quadrant-4 "May be improved" + "Campaign A": [0.3, 0.6] "Campaign B": [0.45, 0.23] "Campaign C": [0.57, 0.69] "Campaign D": [0.78, 0.34] @@ -127,6 +134,7 @@ REASON = ActionNode( NODES = [ LANGUAGE, + PROGRAMMING_LANGUAGE, ORIGINAL_REQUIREMENTS, PROJECT_NAME, PRODUCT_GOALS, From 290fb8b8d053a4d1441ac64fff60550f0b9e18e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Thu, 14 Dec 2023 20:44:27 +0800 Subject: [PATCH 24/38] refactor: format --- .gitignore | 1 + metagpt/actions/action_node.py | 31 ++- metagpt/actions/design_api_an.py | 27 +-- metagpt/actions/project_management.py | 7 +- metagpt/actions/project_management_an.py | 24 +- metagpt/actions/write_prd_an.py | 41 ++-- metagpt/provider/fireworks_api.py | 3 +- metagpt/provider/open_llm_api.py | 7 +- .../postprecess/base_postprecess_plugin.py | 22 +- .../postprecess/llm_output_postprecess.py | 11 +- metagpt/roles/architect.py | 2 +- metagpt/roles/project_manager.py | 2 +- metagpt/roles/qa_engineer.py | 4 +- metagpt/schema.py | 16 +- metagpt/utils/ahttp_client.py | 56 ++--- metagpt/utils/git_repository.py | 9 +- metagpt/utils/repair_llm_raw_output.py | 31 +-- metagpt/utils/utils.py | 11 +- tests/metagpt/test_llm.py | 1 + tests/metagpt/utils/test_ahttp_client.py | 17 +- .../utils/test_repair_llm_raw_output.py | 223 +++++++++--------- 21 files changed, 262 insertions(+), 284 deletions(-) diff --git a/.gitignore b/.gitignore index e03eab3d3..0ac318ff5 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,7 @@ cover/ # Django stuff: *.log +logs local_settings.py db.sqlite3 db.sqlite3-journal diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 96c175ccb..ae40913e0 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -5,13 +5,12 @@ @Author : alexanderwu @File : action_node.py """ -import re -from typing import Dict, Type, List, Any, Tuple, Optional import json +import re +from typing import Any, Dict, List, Optional, Type from pydantic import BaseModel, create_model, root_validator, validator -# , model_validator, field_validator -from tenacity import wait_random_exponential, stop_after_attempt, retry +from tenacity import retry, stop_after_attempt, wait_random_exponential from metagpt.actions import ActionOutput from metagpt.llm import BaseGPTAPI @@ -51,6 +50,7 @@ def dict_to_markdown(d, prefix="-", postfix="\n"): class ActionNode: """ActionNode is a tree of nodes.""" + # Action Strgy # - sop: 仅使用一级SOP # - complex: 使用一级SOP+自定义策略填槽 @@ -72,8 +72,7 @@ class ActionNode: content: str instruct_content: BaseModel - def __init__(self, key, expected_type, instruction, example, content="", - children=None): + def __init__(self, key, expected_type, instruction, example, content="", children=None): self.key = key self.expected_type = expected_type self.instruction = instruction @@ -82,8 +81,9 @@ class ActionNode: self.children = children if children is not None else {} def __str__(self): - return f"{self.key}, {self.expected_type}, {self.instruction}, {self.example}" \ - f", {self.content}, {self.children}" + return ( + f"{self.key}, {self.expected_type}, {self.instruction}, {self.example}" f", {self.content}, {self.children}" + ) def __repr__(self): return self.__str__() @@ -136,7 +136,7 @@ class ActionNode: """基于pydantic v2的模型动态生成,用来检验结果类型正确性,待验证""" new_class = create_model(class_name, **mapping) - @model_validator(mode='before') + @model_validator(mode="before") def check_missing_fields(data): required_fields = set(mapping.keys()) missing_fields = required_fields - set(data.keys()) @@ -144,7 +144,7 @@ class ActionNode: raise ValueError(f"Missing fields: {missing_fields}") return data - @field_validator('*') + @field_validator("*") def check_name(v: Any, field: str) -> Any: if field not in mapping.keys(): raise ValueError(f"Unrecognized block: {field}") @@ -230,8 +230,9 @@ class ActionNode: # FIXME: json instruction会带来 "Project name": "web_2048 # 项目名称使用下划线", self.instruction = self.compile_instruction(to="markdown", mode=mode) self.example = self.compile_example(to=to, tag="CONTENT", mode=mode) - prompt = template.format(context=context, example=self.example, instruction=self.instruction, - constraint=CONSTRAINT) + prompt = template.format( + context=context, example=self.example, instruction=self.instruction, constraint=CONSTRAINT + ) return prompt @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) @@ -284,9 +285,7 @@ class ActionNode: def action_node_from_tuple_example(): # 示例:列表中包含元组 - list_of_tuples = [ - ("key1", str, "Instruction 1", "Example 1") - ] + list_of_tuples = [("key1", str, "Instruction 1", "Example 1")] # 从列表中创建 ActionNode 实例 nodes = [ActionNode(*data) for data in list_of_tuples] @@ -294,5 +293,5 @@ def action_node_from_tuple_example(): logger.info(i) -if __name__ == '__main__': +if __name__ == "__main__": action_node_from_tuple_example() diff --git a/metagpt/actions/design_api_an.py b/metagpt/actions/design_api_an.py index 2db203606..0a303cdd5 100644 --- a/metagpt/actions/design_api_an.py +++ b/metagpt/actions/design_api_an.py @@ -6,52 +6,49 @@ @File : design_api_an.py """ from metagpt.actions.action_node import ActionNode -from metagpt.utils.mermaid import MMC1, MMC2 from metagpt.logs import logger +from metagpt.utils.mermaid import MMC1, MMC2 IMPLEMENTATION_APPROACH = ActionNode( key="Implementation approach", expected_type=str, instruction="Analyze the difficult points of the requirements, select the appropriate open-source framework", - example="We will ..." + example="We will ...", ) PROJECT_NAME = ActionNode( - key="Project name", - expected_type=str, - instruction="The project name with underline", - example="game_2048" + key="Project name", expected_type=str, instruction="The project name with underline", example="game_2048" ) FILE_LIST = ActionNode( key="File list", expected_type=list[str], instruction="Only need relative paths. ALWAYS write a main.py or app.py here", - example=['main.py', 'game.py'] + example=["main.py", "game.py"], ) DATA_STRUCTURES_AND_INTERFACES = ActionNode( key="Data structures and interfaces", expected_type=str, instruction="Use mermaid classDiagram code syntax, including classes, method(__init__ etc.) and functions with type" - " annotations, CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " - "The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.", - example=MMC1 + " annotations, CLEARLY MARK the RELATIONSHIPS between classes, and comply with PEP8 standards. " + "The data structures SHOULD BE VERY DETAILED and the API should be comprehensive with a complete design.", + example=MMC1, ) PROGRAM_CALL_FLOW = ActionNode( key="Program call flow", expected_type=str, instruction="Use sequenceDiagram code syntax, COMPLETE and VERY DETAILED, using CLASSES AND API DEFINED ABOVE " - "accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.", - example=MMC2 + "accurately, covering the CRUD AND INIT of each object, SYNTAX MUST BE CORRECT.", + example=MMC2, ) ANYTHING_UNCLEAR = ActionNode( key="Anything UNCLEAR", expected_type=str, instruction="Mention unclear project aspects, then try to clarify it.", - example="Clarification needed on third-party API integration, ..." + example="Clarification needed on third-party API integration, ...", ) NODES = [ @@ -60,7 +57,7 @@ NODES = [ FILE_LIST, DATA_STRUCTURES_AND_INTERFACES, PROGRAM_CALL_FLOW, - ANYTHING_UNCLEAR + ANYTHING_UNCLEAR, ] DESIGN_API_NODE = ActionNode.from_children("DesignAPI", NODES) @@ -71,5 +68,5 @@ def main(): logger.info(prompt) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/metagpt/actions/project_management.py b/metagpt/actions/project_management.py index 29e3bed3e..c95be4012 100644 --- a/metagpt/actions/project_management.py +++ b/metagpt/actions/project_management.py @@ -10,7 +10,6 @@ 3. According to the design in Section 2.2.3.5.4 of RFC 135, add incremental iteration functionality. """ import json -# from typing import List from metagpt.actions import ActionOutput from metagpt.actions.action import Action @@ -25,6 +24,9 @@ from metagpt.const import ( from metagpt.logs import logger from metagpt.schema import Document, Documents from metagpt.utils.file_repository import FileRepository + +# from typing import List + # from metagpt.utils.get_template import get_template NEW_REQ_TEMPLATE = """ @@ -97,7 +99,8 @@ class WriteTasks(Action): async def _merge(self, system_design_doc, task_doc, format=CONFIG.prompt_format) -> Document: context = NEW_REQ_TEMPLATE.format(context=system_design_doc.content, old_tasks=task_doc.content) node = await PM_NODE.fill(context, self.llm, format) - return node + task_doc.content = node.content + return task_doc @staticmethod async def _update_requirements(doc): diff --git a/metagpt/actions/project_management_an.py b/metagpt/actions/project_management_an.py index aa7cdcde2..e03af36d7 100644 --- a/metagpt/actions/project_management_an.py +++ b/metagpt/actions/project_management_an.py @@ -12,51 +12,53 @@ REQUIRED_PYTHON_PACKAGES = ActionNode( key="Required Python packages", expected_type=list[str], instruction="Provide required Python packages in requirements.txt format.", - example=["flask==1.1.2", "bcrypt==3.2.0"] + example=["flask==1.1.2", "bcrypt==3.2.0"], ) REQUIRED_OTHER_LANGUAGE_PACKAGES = ActionNode( key="Required Other language third-party packages", expected_type=list[str], instruction="List down the required packages for languages other than Python.", - example=["No third-party dependencies required"] + example=["No third-party dependencies required"], ) LOGIC_ANALYSIS = ActionNode( key="Logic Analysis", expected_type=list[list[str]], instruction="Provide a list of files with the classes/methods/functions to be implemented, " - "including dependency analysis and imports.", - example=[["game.py", "Contains Game class and ... functions"], - ["main.py", "Contains main function, from game import Game"]] + "including dependency analysis and imports.", + example=[ + ["game.py", "Contains Game class and ... functions"], + ["main.py", "Contains main function, from game import Game"], + ], ) TASK_LIST = ActionNode( key="Task list", expected_type=list[str], instruction="Break down the tasks into a list of filenames, prioritized by dependency order.", - example=["game.py", "main.py"] + example=["game.py", "main.py"], ) FULL_API_SPEC = ActionNode( key="Full API spec", expected_type=str, instruction="Describe all APIs using OpenAPI 3.0 spec that may be used by both frontend and backend.", - example="openapi: 3.0.0 ..." + example="openapi: 3.0.0 ...", ) SHARED_KNOWLEDGE = ActionNode( key="Shared Knowledge", expected_type=str, instruction="Detail any shared knowledge, like common utility functions or configuration variables.", - example="'game.py' contains functions shared across the project." + example="'game.py' contains functions shared across the project.", ) ANYTHING_UNCLEAR_PM = ActionNode( key="Anything UNCLEAR", expected_type=str, instruction="Mention any unclear aspects in the project management context and try to clarify them.", - example="Clarification needed on how to start and initialize third-party libraries." + example="Clarification needed on how to start and initialize third-party libraries.", ) NODES = [ @@ -66,7 +68,7 @@ NODES = [ TASK_LIST, FULL_API_SPEC, SHARED_KNOWLEDGE, - ANYTHING_UNCLEAR_PM + ANYTHING_UNCLEAR_PM, ] @@ -78,5 +80,5 @@ def main(): logger.info(prompt) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py index 0781760ba..849150f6c 100644 --- a/metagpt/actions/write_prd_an.py +++ b/metagpt/actions/write_prd_an.py @@ -13,45 +13,45 @@ LANGUAGE = ActionNode( key="Language", expected_type=str, instruction="Provide the language used in the project, typically matching the user's requirement language.", - example="en_us" + example="en_us", ) ORIGINAL_REQUIREMENTS = ActionNode( key="Original Requirements", expected_type=str, instruction="Place the polished, complete original requirements here.", - example="The game should have a leaderboard and multiple difficulty levels." + example="The game should have a leaderboard and multiple difficulty levels.", ) PROJECT_NAME = ActionNode( key="Project Name", expected_type=str, instruction="Name the project using snake case style, like 'game_2048' or 'simple_crm'.", - example="game_2048" + example="game_2048", ) PRODUCT_GOALS = ActionNode( key="Product Goals", expected_type=list[str], instruction="Provide up to three clear, orthogonal product goals.", - example=["Create an engaging user experience", - "Ensure high performance", - "Provide customizable features"] + example=["Create an engaging user experience", "Ensure high performance", "Provide customizable features"], ) USER_STORIES = ActionNode( key="User Stories", expected_type=list[str], instruction="Provide up to five scenario-based user stories.", - example=["As a user, I want to be able to choose difficulty levels", - "As a player, I want to see my score after each game"] + example=[ + "As a user, I want to be able to choose difficulty levels", + "As a player, I want to see my score after each game", + ], ) COMPETITIVE_ANALYSIS = ActionNode( key="Competitive Analysis", expected_type=list[str], instruction="Provide analyses for up to seven competitive products.", - example=["Python Snake Game: Simple interface, lacks advanced features"] + example=["Python Snake Game: Simple interface, lacks advanced features"], ) COMPETITIVE_QUADRANT_CHART = ActionNode( @@ -72,56 +72,53 @@ COMPETITIVE_QUADRANT_CHART = ActionNode( "Campaign D": [0.78, 0.34] "Campaign E": [0.40, 0.34] "Campaign F": [0.35, 0.78] - "Our Target Product": [0.5, 0.6]""" + "Our Target Product": [0.5, 0.6]""", ) REQUIREMENT_ANALYSIS = ActionNode( key="Requirement Analysis", expected_type=str, instruction="Provide a detailed analysis of the requirements.", - example="The product should be user-friendly and performance-optimized." + example="The product should be user-friendly and performance-optimized.", ) REQUIREMENT_POOL = ActionNode( key="Requirement Pool", expected_type=list[list[str]], instruction="List down the requirements with their priority (P0, P1, P2).", - example=[["P0", "High priority requirement"], ["P1", "Medium priority requirement"]] + example=[["P0", "High priority requirement"], ["P1", "Medium priority requirement"]], ) UI_DESIGN_DRAFT = ActionNode( key="UI Design draft", expected_type=str, instruction="Provide a simple description of UI elements, functions, style, and layout.", - example="Basic function description with a simple style and layout." + example="Basic function description with a simple style and layout.", ) ANYTHING_UNCLEAR = ActionNode( key="Anything UNCLEAR", expected_type=str, instruction="Mention any aspects of the project that are unclear and try to clarify them.", - example="..." + example="...", ) ISSUE_TYPE = ActionNode( key="issue_type", expected_type=str, instruction="Answer BUG/REQUIREMENT. If it is a bugfix, answer BUG, otherwise answer Requirement", - example="BUG" + example="BUG", ) IS_RELATIVE = ActionNode( key="is_relative", expected_type=str, instruction="Answer YES/NO. If the requirement is related to the old PRD, answer YES, otherwise NO", - example="YES" + example="YES", ) REASON = ActionNode( - key="reason", - expected_type=str, - instruction="Explain the reasoning process from question to answer", - example="..." + key="reason", expected_type=str, instruction="Explain the reasoning process from question to answer", example="..." ) @@ -136,7 +133,7 @@ NODES = [ REQUIREMENT_ANALYSIS, REQUIREMENT_POOL, UI_DESIGN_DRAFT, - ANYTHING_UNCLEAR + ANYTHING_UNCLEAR, ] WRITE_PRD_NODE = ActionNode.from_children("WritePRD", NODES) @@ -149,5 +146,5 @@ def main(): logger.info(prompt) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/metagpt/provider/fireworks_api.py b/metagpt/provider/fireworks_api.py index 23126af2d..47ac9cf61 100644 --- a/metagpt/provider/fireworks_api.py +++ b/metagpt/provider/fireworks_api.py @@ -5,11 +5,10 @@ import openai from metagpt.config import CONFIG -from metagpt.provider.openai_api import OpenAIGPTAPI, CostManager, RateLimiter +from metagpt.provider.openai_api import CostManager, OpenAIGPTAPI, RateLimiter class FireWorksGPTAPI(OpenAIGPTAPI): - def __init__(self): self.__init_fireworks(CONFIG) self.llm = openai diff --git a/metagpt/provider/open_llm_api.py b/metagpt/provider/open_llm_api.py index a6820b42b..f421e30c8 100644 --- a/metagpt/provider/open_llm_api.py +++ b/metagpt/provider/open_llm_api.py @@ -4,13 +4,13 @@ import openai -from metagpt.logs import logger from metagpt.config import CONFIG -from metagpt.provider.openai_api import OpenAIGPTAPI, CostManager, RateLimiter +from metagpt.logs import logger +from metagpt.provider.openai_api import CostManager, OpenAIGPTAPI, RateLimiter class OpenLLMCostManager(CostManager): - """ open llm model is self-host, it's free and without cost""" + """open llm model is self-host, it's free and without cost""" def update_cost(self, prompt_tokens, completion_tokens, model): """ @@ -32,7 +32,6 @@ class OpenLLMCostManager(CostManager): class OpenLLMGPTAPI(OpenAIGPTAPI): - def __init__(self): self.__init_openllm(CONFIG) self.llm = openai diff --git a/metagpt/provider/postprecess/base_postprecess_plugin.py b/metagpt/provider/postprecess/base_postprecess_plugin.py index 702a03194..0d1cfbb11 100644 --- a/metagpt/provider/postprecess/base_postprecess_plugin.py +++ b/metagpt/provider/postprecess/base_postprecess_plugin.py @@ -5,13 +5,15 @@ from typing import Union from metagpt.logs import logger -from metagpt.utils.repair_llm_raw_output import RepairType -from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, extract_content_from_output, \ - retry_parse_json_text +from metagpt.utils.repair_llm_raw_output import ( + RepairType, + extract_content_from_output, + repair_llm_raw_output, + retry_parse_json_text, +) class BasePostPrecessPlugin(object): - model = None # the plugin of the `model`, use to judge in `llm_postprecess` def run_repair_llm_output(self, output: str, schema: dict, req_key: str = "[/CONTENT]") -> Union[dict, list]: @@ -33,15 +35,15 @@ class BasePostPrecessPlugin(object): return parsed_data def run_repair_llm_raw_output(self, content: str, req_keys: list[str], repair_type: str = None) -> str: - """ inherited class can re-implement the function""" + """inherited class can re-implement the function""" return repair_llm_raw_output(content, req_keys=req_keys, repair_type=repair_type) def run_extract_content_from_output(self, content: str, right_key: str) -> str: - """ inherited class can re-implement the function""" + """inherited class can re-implement the function""" return extract_content_from_output(content, right_key=right_key) def run_retry_parse_json_text(self, content: str) -> Union[dict, list]: - """ inherited class can re-implement the function""" + """inherited class can re-implement the function""" logger.info(f"extracted json CONTENT from output:\n{content}") parsed_data = retry_parse_json_text(output=content) # should use output=content return parsed_data @@ -64,9 +66,5 @@ class BasePostPrecessPlugin(object): assert "/" in req_key # current, postprocess only deal the repair_llm_raw_output - new_output = self.run_repair_llm_output( - output=output, - schema=schema, - req_key=req_key - ) + new_output = self.run_repair_llm_output(output=output, schema=schema, req_key=req_key) return new_output diff --git a/metagpt/provider/postprecess/llm_output_postprecess.py b/metagpt/provider/postprecess/llm_output_postprecess.py index 4b5955061..85405543d 100644 --- a/metagpt/provider/postprecess/llm_output_postprecess.py +++ b/metagpt/provider/postprecess/llm_output_postprecess.py @@ -7,17 +7,14 @@ from typing import Union from metagpt.provider.postprecess.base_postprecess_plugin import BasePostPrecessPlugin -def llm_output_postprecess(output: str, schema: dict, req_key: str = "[/CONTENT]", - model_name: str = None) -> Union[dict, str]: +def llm_output_postprecess( + output: str, schema: dict, req_key: str = "[/CONTENT]", model_name: str = None +) -> Union[dict, str]: """ default use BasePostPrecessPlugin if there is not matched plugin. """ # TODO choose different model's plugin according to the model_name postprecess_plugin = BasePostPrecessPlugin() - result = postprecess_plugin.run( - output=output, - schema=schema, - req_key=req_key - ) + result = postprecess_plugin.run(output=output, schema=schema, req_key=req_key) return result diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index b80ef85be..2c0bdd1d6 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -27,7 +27,7 @@ class Architect(Role): name: str = "Bob", profile: str = "Architect", goal: str = "design a concise, usable, complete software system", - constraints: str = "make sure the architecture is simple enough and use appropriate open source libraries" + constraints: str = "make sure the architecture is simple enough and use appropriate open source libraries", ) -> None: """Initializes the Architect with given attributes.""" super().__init__(name, profile, goal, constraints) diff --git a/metagpt/roles/project_manager.py b/metagpt/roles/project_manager.py index 37090b24f..bfe1be251 100644 --- a/metagpt/roles/project_manager.py +++ b/metagpt/roles/project_manager.py @@ -26,7 +26,7 @@ class ProjectManager(Role): name: str = "Eve", profile: str = "Project Manager", goal: str = "break down tasks according to PRD/technical design, generate a task list, and analyze task " - "dependencies to start with the prerequisite modules", + "dependencies to start with the prerequisite modules", constraints: str = "", ) -> None: """ diff --git a/metagpt/roles/qa_engineer.py b/metagpt/roles/qa_engineer.py index 15a01b9e9..c1573e63b 100644 --- a/metagpt/roles/qa_engineer.py +++ b/metagpt/roles/qa_engineer.py @@ -14,9 +14,7 @@ @Modified By: mashenquan, 2023-12-5. Enhance the workflow to navigate to WriteCode or QaEngineer based on the results of SummarizeCode. """ -from metagpt.actions import DebugError, RunCode, WriteCode, WriteCodeReview, WriteTest - -# from metagpt.const import WORKSPACE_ROOT +from metagpt.actions import DebugError, RunCode, WriteTest from metagpt.actions.summarize_code import SummarizeCode from metagpt.config import CONFIG from metagpt.const import ( diff --git a/metagpt/schema.py b/metagpt/schema.py index 25281e399..baed5582b 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -97,14 +97,14 @@ class Message(BaseModel): send_to: Set = Field(default_factory={MESSAGE_ROUTE_TO_ALL}) def __init__( - self, - content, - instruct_content=None, - role="user", - cause_by="", - sent_from="", - send_to=MESSAGE_ROUTE_TO_ALL, - **kwargs, + self, + content, + instruct_content=None, + role="user", + cause_by="", + sent_from="", + send_to=MESSAGE_ROUTE_TO_ALL, + **kwargs, ): """ Parameters not listed below will be stored as meta info, including custom parameters. diff --git a/metagpt/utils/ahttp_client.py b/metagpt/utils/ahttp_client.py index d4f9f94e5..b4a33e9d7 100644 --- a/metagpt/utils/ahttp_client.py +++ b/metagpt/utils/ahttp_client.py @@ -2,29 +2,24 @@ # -*- coding: utf-8 -*- # @Desc : pure async http_client -from typing import Optional, Any, Mapping, Union +from typing import Any, Mapping, Optional, Union -from aiohttp.client import DEFAULT_TIMEOUT import aiohttp +from aiohttp.client import DEFAULT_TIMEOUT -async def apost(url: str, - params: Optional[Mapping[str, str]] = None, - json: Any = None, - data: Any = None, - headers: Optional[dict] = None, - as_json: bool = False, - encoding: str = "utf-8", - timeout: int = DEFAULT_TIMEOUT.total) -> Union[str, dict]: +async def apost( + url: str, + params: Optional[Mapping[str, str]] = None, + json: Any = None, + data: Any = None, + headers: Optional[dict] = None, + as_json: bool = False, + encoding: str = "utf-8", + timeout: int = DEFAULT_TIMEOUT.total, +) -> Union[str, dict]: async with aiohttp.ClientSession() as session: - async with session.post( - url=url, - params=params, - json=json, - data=data, - headers=headers, - timeout=timeout - ) as resp: + async with session.post(url=url, params=params, json=json, data=data, headers=headers, timeout=timeout) as resp: if as_json: data = await resp.json() else: @@ -33,13 +28,15 @@ async def apost(url: str, return data -async def apost_stream(url: str, - params: Optional[Mapping[str, str]] = None, - json: Any = None, - data: Any = None, - headers: Optional[dict] = None, - encoding: str = "utf-8", - timeout: int = DEFAULT_TIMEOUT.total) -> Any: +async def apost_stream( + url: str, + params: Optional[Mapping[str, str]] = None, + json: Any = None, + data: Any = None, + headers: Optional[dict] = None, + encoding: str = "utf-8", + timeout: int = DEFAULT_TIMEOUT.total, +) -> Any: """ usage: result = astream(url="xx") @@ -47,13 +44,6 @@ async def apost_stream(url: str, deal_with(line) """ async with aiohttp.ClientSession() as session: - async with session.post( - url=url, - params=params, - json=json, - data=data, - headers=headers, - timeout=timeout - ) as resp: + async with session.post(url=url, params=params, json=json, data=data, headers=headers, timeout=timeout) as resp: async for line in resp.content: yield line.decode(encoding) diff --git a/metagpt/utils/git_repository.py b/metagpt/utils/git_repository.py index 9827b8252..1340b1768 100644 --- a/metagpt/utils/git_repository.py +++ b/metagpt/utils/git_repository.py @@ -8,13 +8,15 @@ """ from __future__ import annotations -from gitignore_parser import parse_gitignore, rule_from_pattern, handle_negation import shutil from enum import Enum from pathlib import Path from typing import Dict, List + from git.repo import Repo from git.repo.fun import is_git_dir +from gitignore_parser import parse_gitignore + from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.logs import logger from metagpt.utils.dependency_file import DependencyFile @@ -236,8 +238,9 @@ class GitRepository: rpath = file_path.relative_to(root_relative_path) files.append(str(rpath)) else: - subfolder_files = self.get_files(relative_path=file_path, root_relative_path=root_relative_path, - filter_ignored=False) + subfolder_files = self.get_files( + relative_path=file_path, root_relative_path=root_relative_path, filter_ignored=False + ) files.extend(subfolder_files) except Exception as e: logger.error(f"Error: {e}") diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 0a461d360..4aafd8e66 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -4,12 +4,13 @@ import copy from enum import Enum -from typing import Union, Callable -import regex as re -from tenacity import retry, stop_after_attempt, wait_fixed, after_log, RetryCallState +from typing import Callable, Union + +import regex as re +from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed -from metagpt.logs import logger from metagpt.config import CONFIG +from metagpt.logs import logger from metagpt.utils.custom_decoder import CustomDecoder @@ -33,7 +34,7 @@ def repair_case_sensitivity(output: str, req_key: str) -> str: if req_key_lower in output_lower: # find the sub-part index, and replace it with raw req_key lidx = output_lower.find(req_key_lower) - source = output[lidx: lidx + len(req_key_lower)] + source = output[lidx : lidx + len(req_key_lower)] output = output.replace(source, req_key) logger.info(f"repair_case_sensitivity: {req_key}") @@ -73,7 +74,7 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - sc = "/" # special char if req_key.startswith("[") and req_key.endswith("]"): if sc in req_key: - left_key = req_key.replace(sc, "") # `[/req_key]` -> `[req_key]` + left_key = req_key.replace(sc, "") # `[/req_key]` -> `[req_key]` right_key = req_key else: left_key = req_key @@ -82,6 +83,7 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - if left_key not in output: output = left_key + "\n" + output if right_key not in output: + def judge_potential_json(routput: str, left_key: str) -> Union[str, None]: ridx = routput.rfind(left_key) if ridx < 0: @@ -90,7 +92,7 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - idx1 = sub_output.rfind("}") idx2 = sub_output.rindex("]") idx = idx1 if idx1 >= idx2 else idx2 - sub_output = sub_output[: idx+1] + sub_output = sub_output[: idx + 1] return sub_output if output.strip().endswith("}") or (output.strip().endswith("]") and not output.strip().endswith(left_key)): @@ -155,9 +157,7 @@ def repair_llm_raw_output(output: str, req_keys: list[str], repair_type: RepairT # do the repairation usually for non-openai models for req_key in req_keys: - output = _repair_llm_raw_output(output=output, - req_key=req_key, - repair_type=repair_type) + output = _repair_llm_raw_output(output=output, req_key=req_key, repair_type=repair_type) return output @@ -187,7 +187,7 @@ def repair_invalid_json(output: str, error: str) -> str: new_line = line.replace("}", "") elif line.endswith("},") and output.endswith("},"): new_line = line[:-1] - elif '",' not in line and ',' not in line: + elif '",' not in line and "," not in line: new_line = f'{line}",' elif "," not in line: # problem, miss char `,` at the end. @@ -228,8 +228,10 @@ def run_after_exp_and_passon_next_retry(logger: "loguru.Logger") -> Callable[["R elif retry_state.kwargs: func_param_output = retry_state.kwargs.get("output", "") exp_str = str(retry_state.outcome.exception()) - logger.warning(f"parse json from content inside [CONTENT][/CONTENT] failed at retry " - f"{retry_state.attempt_number}, try to fix it, exp: {exp_str}") + logger.warning( + f"parse json from content inside [CONTENT][/CONTENT] failed at retry " + f"{retry_state.attempt_number}, try to fix it, exp: {exp_str}" + ) repaired_output = repair_invalid_json(func_param_output, exp_str) retry_state.kwargs["output"] = repaired_output @@ -260,7 +262,8 @@ def retry_parse_json_text(output: str) -> Union[list, dict]: def extract_content_from_output(content: str, right_key: str = "[/CONTENT]"): - """ extract xxx from [CONTENT](xxx)[/CONTENT] using regex pattern """ + """extract xxx from [CONTENT](xxx)[/CONTENT] using regex pattern""" + def re_extract_content(cont: str, pattern: str) -> str: matches = re.findall(pattern, cont, re.DOTALL) for match in matches: diff --git a/metagpt/utils/utils.py b/metagpt/utils/utils.py index f479ec3b8..5ceed65d9 100644 --- a/metagpt/utils/utils.py +++ b/metagpt/utils/utils.py @@ -4,7 +4,7 @@ import typing -from tenacity import after_log, _utils +from tenacity import _utils def general_after_log(logger: "loguru.Logger", sec_format: str = "%0.3f") -> typing.Callable[["RetryCallState"], None]: @@ -13,7 +13,10 @@ def general_after_log(logger: "loguru.Logger", sec_format: str = "%0.3f") -> typ fn_name = "" else: fn_name = _utils.get_callback_name(retry_state.fn) - logger.error(f"Finished call to '{fn_name}' after {sec_format % retry_state.seconds_since_start}(s), " - f"this was the {_utils.to_ordinal(retry_state.attempt_number)} time calling it. " - f"exp: {retry_state.outcome.exception()}") + logger.error( + f"Finished call to '{fn_name}' after {sec_format % retry_state.seconds_since_start}(s), " + f"this was the {_utils.to_ordinal(retry_state.attempt_number)} time calling it. " + f"exp: {retry_state.outcome.exception()}" + ) + return log_it diff --git a/tests/metagpt/test_llm.py b/tests/metagpt/test_llm.py index 49969a2af..408fd3162 100644 --- a/tests/metagpt/test_llm.py +++ b/tests/metagpt/test_llm.py @@ -33,5 +33,6 @@ async def test_llm_acompletion(llm): assert len(await llm.acompletion_batch([hello_msg])) > 0 assert len(await llm.acompletion_batch_text([hello_msg])) > 0 + # if __name__ == "__main__": # pytest.main([__file__, "-s"]) diff --git a/tests/metagpt/utils/test_ahttp_client.py b/tests/metagpt/utils/test_ahttp_client.py index 15159423a..a595d645f 100644 --- a/tests/metagpt/utils/test_ahttp_client.py +++ b/tests/metagpt/utils/test_ahttp_client.py @@ -9,30 +9,21 @@ from metagpt.utils.ahttp_client import apost, apost_stream @pytest.mark.asyncio async def test_apost(): - result = await apost( - url="https://www.baidu.com/" - ) + result = await apost(url="https://www.baidu.com/") assert "百度一下" in result result = await apost( - url="http://aider.meizu.com/app/weather/listWeather", - data={"cityIds": "101240101"}, - as_json=True + url="http://aider.meizu.com/app/weather/listWeather", data={"cityIds": "101240101"}, as_json=True ) assert result["code"] == "200" @pytest.mark.asyncio async def test_apost_stream(): - result = apost_stream( - url="https://www.baidu.com/" - ) + result = apost_stream(url="https://www.baidu.com/") async for line in result: assert len(line) >= 0 - result = apost_stream( - url="http://aider.meizu.com/app/weather/listWeather", - data={"cityIds": "101240101"} - ) + result = apost_stream(url="http://aider.meizu.com/app/weather/listWeather", data={"cityIds": "101240101"}) async for line in result: assert len(line) >= 0 diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index a2dd18516..21bbee921 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -4,10 +4,15 @@ from metagpt.config import CONFIG -CONFIG.repair_llm_output = True +from metagpt.utils.repair_llm_raw_output import ( + RepairType, + extract_content_from_output, + repair_invalid_json, + repair_llm_raw_output, + retry_parse_json_text, +) -from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType, repair_invalid_json,\ - extract_content_from_output, retry_parse_json_text +CONFIG.repair_llm_output = True def test_repair_case_sensitivity(): @@ -26,8 +31,7 @@ def test_repair_case_sensitivity(): "Requirement Analysis": "The 2048 game should be simple to play" }""" req_keys = ["Original Requirements", "Search Information", "Competitive Quadrant Chart", "Requirement Analysis"] - output = repair_llm_raw_output(output=raw_output, - req_keys=req_keys) + output = repair_llm_raw_output(output=raw_output, req_keys=req_keys) assert output == target_output @@ -40,8 +44,7 @@ def test_repair_special_character_missing(): "Anything UNCLEAR": "No unclear requirements or information." [/CONTENT]""" req_keys = ["[/CONTENT]"] - output = repair_llm_raw_output(output=raw_output, - req_keys=req_keys) + output = repair_llm_raw_output(output=raw_output, req_keys=req_keys) assert output == target_output raw_output = """[CONTENT] tag @@ -56,15 +59,13 @@ def test_repair_special_character_missing(): "Anything UNCLEAR": "No unclear requirements or information." } [/CONTENT]""" - output = repair_llm_raw_output(output=raw_output, - req_keys=req_keys) + output = repair_llm_raw_output(output=raw_output, req_keys=req_keys) assert output == target_output raw_output = '[CONTENT] {"a": "b"} [CONTENT]' target_output = '[CONTENT] {"a": "b"} [/CONTENT]' - output = repair_llm_raw_output(output=raw_output, - req_keys=["[/CONTENT]"]) + output = repair_llm_raw_output(output=raw_output, req_keys=["[/CONTENT]"]) print("output\n", output) assert output == target_output @@ -73,38 +74,35 @@ def test_required_key_pair_missing(): raw_output = '[CONTENT] {"a": "b"}' target_output = '[CONTENT] {"a": "b"}\n[/CONTENT]' - output = repair_llm_raw_output(output=raw_output, - req_keys=["[/CONTENT]"]) + output = repair_llm_raw_output(output=raw_output, req_keys=["[/CONTENT]"]) assert output == target_output - raw_output = '''[CONTENT] + raw_output = """[CONTENT] { "key": "value" -]''' - target_output = '''[CONTENT] +]""" + target_output = """[CONTENT] { "key": "value" ] -[/CONTENT]''' +[/CONTENT]""" - output = repair_llm_raw_output(output=raw_output, - req_keys=["[/CONTENT]"]) + output = repair_llm_raw_output(output=raw_output, req_keys=["[/CONTENT]"]) assert output == target_output - raw_output = '''[CONTENT] tag + raw_output = """[CONTENT] tag [CONTENT] { "key": "value" } xxx -''' - target_output = '''[CONTENT] +""" + target_output = """[CONTENT] { "key": "value" } -[/CONTENT]''' - output = repair_llm_raw_output(output=raw_output, - req_keys=["[/CONTENT]"]) +[/CONTENT]""" + output = repair_llm_raw_output(output=raw_output, req_keys=["[/CONTENT]"]) assert output == target_output @@ -112,25 +110,19 @@ def test_repair_json_format(): raw_output = "{ xxx }]" target_output = "{ xxx }" - output = repair_llm_raw_output(output=raw_output, - req_keys=[None], - repair_type=RepairType.JSON) + output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON) assert output == target_output raw_output = "[{ xxx }" target_output = "{ xxx }" - output = repair_llm_raw_output(output=raw_output, - req_keys=[None], - repair_type=RepairType.JSON) + output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON) assert output == target_output raw_output = "{ xxx ]" target_output = "{ xxx }" - output = repair_llm_raw_output(output=raw_output, - req_keys=[None], - repair_type=RepairType.JSON) + output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON) assert output == target_output @@ -186,7 +178,7 @@ def test_retry_parse_json_text(): target_json = { "Original Requirements": "Create a 2048 game", "Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis", - "Requirement Analysis": "The requirements are clear and well-defined" + "Requirement Analysis": "The requirements are clear and well-defined", } output = retry_parse_json_text(output=invalid_json_text) assert output == target_json @@ -200,7 +192,7 @@ def test_retry_parse_json_text(): target_json = { "Original Requirements": "Create a 2048 game", "Competitive Quadrant Chart": "quadrantChart\n\ttitle Reach and engagement of campaigns\n\t\tx-axis", - "Requirement Analysis": "The requirements are clear and well-defined" + "Requirement Analysis": "The requirements are clear and well-defined", } output = retry_parse_json_text(output=invalid_json_text) assert output == target_json @@ -214,84 +206,88 @@ def test_extract_content_from_output(): xxx [CONTENT] xxxx [/CONTENT] xxx [CONTENT][/CONTENT] xxx [CONTENT][/CONTENT] # target pair is the last one """ - output = 'Sure! Here is the properly formatted JSON output based on the given context:\n\n[CONTENT]\n{\n"' \ - 'Required Python third-party packages": [\n"pygame==2.0.4",\n"pytest"\n],\n"Required Other language ' \ - 'third-party packages": [\n"No third-party packages are required."\n],\n"Full API spec": "\nopenapi: ' \ - '3.0.0\n\ndescription: A JSON object representing the game state.\n\npaths:\n game:\n get:\n ' \ - 'summary: Get the current game state.\n responses:\n 200:\n description: Game state.' \ - '\n\n moves:\n post:\n summary: Make a move.\n requestBody:\n description: Move to be ' \ - 'made.\n content:\n applicationjson:\n schema:\n type: object\n ' \ - ' properties:\n x:\n type: integer\n y:\n ' \ - ' type: integer\n tile:\n type: object\n ' \ - 'properties:\n value:\n type: integer\n x:\n ' \ - ' type: integer\n y:\n type: integer\n\n ' \ - 'undo-move:\n post:\n summary: Undo the last move.\n responses:\n 200:\n ' \ - ' description: Undone move.\n\n end-game:\n post:\n summary: End the game.\n responses:\n ' \ - ' 200:\n description: Game ended.\n\n start-game:\n post:\n summary: Start a new ' \ - 'game.\n responses:\n 200:\n description: Game started.\n\n game-over:\n get:\n ' \ - ' summary: Check if the game is over.\n responses:\n 200:\n description: Game ' \ - 'over.\n 404:\n description: Game not over.\n\n score:\n get:\n summary: Get the ' \ - 'current score.\n responses:\n 200:\n description: Score.\n\n tile:\n get:\n ' \ - 'summary: Get a specific tile.\n parameters:\n tile_id:\n type: integer\n ' \ - 'description: ID of the tile to get.\n responses:\n 200:\n description: Tile.\n\n ' \ - 'tiles:\n get:\n summary: Get all tiles.\n responses:\n 200:\n description: ' \ - 'Tiles.\n\n level:\n get:\n summary: Get the current level.\n responses:\n 200:\n ' \ - ' description: Level.\n\n level-up:\n post:\n summary: Level up.\n responses:\n ' \ - '200:\n description: Level up successful.\n\n level-down:\n post:\n summary: Level ' \ - 'down.\n responses:\n 200:\n description: Level down successful.\n\n restart:\n ' \ - 'post:\n summary: Restart the game.\n responses:\n 200:\n description: Game ' \ - 'restarted.\n\n help:\n get:\n summary: Get help.\n responses:\n 200:\n ' \ - 'description: Help.\n\n version:\n get:\n summary: Get the version of the game.\n ' \ - 'responses:\n 200:\n description: Version.\n\n}\n\n"Logic Analysis": [\n"game.py",' \ - '\n"Contains the game logic."\n],\n"Task list": [\n"game.py",\n"Contains the game logic and should be ' \ - 'done first."\n],\n"Shared Knowledge": "\n\'game.py\' contains the game logic.\n",\n"Anything ' \ - 'UNCLEAR": "How to start the game."\n]\n\n[/CONTENT] Great! Your JSON output is properly formatted ' \ - 'and correctly includes all the required sections. Here\'s a breakdown of what each section ' \ - 'contains:\n\nRequired Python third-party packages:\n\n* pygame==2.0.4\n* pytest\n\nRequired Other ' \ - 'language third-party packages:\n\n* No third-party packages are required.\n\nFull API spec:\n\n* ' \ - 'openapi: 3.0.0\n* description: A JSON object representing the game state.\n* paths:\n + game: ' \ - 'Get the current game state.\n + moves: Make a move.\n + undo-move: Undo the last move.\n + ' \ - 'end-game: End the game.\n + start-game: Start a new game.\n + game-over: Check if the game is ' \ - 'over.\n + score: Get the current score.\n + tile: Get a specific tile.\n + tiles: Get all tiles.\n ' \ - '+ level: Get the current level.\n + level-up: Level up.\n + level-down: Level down.\n + restart: ' \ - 'Restart the game.\n + help: Get help.\n + version: Get the version of the game.\n\nLogic ' \ - 'Analysis:\n\n* game.py contains the game logic.\n\nTask list:\n\n* game.py contains the game logic ' \ - 'and should be done first.\n\nShared Knowledge:\n\n* \'game.py\' contains the game logic.\n\nAnything ' \ - 'UNCLEAR:\n\n* How to start the game.\n\nGreat job! This JSON output should provide a clear and ' \ - 'comprehensive overview of the project\'s requirements and dependencies.' + output = ( + 'Sure! Here is the properly formatted JSON output based on the given context:\n\n[CONTENT]\n{\n"' + 'Required Python third-party packages": [\n"pygame==2.0.4",\n"pytest"\n],\n"Required Other language ' + 'third-party packages": [\n"No third-party packages are required."\n],\n"Full API spec": "\nopenapi: ' + "3.0.0\n\ndescription: A JSON object representing the game state.\n\npaths:\n game:\n get:\n " + "summary: Get the current game state.\n responses:\n 200:\n description: Game state." + "\n\n moves:\n post:\n summary: Make a move.\n requestBody:\n description: Move to be " + "made.\n content:\n applicationjson:\n schema:\n type: object\n " + " properties:\n x:\n type: integer\n y:\n " + " type: integer\n tile:\n type: object\n " + "properties:\n value:\n type: integer\n x:\n " + " type: integer\n y:\n type: integer\n\n " + "undo-move:\n post:\n summary: Undo the last move.\n responses:\n 200:\n " + " description: Undone move.\n\n end-game:\n post:\n summary: End the game.\n responses:\n " + " 200:\n description: Game ended.\n\n start-game:\n post:\n summary: Start a new " + "game.\n responses:\n 200:\n description: Game started.\n\n game-over:\n get:\n " + " summary: Check if the game is over.\n responses:\n 200:\n description: Game " + "over.\n 404:\n description: Game not over.\n\n score:\n get:\n summary: Get the " + "current score.\n responses:\n 200:\n description: Score.\n\n tile:\n get:\n " + "summary: Get a specific tile.\n parameters:\n tile_id:\n type: integer\n " + "description: ID of the tile to get.\n responses:\n 200:\n description: Tile.\n\n " + "tiles:\n get:\n summary: Get all tiles.\n responses:\n 200:\n description: " + "Tiles.\n\n level:\n get:\n summary: Get the current level.\n responses:\n 200:\n " + " description: Level.\n\n level-up:\n post:\n summary: Level up.\n responses:\n " + "200:\n description: Level up successful.\n\n level-down:\n post:\n summary: Level " + "down.\n responses:\n 200:\n description: Level down successful.\n\n restart:\n " + "post:\n summary: Restart the game.\n responses:\n 200:\n description: Game " + "restarted.\n\n help:\n get:\n summary: Get help.\n responses:\n 200:\n " + "description: Help.\n\n version:\n get:\n summary: Get the version of the game.\n " + 'responses:\n 200:\n description: Version.\n\n}\n\n"Logic Analysis": [\n"game.py",' + '\n"Contains the game logic."\n],\n"Task list": [\n"game.py",\n"Contains the game logic and should be ' + 'done first."\n],\n"Shared Knowledge": "\n\'game.py\' contains the game logic.\n",\n"Anything ' + 'UNCLEAR": "How to start the game."\n]\n\n[/CONTENT] Great! Your JSON output is properly formatted ' + "and correctly includes all the required sections. Here's a breakdown of what each section " + "contains:\n\nRequired Python third-party packages:\n\n* pygame==2.0.4\n* pytest\n\nRequired Other " + "language third-party packages:\n\n* No third-party packages are required.\n\nFull API spec:\n\n* " + "openapi: 3.0.0\n* description: A JSON object representing the game state.\n* paths:\n + game: " + "Get the current game state.\n + moves: Make a move.\n + undo-move: Undo the last move.\n + " + "end-game: End the game.\n + start-game: Start a new game.\n + game-over: Check if the game is " + "over.\n + score: Get the current score.\n + tile: Get a specific tile.\n + tiles: Get all tiles.\n " + "+ level: Get the current level.\n + level-up: Level up.\n + level-down: Level down.\n + restart: " + "Restart the game.\n + help: Get help.\n + version: Get the version of the game.\n\nLogic " + "Analysis:\n\n* game.py contains the game logic.\n\nTask list:\n\n* game.py contains the game logic " + "and should be done first.\n\nShared Knowledge:\n\n* 'game.py' contains the game logic.\n\nAnything " + "UNCLEAR:\n\n* How to start the game.\n\nGreat job! This JSON output should provide a clear and " + "comprehensive overview of the project's requirements and dependencies." + ) output = extract_content_from_output(output) - assert output.startswith('{\n"Required Python third-party packages') and \ - output.endswith('UNCLEAR": "How to start the game."\n]') + assert output.startswith('{\n"Required Python third-party packages') and output.endswith( + 'UNCLEAR": "How to start the game."\n]' + ) - output = 'Sure, I would be happy to help! Here is the information you provided, formatted as a JSON object ' \ - 'inside the [CONTENT] tag:\n\n[CONTENT]\n{\n"Original Requirements": "Create a 2048 game",\n"Search ' \ - 'Information": "Search results for 2048 game",\n"Requirements": [\n"Create a game with the same rules ' \ - 'as the original 2048 game",\n"Implement a user interface that is easy to use and understand",\n"Add a ' \ - 'scoreboard to track the player progress",\n"Allow the player to undo and redo moves",\n"Implement a ' \ - 'game over screen to display the final score"\n],\n"Product Goals": [\n"Create a fun and engaging game ' \ - 'experience for the player",\n"Design a user interface that is visually appealing and easy to use",\n"' \ - 'Optimize the game for performance and responsiveness"\n],\n"User Stories": [\n"As a player, I want to ' \ - 'be able to move tiles around the board to combine numbers",\n"As a player, I want to be able to undo ' \ - 'and redo moves to correct mistakes",\n"As a player, I want to see the final score and game over screen' \ - ' when I win"\n],\n"Competitive Analysis": [\n"Competitor A: 2048 game with a simple user interface and' \ - ' basic graphics",\n"Competitor B: 2048 game with a more complex user interface and better graphics",' \ - '\n"Competitor C: 2048 game with a unique twist on the rules and a more challenging gameplay experience"' \ - '\n],\n"Competitive Quadrant Chart": "quadrantChart\\n\ttitle Reach and engagement of campaigns\\n\t\t' \ - 'x-axis Low Reach --> High Reach\\n\t\ty-axis Low Engagement --> High Engagement\\n\tquadrant-1 We ' \ - 'should expand\\n\tquadrant-2 Need to promote\\n\tquadrant-3 Re-evaluate\\n\tquadrant-4 May be ' \ - 'improved\\n\tCampaign A: [0.3, 0.6]\\n\tCampaign B: [0.45, 0.23]\\n\tCampaign C: [0.57, 0.69]\\n\t' \ - 'Campaign D: [0.78, 0.34]\\n\tCampaign E: [0.40, 0.34]\\n\tCampaign F: [0.35, 0.78]"\n],\n"Requirement ' \ - 'Analysis": "The requirements are clear and well-defined, but there may be some ambiguity around the ' \ - 'specific implementation details",\n"Requirement Pool": [\n["P0", "Implement a game with the same ' \ - 'rules as the original 2048 game"],\n["P1", "Add a scoreboard to track the player progress"],\n["P2", ' \ - '"Allow the player to undo and redo moves"]\n],\n"UI Design draft": "The UI should be simple and easy ' \ - 'to use, with a clean and visually appealing design. The game board should be the main focus of the ' \ - 'UI, with clear and concise buttons for the player to interact with.",\n"Anything UNCLEAR": ""\n}\n' \ - '[/CONTENT]\n\nI hope this helps! Let me know if you have any further questions or if there anything ' \ - 'else I can do to assist you.' + output = ( + "Sure, I would be happy to help! Here is the information you provided, formatted as a JSON object " + 'inside the [CONTENT] tag:\n\n[CONTENT]\n{\n"Original Requirements": "Create a 2048 game",\n"Search ' + 'Information": "Search results for 2048 game",\n"Requirements": [\n"Create a game with the same rules ' + 'as the original 2048 game",\n"Implement a user interface that is easy to use and understand",\n"Add a ' + 'scoreboard to track the player progress",\n"Allow the player to undo and redo moves",\n"Implement a ' + 'game over screen to display the final score"\n],\n"Product Goals": [\n"Create a fun and engaging game ' + 'experience for the player",\n"Design a user interface that is visually appealing and easy to use",\n"' + 'Optimize the game for performance and responsiveness"\n],\n"User Stories": [\n"As a player, I want to ' + 'be able to move tiles around the board to combine numbers",\n"As a player, I want to be able to undo ' + 'and redo moves to correct mistakes",\n"As a player, I want to see the final score and game over screen' + ' when I win"\n],\n"Competitive Analysis": [\n"Competitor A: 2048 game with a simple user interface and' + ' basic graphics",\n"Competitor B: 2048 game with a more complex user interface and better graphics",' + '\n"Competitor C: 2048 game with a unique twist on the rules and a more challenging gameplay experience"' + '\n],\n"Competitive Quadrant Chart": "quadrantChart\\n\ttitle Reach and engagement of campaigns\\n\t\t' + "x-axis Low Reach --> High Reach\\n\t\ty-axis Low Engagement --> High Engagement\\n\tquadrant-1 We " + "should expand\\n\tquadrant-2 Need to promote\\n\tquadrant-3 Re-evaluate\\n\tquadrant-4 May be " + "improved\\n\tCampaign A: [0.3, 0.6]\\n\tCampaign B: [0.45, 0.23]\\n\tCampaign C: [0.57, 0.69]\\n\t" + 'Campaign D: [0.78, 0.34]\\n\tCampaign E: [0.40, 0.34]\\n\tCampaign F: [0.35, 0.78]"\n],\n"Requirement ' + 'Analysis": "The requirements are clear and well-defined, but there may be some ambiguity around the ' + 'specific implementation details",\n"Requirement Pool": [\n["P0", "Implement a game with the same ' + 'rules as the original 2048 game"],\n["P1", "Add a scoreboard to track the player progress"],\n["P2", ' + '"Allow the player to undo and redo moves"]\n],\n"UI Design draft": "The UI should be simple and easy ' + "to use, with a clean and visually appealing design. The game board should be the main focus of the " + 'UI, with clear and concise buttons for the player to interact with.",\n"Anything UNCLEAR": ""\n}\n' + "[/CONTENT]\n\nI hope this helps! Let me know if you have any further questions or if there anything " + "else I can do to assist you." + ) output = extract_content_from_output(output) - assert output.startswith('{\n"Original Requirements"') and \ - output.endswith('"Anything UNCLEAR": ""\n}') + assert output.startswith('{\n"Original Requirements"') and output.endswith('"Anything UNCLEAR": ""\n}') output = """ Sure, I'd be happy to help! Here's the JSON output for the given context:\n\n[CONTENT]\n{ "Implementation approach": "We will use the open-source framework PyGame to create a 2D game engine, which will @@ -316,5 +312,6 @@ def test_extract_content_from_output(): information for a developer to understand the design and implementation of the 2048 game. """ output = extract_content_from_output(output) - assert output.startswith('{\n"Implementation approach"') and \ - output.endswith('"Anything UNCLEAR": "The requirement is clear to me."\n}') + assert output.startswith('{\n"Implementation approach"') and output.endswith( + '"Anything UNCLEAR": "The requirement is clear to me."\n}' + ) From ce1895a40bfde64af82d6a5cde5c90c1fcef41b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Thu, 14 Dec 2023 21:28:11 +0800 Subject: [PATCH 25/38] feat: Assume it's new requirements if the code directory does not exist --- metagpt/actions/write_prd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metagpt/actions/write_prd.py b/metagpt/actions/write_prd.py index b9bad2233..bb0cf8fb9 100644 --- a/metagpt/actions/write_prd.py +++ b/metagpt/actions/write_prd.py @@ -190,5 +190,9 @@ class WritePRD(Action): CONFIG.git_repo.rename_root(CONFIG.project_name) async def _is_bugfix(self, context) -> bool: + src_workspace_path = CONFIG.git_repo.workdir / CONFIG.git_repo.workdir.name + code_files = CONFIG.git_repo.get_files(relative_path=src_workspace_path) + if not code_files: + return False node = await WP_ISSUE_TYPE_NODE.fill(context, self.llm) return node.get("issue_type") == "BUG" From 84357651e53a82669238ae91ed98610810ddcd89 Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 23:54:00 +0800 Subject: [PATCH 26/38] resolve conflicts --- metagpt/actions/action_node.py | 4 ---- metagpt/actions/project_management_an.py | 3 ++- metagpt/actions/write_prd_an.py | 2 +- metagpt/roles/architect.py | 3 ++- metagpt/roles/engineer.py | 3 ++- metagpt/roles/product_manager.py | 4 ++-- metagpt/roles/project_manager.py | 4 ++-- metagpt/team.py | 2 +- 8 files changed, 12 insertions(+), 13 deletions(-) diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index f5009f345..9fb10f35c 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -52,10 +52,6 @@ def dict_to_markdown(d, prefix="-", postfix="\n"): class ActionNode: """ActionNode is a tree of nodes.""" - - # Action Strgy - # - sop: 仅使用一级SOP - # - complex: 使用一级SOP+自定义策略填槽 mode: str # Action Context diff --git a/metagpt/actions/project_management_an.py b/metagpt/actions/project_management_an.py index e03af36d7..970cb0594 100644 --- a/metagpt/actions/project_management_an.py +++ b/metagpt/actions/project_management_an.py @@ -43,7 +43,8 @@ TASK_LIST = ActionNode( FULL_API_SPEC = ActionNode( key="Full API spec", expected_type=str, - instruction="Describe all APIs using OpenAPI 3.0 spec that may be used by both frontend and backend.", + instruction="Describe all APIs using OpenAPI 3.0 spec that may be used by both frontend and backend. If front-end " + "and back-end communication is not required, leave it blank.", example="openapi: 3.0.0 ...", ) diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py index 2c81bdb6e..68402e504 100644 --- a/metagpt/actions/write_prd_an.py +++ b/metagpt/actions/write_prd_an.py @@ -86,7 +86,7 @@ REQUIREMENT_ANALYSIS = ActionNode( key="Requirement Analysis", expected_type=str, instruction="Provide a detailed analysis of the requirements.", - example="The product should be user-friendly and performance-optimized.", + example="The product should be user-friendly.", ) REQUIREMENT_POOL = ActionNode( diff --git a/metagpt/roles/architect.py b/metagpt/roles/architect.py index 2c0bdd1d6..fa91d393d 100644 --- a/metagpt/roles/architect.py +++ b/metagpt/roles/architect.py @@ -27,7 +27,8 @@ class Architect(Role): name: str = "Bob", profile: str = "Architect", goal: str = "design a concise, usable, complete software system", - constraints: str = "make sure the architecture is simple enough and use appropriate open source libraries", + constraints: str = "make sure the architecture is simple enough and use appropriate open source libraries." + "Use same language as user requirement" ) -> None: """Initializes the Architect with given attributes.""" super().__init__(name, profile, goal, constraints) diff --git a/metagpt/roles/engineer.py b/metagpt/roles/engineer.py index 844f3589d..2f99d132e 100644 --- a/metagpt/roles/engineer.py +++ b/metagpt/roles/engineer.py @@ -72,7 +72,8 @@ class Engineer(Role): name: str = "Alex", profile: str = "Engineer", goal: str = "write elegant, readable, extensible, efficient code", - constraints: str = "the code should conform to standards like PEP8 and be modular and maintainable", + constraints: str = "the code should conform to standards like PEP8 and be modular and maintainable. " + "Use same language as user requirement", n_borg: int = 1, use_code_review: bool = False, ) -> None: diff --git a/metagpt/roles/product_manager.py b/metagpt/roles/product_manager.py index 017feade7..e5e9f2b5e 100644 --- a/metagpt/roles/product_manager.py +++ b/metagpt/roles/product_manager.py @@ -28,8 +28,8 @@ class ProductManager(Role): self, name: str = "Alice", profile: str = "Product Manager", - goal: str = "Efficiently create a successful product", - constraints: str = "", + goal: str = "efficiently create a successful product", + constraints: str = "use same language as user requirement", ) -> None: """ Initializes the ProductManager role with given attributes. diff --git a/metagpt/roles/project_manager.py b/metagpt/roles/project_manager.py index bfe1be251..5a2b9be50 100644 --- a/metagpt/roles/project_manager.py +++ b/metagpt/roles/project_manager.py @@ -26,8 +26,8 @@ class ProjectManager(Role): name: str = "Eve", profile: str = "Project Manager", goal: str = "break down tasks according to PRD/technical design, generate a task list, and analyze task " - "dependencies to start with the prerequisite modules", - constraints: str = "", + "dependencies to start with the prerequisite modules", + constraints: str = "use same language as user requirement", ) -> None: """ Initializes the ProjectManager role with given attributes. diff --git a/metagpt/team.py b/metagpt/team.py index 92f379c97..e1b2a9ffc 100644 --- a/metagpt/team.py +++ b/metagpt/team.py @@ -63,7 +63,7 @@ class Team(BaseModel): while n_round > 0: # self._save() n_round -= 1 - logger.debug(f"{n_round=}") + logger.info(f"max {n_round=} left.") self._check_balance() await self.env.run() if CONFIG.git_repo: From ad0ac940936e089058842f953426b25533d7614f Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 20:27:18 +0800 Subject: [PATCH 27/38] fix code review performance drop --- metagpt/actions/write_code.py | 6 ++++-- metagpt/actions/write_code_review.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index a2501db2a..b759f4e2a 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -115,7 +115,7 @@ class WriteCode(Action): if test_doc: test_detail = RunCodeResult.loads(test_doc.content) logs = test_detail.stderr - code_context = await self._get_codes(coding_context.task_doc) + code_context = await self.get_codes(coding_context.task_doc, exclude=self.context.filename) prompt = PROMPT_TEMPLATE.format( design=coding_context.design_doc.content, tasks=coding_context.task_doc.content if coding_context.task_doc else "", @@ -133,7 +133,7 @@ class WriteCode(Action): return coding_context @staticmethod - async def _get_codes(task_doc) -> str: + async def get_codes(task_doc, exclude) -> str: if not task_doc: return "" if not task_doc.content: @@ -143,6 +143,8 @@ class WriteCode(Action): codes = [] src_file_repo = CONFIG.git_repo.new_file_repository(relative_path=CONFIG.src_workspace) for filename in code_filenames: + if filename == exclude: + continue doc = await src_file_repo.get(filename=filename) if not doc: continue diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index e0a538fc8..75313fea5 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -10,6 +10,7 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential +from metagpt.actions import WriteCode from metagpt.actions.action import Action from metagpt.config import CONFIG from metagpt.logs import logger @@ -109,11 +110,12 @@ class WriteCodeReview(Action): for i in range(k): format_example = FORMAT_EXAMPLE.format(filename=self.context.code_doc.filename) task_content = self.context.task_doc.content if self.context.task_doc else "" + code_context = await WriteCode.get_codes(self.context.task_doc, exclude=self.context.filename) context = "\n----------\n".join( [ "```text\n" + self.context.design_doc.content + "```\n", "```text\n" + task_content + "```\n", - "```python\n" + self.context.code_doc.content + "```\n", + "```python\n" + code_context + "```\n", ] ) prompt = PROMPT_TEMPLATE.format( From ccecb45b13f5786c5ff842ee27516f67ec97b7f4 Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 23:54:38 +0800 Subject: [PATCH 28/38] resolve conflicts --- metagpt/actions/action.py | 1 + metagpt/actions/action_node.py | 2 +- metagpt/actions/write_code.py | 61 ++++++++---------- metagpt/actions/write_code_review.py | 95 ++++++++++++++++------------ metagpt/actions/write_prd_an.py | 2 +- metagpt/provider/base_gpt_api.py | 2 +- metagpt/roles/engineer.py | 7 +- metagpt/roles/role.py | 6 +- metagpt/schema.py | 2 +- 9 files changed, 95 insertions(+), 83 deletions(-) diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index 1d9be60e0..6c1f63f45 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -44,6 +44,7 @@ class Action(ABC): self.prefix = prefix self.profile = profile self.llm.system_prompt = prefix + return self def __str__(self): return self.__class__.__name__ diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 9fb10f35c..1d808ec70 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -243,7 +243,7 @@ class ActionNode: ) return prompt - @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) + @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(6)) async def _aask_v1( self, prompt: str, diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index b759f4e2a..a91e4ee1e 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -34,59 +34,52 @@ from metagpt.utils.file_repository import FileRepository PROMPT_TEMPLATE = """ NOTICE -Role: You are a professional engineer; the main goal is to write PEP8 compliant, elegant, modular, easy to read and maintain Python 3.9 code (but you can also use other programming language) +Role: You are a professional engineer; the main goal is to write google-style, elegant, modular, easy to read and maintain code Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenced "Format example". ------ -# Design -```json +# Context +## Design {design} -``` ------ -# Tasks -```json + +## Tasks {tasks} -``` ------ -# Legacy Code -```python + +## Legacy Code +```Code {code} ``` ------ -# Debug logs + +## Debug logs ```text {logs} {summary_log} ``` ------ -# Bug Feedback logs + +## Bug Feedback logs ```text {feedback} ``` ------ - -## Code: {filename} Write code with triple quoto, based on the following list and context. -1. Do your best to implement THIS ONLY ONE FILE. ONLY USE EXISTING API. IF NO API, IMPLEMENT IT. -2. Requirement: Based on the context, implement one following code file, note to return only in code form, your code will be part of the entire project, so please implement complete, reliable, reusable code snippets -3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. -4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. -5. Think before writing: What should be implemented and provided in this document? -6. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. -7. Do not use public member functions that do not exist in your design. -8. Before using a variable, make sure you reference it first -9. Write out EVERY DETAIL, DON'T LEAVE TODO. - -## Format example ------ +# Format example ## Code: {filename} ```python ## {filename} ... ``` ------ + +# Instruction: Based on the context, follow "Format example", write code. + +## Code: {filename} Write code with triple quoto, based on the following attentions and context. +1. Only One file: do your best to implement THIS ONLY ONE FILE. +2. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. +3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. AVOID circular import. +4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. +5. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. +6. Before using a external variable/module, make sure you import it first. +7. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. + """ @@ -148,5 +141,5 @@ class WriteCode(Action): doc = await src_file_repo.get(filename=filename) if not doc: continue - codes.append(doc.content) - return "\n----------\n".join(codes) + codes.append(f"----- {filename}\n" + doc.content) + return "\n".join(codes) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index 75313fea5..f63a399a9 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -18,8 +18,8 @@ from metagpt.schema import CodingContext from metagpt.utils.common import CodeParser PROMPT_TEMPLATE = """ -NOTICE -Role: You are a professional software engineer, and your main task is to review the code. You need to ensure that the code conforms to the PEP8 standards, is elegantly designed and modularized, easy to read and maintain, and is written in Python 3.9 (or in another programming language). +# System +Role: You are a professional software engineer, and your main task is to review and revise the code. You need to ensure that the code conforms to the google-style standards, is elegantly designed and modularized, easy to read and maintain. Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenced "Format example". @@ -27,53 +27,52 @@ ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenc {context} ## Code to be Reviewed: {filename} -``` +```Code {code} ``` +""" ------ -## Code Review: Based on the "Code to be Reviewed", provide key, clear, concise, and specific code modification suggestions, up to 5. +EXAMPLE_AND_INSTRUCTION = """ + +{format_example} + + +# Instruction: Based on the actual code situation, follow one of the "Format example". + +## Code Review: Ordered List. Based on the "Code to be Reviewed", provide key, clear, concise, and specific answer. If any answer is no, explain how to fix it step by step. 1. Is the code implemented as per the requirements? If not, how to achieve it? Analyse it step by step. 2. Is the code logic completely correct? If there are errors, please indicate how to correct them. 3. Does the existing code follow the "Data structures and interfaces"? 4. Are all functions implemented? If there is no implementation, please indicate how to achieve it step by step. 5. Have all necessary pre-dependencies been imported? If not, indicate which ones need to be imported -6. Is the code implemented concisely enough? Are methods from other files being reused correctly? +6. Are methods from other files being reused correctly? -## Code Review Result: If the code doesn't have bugs, we don't need to rewrite it, so answer LGTM and stop. ONLY ANSWER LGTM/LBTM. +## Actions: Ordered List. Things that should be done after CR, such as implementing class A and function B + +## Code Review Result: str. If the code doesn't have bugs, we don't need to rewrite it, so answer LGTM and stop. ONLY ANSWER LGTM/LBTM. LGTM/LBTM -## Rewrite Code: if it still has some bugs, rewrite {filename} based on "Code Review" with triple quotes, try to get LGTM. Do your utmost to optimize THIS SINGLE FILE. Implement ALL TODO. RETURN ALL CODE, NEVER OMIT ANYTHING. 以任何方式省略代码都是不允许的。 -``` -``` - -## Format example -{format_example} - """ FORMAT_EXAMPLE = """ ------ -# EXAMPLE 1 +# Format example 1 ## Code Review: {filename} -1. No, we should add the logic of ... +1. No, we should fix the logic of class A due to ... 2. ... 3. ... -4. ... +4. No, function B is not implemented, ... 5. ... 6. ... -## Code Review Result: {filename} +## Actions +1. fix class A +2. implement function B + +## Code Review Result LBTM -## Rewrite Code: {filename} -```python -## {filename} -... -``` ------ -# EXAMPLE 2 +# Format example 2 ## Code Review: {filename} 1. Yes. 2. Yes. @@ -82,12 +81,20 @@ LBTM 5. Yes. 6. Yes. -## Code Review Result: {filename} -LGTM - -## Rewrite Code: {filename} +## Actions pass ------ + +## Code Review Result +LGTM +""" + +REWRITE_CODE_TEMPLATE = """ +# Instruction: rewrite code based on the Code Review and Actions +## Rewrite Code: CodeBlock. If it still has some bugs, rewrite {filename} with triple quotes. Do your utmost to optimize THIS SINGLE FILE. Return all completed codes and prohibit the return of unfinished codes. +```Code +## {filename} +... +``` """ @@ -96,11 +103,15 @@ class WriteCodeReview(Action): super().__init__(name, context, llm) @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) - async def write_code_review_and_rewrite(self, prompt): - code_rsp = await self._aask(prompt) - result = CodeParser.parse_block("Code Review Result", code_rsp) + async def write_code_review_and_rewrite(self, context_prompt, cr_prompt, filename): + cr_rsp = await self._aask(context_prompt + cr_prompt) + result = CodeParser.parse_block("Code Review Result", cr_rsp) if "LGTM" in result: return result, None + + # if LBTM, rewrite code + rewrite_prompt = f"{context_prompt}\n{cr_rsp}\n{REWRITE_CODE_TEMPLATE.format(filename=filename)}" + code_rsp = await self._aask(rewrite_prompt) code = CodeParser.parse_code(block="", text=code_rsp) return result, code @@ -111,23 +122,23 @@ class WriteCodeReview(Action): format_example = FORMAT_EXAMPLE.format(filename=self.context.code_doc.filename) task_content = self.context.task_doc.content if self.context.task_doc else "" code_context = await WriteCode.get_codes(self.context.task_doc, exclude=self.context.filename) - context = "\n----------\n".join( + context = "\n".join( [ - "```text\n" + self.context.design_doc.content + "```\n", - "```text\n" + task_content + "```\n", - "```python\n" + code_context + "```\n", + "## System Design\n" + self.context.design_doc.content + "\n", + "## Tasks\n" + task_content + "\n", + "## Code Files\n" + code_context + "\n", ] ) - prompt = PROMPT_TEMPLATE.format( + context_prompt = PROMPT_TEMPLATE.format( context=context, code=iterative_code, filename=self.context.code_doc.filename, - format_example=format_example, ) + cr_prompt = EXAMPLE_AND_INSTRUCTION.format(format_example=format_example, ) logger.info( - f"Code review and rewrite {self.context.code_doc.filename,}: {i+1}/{k} | {len(iterative_code)=}, {len(self.context.code_doc.content)=}" + f"Code review and rewrite {self.context.code_doc.filename}: {i+1}/{k} | {len(iterative_code)=}, {len(self.context.code_doc.content)=}" ) - result, rewrited_code = await self.write_code_review_and_rewrite(prompt) + result, rewrited_code = await self.write_code_review_and_rewrite(context_prompt, cr_prompt, self.context.code_doc.filename) if "LBTM" in result: iterative_code = rewrited_code elif "LGTM" in result: diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py index 68402e504..d96c0aeac 100644 --- a/metagpt/actions/write_prd_an.py +++ b/metagpt/actions/write_prd_an.py @@ -93,7 +93,7 @@ REQUIREMENT_POOL = ActionNode( key="Requirement Pool", expected_type=list[list[str]], instruction="List down the requirements with their priority (P0, P1, P2).", - example=[["P0", "High priority requirement"], ["P1", "Medium priority requirement"]], + example=[["P0", "..."], ["P1", "..."]], ) UI_DESIGN_DRAFT = ActionNode( diff --git a/metagpt/provider/base_gpt_api.py b/metagpt/provider/base_gpt_api.py index 6c1dc8338..c38576806 100644 --- a/metagpt/provider/base_gpt_api.py +++ b/metagpt/provider/base_gpt_api.py @@ -49,8 +49,8 @@ class BaseGPTAPI(BaseChatbot): message = ( [self._default_system_msg(), self._user_msg(msg)] if self.use_system_prompt else [self._user_msg(msg)] ) - rsp = await self.acompletion_text(message, stream=stream) logger.debug(message) + rsp = await self.acompletion_text(message, stream=stream) # logger.debug(rsp) return rsp diff --git a/metagpt/roles/engineer.py b/metagpt/roles/engineer.py index 2f99d132e..f1e65b177 100644 --- a/metagpt/roles/engineer.py +++ b/metagpt/roles/engineer.py @@ -72,7 +72,7 @@ class Engineer(Role): name: str = "Alex", profile: str = "Engineer", goal: str = "write elegant, readable, extensible, efficient code", - constraints: str = "the code should conform to standards like PEP8 and be modular and maintainable. " + constraints: str = "the code should conform to standards like google-style and be modular and maintainable. " "Use same language as user requirement", n_borg: int = 1, use_code_review: bool = False, @@ -105,7 +105,9 @@ class Engineer(Role): coding_context = await todo.run() # Code review if review: - coding_context = await WriteCodeReview(context=coding_context, llm=self._llm).run() + action = WriteCodeReview(context=coding_context, llm=self._llm) + self._init_action_system_message(action) + coding_context = await action.run() await src_file_repo.save( coding_context.filename, dependencies={coding_context.design_doc.root_relative_path, coding_context.task_doc.root_relative_path}, @@ -224,6 +226,7 @@ class Engineer(Role): task_doc = await task_file_repo.get(i.name) elif str(i.parent) == SYSTEM_DESIGN_FILE_REPO: design_doc = await design_file_repo.get(i.name) + # FIXME: design doc没有加载进来,是None context = CodingContext(filename=filename, design_doc=design_doc, task_doc=task_doc, code_doc=old_code_doc) return context diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 633ad6051..66475da72 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -134,6 +134,7 @@ class Role: self._setting = RoleSetting( name=name, profile=profile, goal=goal, constraints=constraints, desc=desc, is_human=is_human ) + self._llm.system_prompt = self._get_prefix() self._states = [] self._actions = [] self._role_id = str(self._setting) @@ -144,6 +145,9 @@ class Role: self._states = [] self._actions = [] + def _init_action_system_message(self, action: Action): + action.set_prefix(self._get_prefix(), self.profile) + def _init_actions(self, actions): self._reset() for idx, action in enumerate(actions): @@ -158,7 +162,7 @@ class Role: ) i = action # i.set_env(self._rc.env) - i.set_prefix(self._get_prefix(), self.profile) + self._init_action_system_message(i) self._actions.append(i) self._states.append(f"{idx}. {action}") diff --git a/metagpt/schema.py b/metagpt/schema.py index baed5582b..799bb9253 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -259,7 +259,7 @@ class MessageQueue: class CodingContext(BaseModel): filename: str - design_doc: Document + design_doc: Optional[Document] task_doc: Optional[Document] code_doc: Optional[Document] From 222694c329d5bddc412317d4e20c774d391776b3 Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 00:37:10 +0800 Subject: [PATCH 29/38] fix bugs --- metagpt/actions/write_code.py | 13 +++++++++---- metagpt/actions/write_code_review.py | 2 +- metagpt/schema.py | 6 ++++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index a91e4ee1e..5960e2621 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -71,7 +71,7 @@ ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenc # Instruction: Based on the context, follow "Format example", write code. -## Code: {filename} Write code with triple quoto, based on the following attentions and context. +## Code: {filename}. Write code with triple quoto, based on the following attentions and context. 1. Only One file: do your best to implement THIS ONLY ONE FILE. 2. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. 3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. AVOID circular import. @@ -100,7 +100,7 @@ class WriteCode(Action): filename="test_" + coding_context.filename + ".json", relative_path=TEST_OUTPUTS_FILE_REPO ) summary_doc = None - if coding_context.design_doc.filename: + if coding_context.design_doc and coding_context.design_doc.filename: summary_doc = await FileRepository.get_file( filename=coding_context.design_doc.filename, relative_path=CODE_SUMMARIES_FILE_REPO ) @@ -108,9 +108,14 @@ class WriteCode(Action): if test_doc: test_detail = RunCodeResult.loads(test_doc.content) logs = test_detail.stderr - code_context = await self.get_codes(coding_context.task_doc, exclude=self.context.filename) + + if bug_feedback: + code_context = coding_context.code_doc.content + else: + code_context = await self.get_codes(coding_context.task_doc, exclude=self.context.filename) + prompt = PROMPT_TEMPLATE.format( - design=coding_context.design_doc.content, + design=coding_context.design_doc.content if coding_context.design_doc else "", tasks=coding_context.task_doc.content if coding_context.task_doc else "", code=code_context, logs=logs, diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index f63a399a9..62e96acd8 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -124,7 +124,7 @@ class WriteCodeReview(Action): code_context = await WriteCode.get_codes(self.context.task_doc, exclude=self.context.filename) context = "\n".join( [ - "## System Design\n" + self.context.design_doc.content + "\n", + "## System Design\n" + str(self.context.design_doc) + "\n", "## Tasks\n" + task_content + "\n", "## Code Files\n" + code_context + "\n", ] diff --git a/metagpt/schema.py b/metagpt/schema.py index 799bb9253..758149efa 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -74,6 +74,12 @@ class Document(BaseModel): return None return str(CONFIG.git_repo.workdir / self.root_path / self.filename) + def __str__(self): + return self.content + + def __repr__(self): + return self.content + class Documents(BaseModel): """A class representing a collection of documents. From 126bcdafb966ef694dcf764dc98302bc57497f27 Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 10:44:18 +0800 Subject: [PATCH 30/38] fix error msg --- metagpt/utils/git_repository.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metagpt/utils/git_repository.py b/metagpt/utils/git_repository.py index 1340b1768..d2bdf5d85 100644 --- a/metagpt/utils/git_repository.py +++ b/metagpt/utils/git_repository.py @@ -233,6 +233,8 @@ class GitRepository: files = [] try: directory_path = Path(self.workdir) / relative_path + if not directory_path.exists(): + return [] for file_path in directory_path.iterdir(): if file_path.is_file(): rpath = file_path.relative_to(root_relative_path) From 862707d4b7bd319873e550010253a8df0844f6b8 Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 10:56:08 +0800 Subject: [PATCH 31/38] use react instead of _react --- metagpt/roles/role.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 66475da72..b673c330d 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -412,7 +412,7 @@ class Role: logger.debug(f"{self._setting}: no news. waiting.") return - rsp = await self._react() + rsp = await self.react() # Reset the next action to be taken. self._rc.todo = None From b97ca3af7ecf980d3ce00675a632c66b9d0989f0 Mon Sep 17 00:00:00 2001 From: geekan Date: Thu, 14 Dec 2023 23:54:38 +0800 Subject: [PATCH 32/38] feat: resolve conflicts --- metagpt/actions/action.py | 1 + metagpt/actions/action_node.py | 2 +- metagpt/actions/project_management.py | 2 +- metagpt/actions/write_code.py | 72 ++++++++++---------- metagpt/actions/write_code_review.py | 95 +++++++++++++++------------ metagpt/actions/write_prd_an.py | 2 +- metagpt/provider/base_gpt_api.py | 2 +- metagpt/roles/engineer.py | 7 +- metagpt/roles/role.py | 8 ++- metagpt/schema.py | 8 ++- metagpt/utils/common.py | 9 ++- metagpt/utils/git_repository.py | 2 + 12 files changed, 120 insertions(+), 90 deletions(-) diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index 1d9be60e0..6c1f63f45 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -44,6 +44,7 @@ class Action(ABC): self.prefix = prefix self.profile = profile self.llm.system_prompt = prefix + return self def __str__(self): return self.__class__.__name__ diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 9fb10f35c..1d808ec70 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -243,7 +243,7 @@ class ActionNode: ) return prompt - @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) + @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(6)) async def _aask_v1( self, prompt: str, diff --git a/metagpt/actions/project_management.py b/metagpt/actions/project_management.py index c95be4012..1f14e7944 100644 --- a/metagpt/actions/project_management.py +++ b/metagpt/actions/project_management.py @@ -99,7 +99,7 @@ class WriteTasks(Action): async def _merge(self, system_design_doc, task_doc, format=CONFIG.prompt_format) -> Document: context = NEW_REQ_TEMPLATE.format(context=system_design_doc.content, old_tasks=task_doc.content) node = await PM_NODE.fill(context, self.llm, format) - task_doc.content = node.content + task_doc.content = node.instruct_content.json(ensure_ascii=False) return task_doc @staticmethod diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index b759f4e2a..5960e2621 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -34,59 +34,52 @@ from metagpt.utils.file_repository import FileRepository PROMPT_TEMPLATE = """ NOTICE -Role: You are a professional engineer; the main goal is to write PEP8 compliant, elegant, modular, easy to read and maintain Python 3.9 code (but you can also use other programming language) +Role: You are a professional engineer; the main goal is to write google-style, elegant, modular, easy to read and maintain code Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenced "Format example". ------ -# Design -```json +# Context +## Design {design} -``` ------ -# Tasks -```json + +## Tasks {tasks} -``` ------ -# Legacy Code -```python + +## Legacy Code +```Code {code} ``` ------ -# Debug logs + +## Debug logs ```text {logs} {summary_log} ``` ------ -# Bug Feedback logs + +## Bug Feedback logs ```text {feedback} ``` ------ - -## Code: {filename} Write code with triple quoto, based on the following list and context. -1. Do your best to implement THIS ONLY ONE FILE. ONLY USE EXISTING API. IF NO API, IMPLEMENT IT. -2. Requirement: Based on the context, implement one following code file, note to return only in code form, your code will be part of the entire project, so please implement complete, reliable, reusable code snippets -3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. -4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. -5. Think before writing: What should be implemented and provided in this document? -6. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. -7. Do not use public member functions that do not exist in your design. -8. Before using a variable, make sure you reference it first -9. Write out EVERY DETAIL, DON'T LEAVE TODO. - -## Format example ------ +# Format example ## Code: {filename} ```python ## {filename} ... ``` ------ + +# Instruction: Based on the context, follow "Format example", write code. + +## Code: {filename}. Write code with triple quoto, based on the following attentions and context. +1. Only One file: do your best to implement THIS ONLY ONE FILE. +2. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets. +3. Set default value: If there is any setting, ALWAYS SET A DEFAULT VALUE, ALWAYS USE STRONG TYPE AND EXPLICIT VARIABLE. AVOID circular import. +4. Follow design: YOU MUST FOLLOW "Data structures and interfaces". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design. +5. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE. +6. Before using a external variable/module, make sure you import it first. +7. Write out EVERY CODE DETAIL, DON'T LEAVE TODO. + """ @@ -107,7 +100,7 @@ class WriteCode(Action): filename="test_" + coding_context.filename + ".json", relative_path=TEST_OUTPUTS_FILE_REPO ) summary_doc = None - if coding_context.design_doc.filename: + if coding_context.design_doc and coding_context.design_doc.filename: summary_doc = await FileRepository.get_file( filename=coding_context.design_doc.filename, relative_path=CODE_SUMMARIES_FILE_REPO ) @@ -115,9 +108,14 @@ class WriteCode(Action): if test_doc: test_detail = RunCodeResult.loads(test_doc.content) logs = test_detail.stderr - code_context = await self.get_codes(coding_context.task_doc, exclude=self.context.filename) + + if bug_feedback: + code_context = coding_context.code_doc.content + else: + code_context = await self.get_codes(coding_context.task_doc, exclude=self.context.filename) + prompt = PROMPT_TEMPLATE.format( - design=coding_context.design_doc.content, + design=coding_context.design_doc.content if coding_context.design_doc else "", tasks=coding_context.task_doc.content if coding_context.task_doc else "", code=code_context, logs=logs, @@ -148,5 +146,5 @@ class WriteCode(Action): doc = await src_file_repo.get(filename=filename) if not doc: continue - codes.append(doc.content) - return "\n----------\n".join(codes) + codes.append(f"----- {filename}\n" + doc.content) + return "\n".join(codes) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index 75313fea5..62e96acd8 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -18,8 +18,8 @@ from metagpt.schema import CodingContext from metagpt.utils.common import CodeParser PROMPT_TEMPLATE = """ -NOTICE -Role: You are a professional software engineer, and your main task is to review the code. You need to ensure that the code conforms to the PEP8 standards, is elegantly designed and modularized, easy to read and maintain, and is written in Python 3.9 (or in another programming language). +# System +Role: You are a professional software engineer, and your main task is to review and revise the code. You need to ensure that the code conforms to the google-style standards, is elegantly designed and modularized, easy to read and maintain. Language: Please use the same language as the user requirement, but the title and code should be still in English. For example, if the user speaks Chinese, the specific text of your answer should also be in Chinese. ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenced "Format example". @@ -27,53 +27,52 @@ ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenc {context} ## Code to be Reviewed: {filename} -``` +```Code {code} ``` +""" ------ -## Code Review: Based on the "Code to be Reviewed", provide key, clear, concise, and specific code modification suggestions, up to 5. +EXAMPLE_AND_INSTRUCTION = """ + +{format_example} + + +# Instruction: Based on the actual code situation, follow one of the "Format example". + +## Code Review: Ordered List. Based on the "Code to be Reviewed", provide key, clear, concise, and specific answer. If any answer is no, explain how to fix it step by step. 1. Is the code implemented as per the requirements? If not, how to achieve it? Analyse it step by step. 2. Is the code logic completely correct? If there are errors, please indicate how to correct them. 3. Does the existing code follow the "Data structures and interfaces"? 4. Are all functions implemented? If there is no implementation, please indicate how to achieve it step by step. 5. Have all necessary pre-dependencies been imported? If not, indicate which ones need to be imported -6. Is the code implemented concisely enough? Are methods from other files being reused correctly? +6. Are methods from other files being reused correctly? -## Code Review Result: If the code doesn't have bugs, we don't need to rewrite it, so answer LGTM and stop. ONLY ANSWER LGTM/LBTM. +## Actions: Ordered List. Things that should be done after CR, such as implementing class A and function B + +## Code Review Result: str. If the code doesn't have bugs, we don't need to rewrite it, so answer LGTM and stop. ONLY ANSWER LGTM/LBTM. LGTM/LBTM -## Rewrite Code: if it still has some bugs, rewrite {filename} based on "Code Review" with triple quotes, try to get LGTM. Do your utmost to optimize THIS SINGLE FILE. Implement ALL TODO. RETURN ALL CODE, NEVER OMIT ANYTHING. 以任何方式省略代码都是不允许的。 -``` -``` - -## Format example -{format_example} - """ FORMAT_EXAMPLE = """ ------ -# EXAMPLE 1 +# Format example 1 ## Code Review: {filename} -1. No, we should add the logic of ... +1. No, we should fix the logic of class A due to ... 2. ... 3. ... -4. ... +4. No, function B is not implemented, ... 5. ... 6. ... -## Code Review Result: {filename} +## Actions +1. fix class A +2. implement function B + +## Code Review Result LBTM -## Rewrite Code: {filename} -```python -## {filename} -... -``` ------ -# EXAMPLE 2 +# Format example 2 ## Code Review: {filename} 1. Yes. 2. Yes. @@ -82,12 +81,20 @@ LBTM 5. Yes. 6. Yes. -## Code Review Result: {filename} -LGTM - -## Rewrite Code: {filename} +## Actions pass ------ + +## Code Review Result +LGTM +""" + +REWRITE_CODE_TEMPLATE = """ +# Instruction: rewrite code based on the Code Review and Actions +## Rewrite Code: CodeBlock. If it still has some bugs, rewrite {filename} with triple quotes. Do your utmost to optimize THIS SINGLE FILE. Return all completed codes and prohibit the return of unfinished codes. +```Code +## {filename} +... +``` """ @@ -96,11 +103,15 @@ class WriteCodeReview(Action): super().__init__(name, context, llm) @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) - async def write_code_review_and_rewrite(self, prompt): - code_rsp = await self._aask(prompt) - result = CodeParser.parse_block("Code Review Result", code_rsp) + async def write_code_review_and_rewrite(self, context_prompt, cr_prompt, filename): + cr_rsp = await self._aask(context_prompt + cr_prompt) + result = CodeParser.parse_block("Code Review Result", cr_rsp) if "LGTM" in result: return result, None + + # if LBTM, rewrite code + rewrite_prompt = f"{context_prompt}\n{cr_rsp}\n{REWRITE_CODE_TEMPLATE.format(filename=filename)}" + code_rsp = await self._aask(rewrite_prompt) code = CodeParser.parse_code(block="", text=code_rsp) return result, code @@ -111,23 +122,23 @@ class WriteCodeReview(Action): format_example = FORMAT_EXAMPLE.format(filename=self.context.code_doc.filename) task_content = self.context.task_doc.content if self.context.task_doc else "" code_context = await WriteCode.get_codes(self.context.task_doc, exclude=self.context.filename) - context = "\n----------\n".join( + context = "\n".join( [ - "```text\n" + self.context.design_doc.content + "```\n", - "```text\n" + task_content + "```\n", - "```python\n" + code_context + "```\n", + "## System Design\n" + str(self.context.design_doc) + "\n", + "## Tasks\n" + task_content + "\n", + "## Code Files\n" + code_context + "\n", ] ) - prompt = PROMPT_TEMPLATE.format( + context_prompt = PROMPT_TEMPLATE.format( context=context, code=iterative_code, filename=self.context.code_doc.filename, - format_example=format_example, ) + cr_prompt = EXAMPLE_AND_INSTRUCTION.format(format_example=format_example, ) logger.info( - f"Code review and rewrite {self.context.code_doc.filename,}: {i+1}/{k} | {len(iterative_code)=}, {len(self.context.code_doc.content)=}" + f"Code review and rewrite {self.context.code_doc.filename}: {i+1}/{k} | {len(iterative_code)=}, {len(self.context.code_doc.content)=}" ) - result, rewrited_code = await self.write_code_review_and_rewrite(prompt) + result, rewrited_code = await self.write_code_review_and_rewrite(context_prompt, cr_prompt, self.context.code_doc.filename) if "LBTM" in result: iterative_code = rewrited_code elif "LGTM" in result: diff --git a/metagpt/actions/write_prd_an.py b/metagpt/actions/write_prd_an.py index 68402e504..d96c0aeac 100644 --- a/metagpt/actions/write_prd_an.py +++ b/metagpt/actions/write_prd_an.py @@ -93,7 +93,7 @@ REQUIREMENT_POOL = ActionNode( key="Requirement Pool", expected_type=list[list[str]], instruction="List down the requirements with their priority (P0, P1, P2).", - example=[["P0", "High priority requirement"], ["P1", "Medium priority requirement"]], + example=[["P0", "..."], ["P1", "..."]], ) UI_DESIGN_DRAFT = ActionNode( diff --git a/metagpt/provider/base_gpt_api.py b/metagpt/provider/base_gpt_api.py index 6c1dc8338..c38576806 100644 --- a/metagpt/provider/base_gpt_api.py +++ b/metagpt/provider/base_gpt_api.py @@ -49,8 +49,8 @@ class BaseGPTAPI(BaseChatbot): message = ( [self._default_system_msg(), self._user_msg(msg)] if self.use_system_prompt else [self._user_msg(msg)] ) - rsp = await self.acompletion_text(message, stream=stream) logger.debug(message) + rsp = await self.acompletion_text(message, stream=stream) # logger.debug(rsp) return rsp diff --git a/metagpt/roles/engineer.py b/metagpt/roles/engineer.py index 2f99d132e..f1e65b177 100644 --- a/metagpt/roles/engineer.py +++ b/metagpt/roles/engineer.py @@ -72,7 +72,7 @@ class Engineer(Role): name: str = "Alex", profile: str = "Engineer", goal: str = "write elegant, readable, extensible, efficient code", - constraints: str = "the code should conform to standards like PEP8 and be modular and maintainable. " + constraints: str = "the code should conform to standards like google-style and be modular and maintainable. " "Use same language as user requirement", n_borg: int = 1, use_code_review: bool = False, @@ -105,7 +105,9 @@ class Engineer(Role): coding_context = await todo.run() # Code review if review: - coding_context = await WriteCodeReview(context=coding_context, llm=self._llm).run() + action = WriteCodeReview(context=coding_context, llm=self._llm) + self._init_action_system_message(action) + coding_context = await action.run() await src_file_repo.save( coding_context.filename, dependencies={coding_context.design_doc.root_relative_path, coding_context.task_doc.root_relative_path}, @@ -224,6 +226,7 @@ class Engineer(Role): task_doc = await task_file_repo.get(i.name) elif str(i.parent) == SYSTEM_DESIGN_FILE_REPO: design_doc = await design_file_repo.get(i.name) + # FIXME: design doc没有加载进来,是None context = CodingContext(filename=filename, design_doc=design_doc, task_doc=task_doc, code_doc=old_code_doc) return context diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 633ad6051..b673c330d 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -134,6 +134,7 @@ class Role: self._setting = RoleSetting( name=name, profile=profile, goal=goal, constraints=constraints, desc=desc, is_human=is_human ) + self._llm.system_prompt = self._get_prefix() self._states = [] self._actions = [] self._role_id = str(self._setting) @@ -144,6 +145,9 @@ class Role: self._states = [] self._actions = [] + def _init_action_system_message(self, action: Action): + action.set_prefix(self._get_prefix(), self.profile) + def _init_actions(self, actions): self._reset() for idx, action in enumerate(actions): @@ -158,7 +162,7 @@ class Role: ) i = action # i.set_env(self._rc.env) - i.set_prefix(self._get_prefix(), self.profile) + self._init_action_system_message(i) self._actions.append(i) self._states.append(f"{idx}. {action}") @@ -408,7 +412,7 @@ class Role: logger.debug(f"{self._setting}: no news. waiting.") return - rsp = await self._react() + rsp = await self.react() # Reset the next action to be taken. self._rc.todo = None diff --git a/metagpt/schema.py b/metagpt/schema.py index baed5582b..758149efa 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -74,6 +74,12 @@ class Document(BaseModel): return None return str(CONFIG.git_repo.workdir / self.root_path / self.filename) + def __str__(self): + return self.content + + def __repr__(self): + return self.content + class Documents(BaseModel): """A class representing a collection of documents. @@ -259,7 +265,7 @@ class MessageQueue: class CodingContext(BaseModel): filename: str - design_doc: Document + design_doc: Optional[Document] task_doc: Optional[Document] code_doc: Optional[Document] diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index f08519f8e..a9bdd6e2d 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -223,10 +223,15 @@ class CodeParser: # 遍历所有的block for block in blocks: # 如果block不为空,则继续处理 - if block.strip() != "": + if block.strip() == "": + continue + if "\n" not in block: + block_title = block + block_content = "" + else: # 将block的标题和内容分开,并分别去掉前后的空白字符 block_title, block_content = block.split("\n", 1) - block_dict[block_title.strip()] = block_content.strip() + block_dict[block_title.strip()] = block_content.strip() return block_dict diff --git a/metagpt/utils/git_repository.py b/metagpt/utils/git_repository.py index 1340b1768..d2bdf5d85 100644 --- a/metagpt/utils/git_repository.py +++ b/metagpt/utils/git_repository.py @@ -233,6 +233,8 @@ class GitRepository: files = [] try: directory_path = Path(self.workdir) / relative_path + if not directory_path.exists(): + return [] for file_path in directory_path.iterdir(): if file_path.is_file(): rpath = file_path.relative_to(root_relative_path) From 60957372fcb3a810f931443b3a8d7bbcbf1d4e2a Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 11:36:24 +0800 Subject: [PATCH 33/38] tuning log level --- metagpt/config.py | 2 +- metagpt/team.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/config.py b/metagpt/config.py index d04da1d91..8ad42c99f 100644 --- a/metagpt/config.py +++ b/metagpt/config.py @@ -46,7 +46,7 @@ class Config(metaclass=Singleton): def __init__(self, yaml_file=default_yaml_file): self._init_with_config_files_and_env(yaml_file) - logger.info("Config loading done.") + logger.debug("Config loading done.") self._update() def _update(self): diff --git a/metagpt/team.py b/metagpt/team.py index e1b2a9ffc..a5c405f80 100644 --- a/metagpt/team.py +++ b/metagpt/team.py @@ -63,7 +63,7 @@ class Team(BaseModel): while n_round > 0: # self._save() n_round -= 1 - logger.info(f"max {n_round=} left.") + logger.debug(f"max {n_round=} left.") self._check_balance() await self.env.run() if CONFIG.git_repo: From 3a448a7bb48fefea3a2e377ab42e44a3ddd4deb4 Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 11:54:30 +0800 Subject: [PATCH 34/38] config: adjust default values --- config/config.yaml | 10 +++++----- metagpt/config.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index ef8575e43..8fd208c59 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -77,8 +77,8 @@ RPM: 10 #### for Stable Diffusion ## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui -SD_URL: "YOUR_SD_URL" -SD_T2I_API: "/sdapi/v1/txt2img" +#SD_URL: "YOUR_SD_URL" +#SD_T2I_API: "/sdapi/v1/txt2img" #### for Execution #LONG_TERM_MEMORY: false @@ -93,8 +93,8 @@ SD_T2I_API: "/sdapi/v1/txt2img" # CALC_USAGE: false ### for Research -MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo -MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k +# MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo +# MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k ### choose the engine for mermaid conversion, # default is nodejs, you can change it to playwright,pyppeteer or ink @@ -108,4 +108,4 @@ MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k ### repair operation on the content extracted from LLM's raw output. Warning, it improves the result but not fix all cases. # REPAIR_LLM_OUTPUT: false -PROMPT_FORMAT: json #json or markdown \ No newline at end of file +# PROMPT_FORMAT: json #json or markdown \ No newline at end of file diff --git a/metagpt/config.py b/metagpt/config.py index 8ad42c99f..19bd02c87 100644 --- a/metagpt/config.py +++ b/metagpt/config.py @@ -114,7 +114,7 @@ class Config(metaclass=Singleton): self.pyppeteer_executable_path = self._get("PYPPETEER_EXECUTABLE_PATH", "") self.repair_llm_output = self._get("REPAIR_LLM_OUTPUT", False) - self.prompt_format = self._get("PROMPT_FORMAT", "markdown") + self.prompt_format = self._get("PROMPT_FORMAT", "json") self.workspace_path = Path(self._get("WORKSPACE_PATH", DEFAULT_WORKSPACE_ROOT)) self._ensure_workspace_exists() From 2c68b42432a86f1b1de95bb5e8ede2ba79efcc03 Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 12:06:27 +0800 Subject: [PATCH 35/38] action: add example --- metagpt/actions/write_code_review.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index 62e96acd8..4b3e9aece 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -66,8 +66,28 @@ FORMAT_EXAMPLE = """ 6. ... ## Actions -1. fix class A -2. implement function B +1. Fix the `handle_events` method to update the game state only if a move is successful. + ```python + def handle_events(self): + for event in pygame.event.get(): + if event.type == pygame.QUIT: + return False + if event.type == pygame.KEYDOWN: + moved = False + if event.key == pygame.K_UP: + moved = self.game.move('UP') + elif event.key == pygame.K_DOWN: + moved = self.game.move('DOWN') + elif event.key == pygame.K_LEFT: + moved = self.game.move('LEFT') + elif event.key == pygame.K_RIGHT: + moved = self.game.move('RIGHT') + if moved: + # Update the game state only if a move was successful + self.render() + return True + ``` +2. Implement function B ## Code Review Result LBTM From bc9f0f190269c23050a4ebf54b3ac6e23af0d68e Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 12:17:26 +0800 Subject: [PATCH 36/38] workspace path update --- metagpt/actions/prepare_documents.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/prepare_documents.py b/metagpt/actions/prepare_documents.py index 05255dcc5..8d3445ae4 100644 --- a/metagpt/actions/prepare_documents.py +++ b/metagpt/actions/prepare_documents.py @@ -27,8 +27,8 @@ class PrepareDocuments(Action): # Create and initialize the workspace folder, initialize the Git environment. project_name = CONFIG.project_name or FileRepository.new_filename() workdir = CONFIG.project_path - if not workdir and CONFIG.workspace: - workdir = Path(CONFIG.workspace) / project_name + if not workdir and CONFIG.workspace_path: + workdir = Path(CONFIG.workspace_path) / project_name workdir = Path(workdir or DEFAULT_WORKSPACE_ROOT / project_name) if not CONFIG.inc and workdir.exists(): shutil.rmtree(workdir) From a3d7b0f380c8305ce51f0675af74d2e438b7e2b0 Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 13:19:04 +0800 Subject: [PATCH 37/38] CR update --- metagpt/actions/action.py | 1 - metagpt/actions/action_node.py | 3 ++- metagpt/actions/research.py | 2 +- metagpt/actions/summarize_code.py | 14 +++++++------- metagpt/roles/role.py | 16 ---------------- 5 files changed, 10 insertions(+), 26 deletions(-) diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index 6c1f63f45..1534b1f4d 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -43,7 +43,6 @@ class Action(ABC): """Set prefix for later usage""" self.prefix = prefix self.profile = profile - self.llm.system_prompt = prefix return self def __str__(self): diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 1d808ec70..fb7d621d8 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -70,7 +70,8 @@ class ActionNode: content: str instruct_content: BaseModel - def __init__(self, key, expected_type, instruction, example, content="", children=None): + def __init__(self, key: str, expected_type: Type, instruction: str, example: str, content: str = "", + children: dict[str, "ActionNode"] = None): self.key = key self.expected_type = expected_type self.instruction = instruction diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index d7a2a7e38..a70038c51 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -114,7 +114,7 @@ class CollectLinks(Action): keywords = OutputParser.extract_struct(keywords, list) keywords = parse_obj_as(list[str], keywords) except Exception as e: - logger.exception(f'fail to get keywords related to the research topic "{topic}" for {e}') + logger.exception(f"fail to get keywords related to the research topic '{topic}' for {e}") keywords = [topic] results = await asyncio.gather(*(self.search_engine.run(i, as_string=False) for i in keywords)) diff --git a/metagpt/actions/summarize_code.py b/metagpt/actions/summarize_code.py index 413ac2a21..f8d8d2b47 100644 --- a/metagpt/actions/summarize_code.py +++ b/metagpt/actions/summarize_code.py @@ -34,13 +34,13 @@ ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. Output format carefully referenc ----- {code_blocks} -## Code Review All: 请你对历史所有文件进行阅读,在文件中找到可能的bug,如函数未实现、调用错误、未引用等 +## Code Review All: Please read all historical files and find possible bugs in the files, such as unimplemented functions, calling errors, unreferences, etc. -## Call flow: mermaid代码,根据实现的函数,使用mermaid绘制完整的调用链 +## Call flow: mermaid code, based on the implemented function, use mermaid to draw a complete call chain -## Summary: 根据历史文件的实现情况进行总结 +## Summary: Summary based on the implementation of historical files -## TODOs: Python dict[str, str],这里写出需要修改的文件列表与理由,我们会在之后进行修改 +## TODOs: Python dict[str, str], write down the list of files that need to be modified and the reasons. We will modify them later. """ @@ -49,9 +49,9 @@ FORMAT_EXAMPLE = """ ## Code Review All ### a.py -- 它少实现了xxx需求... -- 字段yyy没有给出... -- ... +- It fulfills less of xxx requirements... +- Field yyy is not given... +-... ### b.py ... diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index b673c330d..b07541b09 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -218,22 +218,6 @@ class Role: if env: env.set_subscription(self, self._subscription) - # # Replaced by FileRepository.set_file - # def set_doc(self, content: str, filename: str): - # return self._rc.env.set_doc(content, filename) - # - # # Replaced by FileRepository.get_file - # def get_doc(self, filename: str): - # return self._rc.env.get_doc(filename) - # - # # Replaced by CONFIG.xx - # def set(self, k, v): - # return self._rc.env.set(k, v) - # - # # Replaced by CONFIG.xx - # def get(self, k): - # return self._rc.env.get(k) - @property def profile(self): """Get the role description (position)""" From df2e9a12be0f9c891405b54fb17c23640d404aae Mon Sep 17 00:00:00 2001 From: geekan Date: Fri, 15 Dec 2023 12:17:26 +0800 Subject: [PATCH 38/38] workspace path update --- metagpt/actions/debug_error.py | 2 +- metagpt/actions/prepare_documents.py | 4 ++-- metagpt/document.py | 1 + metagpt/document_store/document.py | 1 + metagpt/roles/qa_engineer.py | 5 +++-- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/metagpt/actions/debug_error.py b/metagpt/actions/debug_error.py index df60c2e61..39f3bc1bc 100644 --- a/metagpt/actions/debug_error.py +++ b/metagpt/actions/debug_error.py @@ -62,7 +62,7 @@ class DebugError(Action): if matches: return "" - logger.info(f"Debug and rewrite {self.context.code_filename}") + logger.info(f"Debug and rewrite {self.context.test_filename}") code_doc = await FileRepository.get_file( filename=self.context.code_filename, relative_path=CONFIG.src_workspace ) diff --git a/metagpt/actions/prepare_documents.py b/metagpt/actions/prepare_documents.py index 05255dcc5..8d3445ae4 100644 --- a/metagpt/actions/prepare_documents.py +++ b/metagpt/actions/prepare_documents.py @@ -27,8 +27,8 @@ class PrepareDocuments(Action): # Create and initialize the workspace folder, initialize the Git environment. project_name = CONFIG.project_name or FileRepository.new_filename() workdir = CONFIG.project_path - if not workdir and CONFIG.workspace: - workdir = Path(CONFIG.workspace) / project_name + if not workdir and CONFIG.workspace_path: + workdir = Path(CONFIG.workspace_path) / project_name workdir = Path(workdir or DEFAULT_WORKSPACE_ROOT / project_name) if not CONFIG.inc and workdir.exists(): shutil.rmtree(workdir) diff --git a/metagpt/document.py b/metagpt/document.py index 6ac4834aa..0af3a915c 100644 --- a/metagpt/document.py +++ b/metagpt/document.py @@ -4,6 +4,7 @@ @Time : 2023/6/8 14:03 @Author : alexanderwu @File : document.py +@Desc : Classes and Operations Related to Files in the File System. """ from enum import Enum from pathlib import Path diff --git a/metagpt/document_store/document.py b/metagpt/document_store/document.py index c59056312..90abc54de 100644 --- a/metagpt/document_store/document.py +++ b/metagpt/document_store/document.py @@ -4,6 +4,7 @@ @Time : 2023/6/8 14:03 @Author : alexanderwu @File : document.py +@Desc : Classes and Operations Related to Vector Files in the Vector Database. Still under design. """ from pathlib import Path diff --git a/metagpt/roles/qa_engineer.py b/metagpt/roles/qa_engineer.py index c1573e63b..4439b9b19 100644 --- a/metagpt/roles/qa_engineer.py +++ b/metagpt/roles/qa_engineer.py @@ -26,6 +26,7 @@ from metagpt.logs import logger from metagpt.roles import Role from metagpt.schema import Document, Message, RunCodeContext, TestingContext from metagpt.utils.common import any_to_str_set, parse_recipient +from metagpt.utils.file_repository import FileRepository class QaEngineer(Role): @@ -125,8 +126,8 @@ class QaEngineer(Role): async def _debug_error(self, msg): run_code_context = RunCodeContext.loads(msg.content) code = await DebugError(context=run_code_context, llm=self._llm).run() - await CONFIG.git_repo.new_file_repository(CONFIG.src_workspace).save( - filename=run_code_context.code_filename, content=code + await FileRepository.save_file( + filename=run_code_context.test_filename, content=code, relative_path=TEST_CODES_FILE_REPO ) run_code_context.output = None self.publish_message(