diff --git a/metagpt/actions/requirement_analysis/requirement/pic2txt.py b/metagpt/actions/requirement_analysis/requirement/pic2txt.py index ae09e1e87..b8f236dac 100644 --- a/metagpt/actions/requirement_analysis/requirement/pic2txt.py +++ b/metagpt/actions/requirement_analysis/requirement/pic2txt.py @@ -20,7 +20,7 @@ from metagpt.utils.common import encode_image, general_after_log, to_markdown_co @register_tool(include_functions=["run"]) class Pic2Txt(Action): """Pic2Txt deal with the following situations: - 1. Given a picture about the user requirements, write out the textual user requirements. + Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements. """ async def run( @@ -28,11 +28,36 @@ class Pic2Txt(Action): *, image_paths: List[str], textual_user_requirement: str = "", - acknowledge: str = "", legacy_output: str = "", evaluation_conclusion: str = "", additional_technical_requirements: str = "", ) -> str: + """ + Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements + + Args: + image_paths (List[str]): A list of file paths to the input image(s) depicting user requirements. + textual_user_requirement (str, optional): Textual user requirement that accompanies the given images, if any. + legacy_output (str, optional): The intact textual user requirements generated by you last time, if any. + evaluation_conclusion (str, optional): Conclusion or evaluation based on the processed requirements. + additional_technical_requirements (str, optional): Any supplementary technical details relevant to the process. + + Returns: + str: Textual representation of user requirements extracted from the provided image(s). + + Raises: + ValueError: If image_paths list is empty. + OSError: If there is an issue accessing or reading the image files. 
+ + Example: + >>> images = ["requirements/pic/1.png", "requirements/pic/2.png", "requirements/pic/3.png"] + >>> textual_user_requirements = "User requirement paragraph 1 ..., ![](1.png). paragraph 2...![](2.png)..." + >>> action = Pic2Txt() + >>> intact_textual_user_requirements = await action.run(image_paths=images, textual_user_requirement=textual_user_requirements) + >>> print(intact_textual_user_requirements) + "User requirement paragraph 1 ..., ![...](1.png) This picture describes... paragraph 2...![...](2.png)..." + + """ descriptions = {} for i in image_paths: filename = Path(i) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 0263da989..a41c8b0a6 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -40,8 +40,17 @@ from metagpt.utils.token_counter import ( ) -@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL, LLMType.YI, - LLMType.OPEN_ROUTER]) +@register_provider( + [ + LLMType.OPENAI, + LLMType.FIREWORKS, + LLMType.OPEN_LLM, + LLMType.MOONSHOT, + LLMType.MISTRAL, + LLMType.YI, + LLMType.OPEN_ROUTER, + ] +) class OpenAILLM(BaseLLM): """Check https://platform.openai.com/examples for examples""" diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index d4d67742b..7b1917f13 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -83,7 +83,7 @@ class DataAnalyst(DataInterpreter): # print(*context, sep="\n" + "*" * 5 + "\n") async with ThoughtReporter(enable_llm_stream=True): rsp = await self.llm.aask(context) - self.commands = json.loads(CodeParser.parse_code(block=None, lang='json', text=rsp)) + self.commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=rsp)) self.rc.working_memory.add(Message(content=rsp, role="assistant")) await run_commands(self, self.commands, self.rc.working_memory) diff --git a/metagpt/roles/di/role_zero.py 
b/metagpt/roles/di/role_zero.py index 906c5583c..49a3fef33 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -11,7 +11,11 @@ from pydantic import model_validator from metagpt.actions import Action from metagpt.actions.di.run_command import RunCommand from metagpt.logs import logger -from metagpt.prompts.di.role_zero import CMD_PROMPT, ROLE_INSTRUCTION, JSON_REPAIR_PROMPT +from metagpt.prompts.di.role_zero import ( + CMD_PROMPT, + JSON_REPAIR_PROMPT, + ROLE_INSTRUCTION, +) from metagpt.roles import Role from metagpt.schema import AIMessage, Message, UserMessage from metagpt.strategy.experience_retriever import DummyExpRetriever, ExpRetriever @@ -21,8 +25,8 @@ from metagpt.tools.libs.editor import Editor from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import CodeParser +from metagpt.utils.repair_llm_raw_output import RepairType, repair_llm_raw_output from metagpt.utils.report import ThoughtReporter -from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType @register_tool(include_functions=["ask_human", "reply_to_human"]) @@ -166,7 +170,7 @@ class RoleZero(Role): try: commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) - except json.JSONDecodeError as e: + except json.JSONDecodeError: commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) except Exception as e: diff --git a/metagpt/roles/product_manager.py b/metagpt/roles/product_manager.py index cc8c82bf1..5f70bf390 100644 --- a/metagpt/roles/product_manager.py +++ b/metagpt/roles/product_manager.py @@ -9,9 +9,10 @@ from metagpt.actions import UserRequirement, WritePRD from 
metagpt.actions.prepare_documents import PrepareDocuments +from metagpt.actions.requirement_analysis.requirement.pic2txt import Pic2Txt from metagpt.roles.di.role_zero import RoleZero from metagpt.roles.role import RoleReactMode -from metagpt.utils.common import any_to_name, any_to_str +from metagpt.utils.common import any_to_name, any_to_str, tool2name from metagpt.utils.git_repository import GitRepository @@ -32,9 +33,9 @@ class ProductManager(RoleZero): constraints: str = "utilize the same language as the user requirements for seamless communication" todo_action: str = any_to_name(WritePRD) - instruction: str = """Use WritePRD tool to write PRD""" + instruction: str = """Use WritePRD tool to write PRD if a PRD is required; Use `Pic2Txt` tool to write out an intact textual user requirements if an intact textual user requiremnt is required given some images alongside the contextual textual descriptions;""" max_react_loop: int = 1 # FIXME: Read and edit files requires more steps, consider later - tools: list[str] = ["Editor:write,read,write_content", "RoleZero", "WritePRD"] + tools: list[str] = ["Editor:write,read,write_content", "RoleZero", "WritePRD", Pic2Txt.__name__] def __init__(self, **kwargs) -> None: super().__init__(**kwargs) @@ -47,12 +48,9 @@ class ProductManager(RoleZero): def _update_tool_execution(self): wp = WritePRD() - self.tool_execution_map.update( - { - "WritePRD.run": wp.run, - "WritePRD": wp.run, # alias - } - ) + self.tool_execution_map.update(tool2name(WritePRD, ["run"], wp.run)) + pic2txt = Pic2Txt() + self.tool_execution_map.update(tool2name(Pic2Txt, ["run"], pic2txt.run)) async def _think(self) -> bool: """Decide what to do""" diff --git a/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py b/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py index 4aa3e6dde..e5875b6ac 100644 --- a/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py +++ 
b/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py @@ -13,11 +13,11 @@ async def test_pic2txt(context): TEST_DATA_PATH / "requirements/pic/3.png", ] textual_user_requirements = await aread(filename=TEST_DATA_PATH / "requirements/1.original_requirement.txt") - acknowledge = await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md") action = Pic2Txt(context=context) rsp = await action.run( - image_paths=images, textual_user_requirement=textual_user_requirements, acknowledge=acknowledge + image_paths=images, + textual_user_requirement=textual_user_requirements, ) assert rsp