diff --git a/metagpt/actions/requirement_analysis/requirement/__init__.py b/metagpt/actions/requirement_analysis/requirement/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/metagpt/actions/requirement_analysis/requirement/pic2txt.py b/metagpt/actions/requirement_analysis/requirement/pic2txt.py
new file mode 100644
index 000000000..20ed1028a
--- /dev/null
+++ b/metagpt/actions/requirement_analysis/requirement/pic2txt.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/6/27
+@Author : mashenquan
+@File : pic2txt.py
+"""
+from pathlib import Path
+from typing import List, Union
+
+from tenacity import retry, stop_after_attempt, wait_random_exponential
+
+from metagpt.actions import Action
+from metagpt.logs import logger
+from metagpt.tools.tool_registry import register_tool
+from metagpt.utils.common import encode_image, general_after_log, to_markdown_code_block
+
+
+@register_tool(include_functions=["run"])
+class Pic2Txt(Action):
+    """Pic2Txt deals with the following situations:
+    1. Given a picture about the user requirements, write out the textual user requirements.
+    """
+
+    async def run(
+        self,
+        *,
+        image_paths: List[Union[str, Path]],
+        textual_user_requirement: str = "",
+        acknowledge: str = "",
+        legacy_output: str = "",
+        evaluation_conclusion: str = "",
+        additional_technical_requirements: str = "",
+    ) -> str:
+        """Write out the intact textual user requirements from UI pictures and textual fragments.
+
+        Args:
+            image_paths: Paths of the UI pictures; each one is base64-encoded with `encode_image`.
+            textual_user_requirement: Textual fragments of the user requirements, if any.
+            acknowledge: Additional information related to the user requirements, if any.
+            legacy_output: The textual user requirements generated last time, if any.
+            evaluation_conclusion: Issues raised about `legacy_output` that the new output should address.
+            additional_technical_requirements: Extra technical requirements the output must meet.
+
+        Returns:
+            The text returned by the LLM for the filled-in `PROMPT` and the encoded pictures.
+        """
+        # Normalize every entry to `Path` so plain strings and path objects are encoded the same way.
+        base64_images = [encode_image(Path(i)) for i in image_paths]
+        prompt = PROMPT.format(
+            textual_user_requirement=textual_user_requirement,
+            acknowledge=to_markdown_code_block(val=acknowledge),
+            legacy_output=to_markdown_code_block(val=legacy_output),
+            evaluation_conclusion=evaluation_conclusion,
+            additional_technical_requirements=to_markdown_code_block(val=additional_technical_requirements),
+        )
+        return await self._write(prompt, base64_images=base64_images)
+
+    @retry(
+        wait=wait_random_exponential(min=1, max=20),
+        stop=stop_after_attempt(6),
+        after=general_after_log(logger),
+    )
+    async def _write(self, prompt: str, base64_images: List[str]) -> str:
+        """Send the prompt together with the base64-encoded images to the LLM and return its answer.
+
+        Wrapped by tenacity: at most 6 attempts, with a randomized exponential wait capped at 20 seconds.
+        """
+        return await self.llm.aask(prompt, images=base64_images)
+
+
+PROMPT = """
+## Textual User Requirements
+{textual_user_requirement}
+
+## Acknowledge
+{acknowledge}
+
+## Legacy Outputs
+{legacy_output}
+
+## Evaluation Conclusion
+{evaluation_conclusion}
+
+## Additional Technical Requirements
+{additional_technical_requirements}
+
+---
+You are a tool that generates an intact textual user requirements given a few of textual fragments of user requirements and some fragments of UI pictures. 
+The content of "Textual User Requirements" provides a few of textual fragments of user requirements;
+The content of "Acknowledge" provides additional information related to the user requirements;
+"Legacy Outputs" contains the intact textual user requirements generated by you last time, which you can improve by addressing the issues raised in "Evaluation Conclusion";
+"Additional Technical Requirements" specifies the additional technical requirements that the generated textual user requirements must meet;
+你需要将图片中的内容转换成文字描述,合并到"Textual User Requirements",以生成完整的用户需求;
+Return the intact textual user requirements according to the given fragments of the user requirement of "Textual User Requirements" and the UI pictures;
+"""
diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index db2757ec3..4489c56c5 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -65,7 +65,7 @@ class BaseLLM(ABC):
             # image url or image base64
             url = image if image.startswith("http") else f"data:image/jpeg;base64,{image}"
             # it can with multiple-image inputs
-            content.append({"type": "image_url", "image_url": url})
+            content.append({"type": "image_url", "image_url": {"url": url}})
         return {"role": "user", "content": content}
 
     def _assistant_msg(self, msg: str) -> dict[str, str]:
diff --git a/tests/data/requirements/1.original_requirement.txt b/tests/data/requirements/1.original_requirement.txt
new file mode 100644
index 000000000..7f6b89a1a
--- /dev/null
+++ b/tests/data/requirements/1.original_requirement.txt
@@ -0,0 +1,30 @@
+3.1.功能总述
+
+国际小超人钉钉小程序一期支持法务文档一键查看、各国法律意见检索、申请合同模板三个功能。
+
+1、法务文档提供入口,用户点击后一键进入语雀查看,后续法务在语雀中进行维护。——pc和手机
+
+2、各国法律意见在钉钉端可进行多维度检索和查看。——pc和手机【待定】
+
+3、bd可以在钉钉端申请合同模板,提交申请后即可以下载模板。——可以pc和手机申请,但是下载只能通过pc
+
+小程度底部有3个tab,首页、模板、我的
+
+3.2.首页
+
+首页有两个分区,上面部分是法律意见检索栏。
+
+用户第一次进入小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
+
+【首页】
+![](1.png)
+【按国家名维度搜索】
+
+用户在搜索框中进行检索时采用typeahead,只能下拉选择数据库中有的国家名称。
+![](2.png)
+【检索结果】
+
+可根据滚筒切换业务线
+![](3.png) 
+![](4.png)
+![](5.png)
\ No newline at end of file
diff --git a/tests/data/requirements/pic/1.png b/tests/data/requirements/pic/1.png
new file mode 100644
index 000000000..58fca1e94
Binary files /dev/null and b/tests/data/requirements/pic/1.png differ
diff --git a/tests/data/requirements/pic/2.png b/tests/data/requirements/pic/2.png
new file mode 100644
index 000000000..babecbccf
Binary files /dev/null and b/tests/data/requirements/pic/2.png differ
diff --git a/tests/data/requirements/pic/3.png b/tests/data/requirements/pic/3.png
new file mode 100644
index 000000000..a3c3bf472
Binary files /dev/null and b/tests/data/requirements/pic/3.png differ
diff --git a/tests/data/requirements/pic/4.png b/tests/data/requirements/pic/4.png
new file mode 100644
index 000000000..86a6bf217
Binary files /dev/null and b/tests/data/requirements/pic/4.png differ
diff --git a/tests/data/requirements/pic/5.png b/tests/data/requirements/pic/5.png
new file mode 100644
index 000000000..d594baee5
Binary files /dev/null and b/tests/data/requirements/pic/5.png differ
diff --git a/tests/metagpt/actions/requirement_analysis/requirement/__init__.py b/tests/metagpt/actions/requirement_analysis/requirement/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py b/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py
new file mode 100644
index 000000000..75ab8960b
--- /dev/null
+++ b/tests/metagpt/actions/requirement_analysis/requirement/test_pic2txt.py
@@ -0,0 +1,32 @@
+import pytest
+
+from metagpt.actions.requirement_analysis.requirement.pic2txt import Pic2Txt
+from metagpt.const import TEST_DATA_PATH
+from metagpt.utils.common import aread
+
+
+@pytest.mark.asyncio
+async def test_pic2txt(context):
+    """Check that Pic2Txt returns a non-empty answer for UI pictures plus textual fragments."""
+    picture_names = (
+        "requirements/pic/1.png",
+        "requirements/pic/2.png",
+        "requirements/pic/3.png",
+        "requirements/pic/4.png",
+        "requirements/pic/5.png",
+    )
+    images = [TEST_DATA_PATH / name for name in picture_names]
+    textual_user_requirements = await aread(filename=TEST_DATA_PATH / "requirements/1.original_requirement.txt")
+    # NOTE(review): relies on `1.acknowledge.md` being present in the test data -- confirm it is checked in.
+    acknowledge = await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md")
+
+    rsp = await Pic2Txt(context=context).run(
+        image_paths=images,
+        textual_user_requirement=textual_user_requirements,
+        acknowledge=acknowledge,
+    )
+    assert rsp
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-s"])