mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-30 14:35:17 +02:00
feat: +pic2txt
This commit is contained in:
parent
9dc8d7307b
commit
742ff0e80a
11 changed files with 137 additions and 1 deletions
78
metagpt/actions/requirement_analysis/requirement/pic2txt.py
Normal file
78
metagpt/actions/requirement_analysis/requirement/pic2txt.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2024/6/27
|
||||
@Author : mashenquan
|
||||
@File : pic2txt.py
|
||||
"""
|
||||
from typing import List
|
||||
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
|
||||
from metagpt.actions import Action
|
||||
from metagpt.logs import logger
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.common import encode_image, general_after_log, to_markdown_code_block
|
||||
|
||||
|
||||
@register_tool(include_functions=["run"])
class Pic2Txt(Action):
    """Pic2Txt deals with the following situation:
    1. Given pictures about the user requirements, write out the textual user requirements.
    """

    async def run(
        self,
        *,
        image_paths: List[str],
        textual_user_requirement: str = "",
        acknowledge: str = "",
        legacy_output: str = "",
        evaluation_conclusion: str = "",
        additional_technical_requirements: str = "",
    ) -> str:
        """Build the prompt from the given fragments and ask the LLM for the textual user requirements.

        All arguments are keyword-only.

        Args:
            image_paths: Paths of the pictures; each one is passed to ``encode_image``.
            textual_user_requirement: Text placed as-is in the "Textual User Requirements" section.
            acknowledge: Text placed in the "Acknowledge" section after ``to_markdown_code_block``.
            legacy_output: Text placed in the "Legacy Outputs" section after ``to_markdown_code_block``.
            evaluation_conclusion: Text placed as-is in the "Evaluation Conclusion" section.
            additional_technical_requirements: Text placed in the "Additional Technical Requirements"
                section after ``to_markdown_code_block``.

        Returns:
            The response returned by the LLM for the assembled prompt and images.
        """
        # Encode the pictures first, so an unreadable image fails before any prompt work is done.
        encoded_pictures = [encode_image(path) for path in image_paths]
        sections = {
            "textual_user_requirement": textual_user_requirement,
            "acknowledge": to_markdown_code_block(val=acknowledge),
            "legacy_output": to_markdown_code_block(val=legacy_output),
            "evaluation_conclusion": evaluation_conclusion,
            "additional_technical_requirements": to_markdown_code_block(val=additional_technical_requirements),
        }
        return await self._write(PROMPT.format(**sections), base64_images=encoded_pictures)

    @retry(
        wait=wait_random_exponential(min=1, max=20),
        stop=stop_after_attempt(6),
        after=general_after_log(logger),
    )
    async def _write(self, prompt: str, base64_images: List[str]) -> str:
        """Send the prompt together with the encoded images to the LLM and return its response.

        The call is retried by tenacity: at most 6 attempts, with a random exponential
        wait bounded by ``min=1`` and ``max=20``; ``general_after_log(logger)`` is installed
        as the after-attempt hook.
        """
        return await self.llm.aask(prompt, images=base64_images)
|
||||
|
||||
|
||||
# Prompt template consumed by `Pic2Txt.run` through `str.format`.
# Placeholders (all required by `format`):
#   {textual_user_requirement}, {acknowledge}, {legacy_output},
#   {evaluation_conclusion}, {additional_technical_requirements}
# The part above the `---` separator carries the inputs; the part below it carries
# the instructions given to the LLM.
PROMPT = """
## Textual User Requirements
{textual_user_requirement}

## Acknowledge
{acknowledge}

## Legacy Outputs
{legacy_output}

## Evaluation Conclusion
{evaluation_conclusion}

## Additional Technical Requirements
{additional_technical_requirements}

---
You are a tool that generates an intact textual user requirements given a few of textual fragments of user requirements and some fragments of UI pictures.
The content of "Textual User Requirements" provides a few of textual fragments of user requirements;
The content of "Acknowledge" provides additional information related to the user requirements;
"Legacy Outputs" contains the intact textual user requirements generated by you last time, which you can improve by addressing the issues raised in "Evaluation Conclusion";
"Additional Technical Requirements" specifies the additional technical requirements that the generated textual user requirements must meet;
你需要将图片中的内容转换成文字描述,合并到"Textual User Requirements",以生成完整的用户需求;
Return the intact textual user requirements according to the given fragments of the user requirement of "Textual User Requirements" and the UI pictures;
"""
|
||||
|
|
@ -65,7 +65,7 @@ class BaseLLM(ABC):
|
|||
# image url or image base64
|
||||
url = image if image.startswith("http") else f"data:image/jpeg;base64,{image}"
|
||||
# it can with multiple-image inputs
|
||||
content.append({"type": "image_url", "image_url": url})
|
||||
content.append({"type": "image_url", "image_url": {"url": url}})
|
||||
return {"role": "user", "content": content}
|
||||
|
||||
def _assistant_msg(self, msg: str) -> dict[str, str]:
|
||||
|
|
|
|||
30
tests/data/requirements/1.original_requirement.txt
Normal file
30
tests/data/requirements/1.original_requirement.txt
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
3.1.功能总述
|
||||
|
||||
国际小超人钉钉小程序一期支持法务文档一键查看、各国法律意见检索、申请合同模板三个功能。
|
||||
|
||||
1、法务文档提供入口,用户点击后一键进入语雀查看,后续法务在语雀中进行维护。——pc和手机
|
||||
|
||||
2、各国法律意见在钉钉端可进行多维度检索和查看。——pc和手机【待定】
|
||||
|
||||
3、bd可以在钉钉端申请合同模板,提交申请后即可以下载模板。——可以pc和手机申请,但是下载只能通过pc
|
||||
|
||||
小程序底部有3个tab,首页、模板、我的
|
||||
|
||||
3.2.首页
|
||||
|
||||
首页有两个分区,上面部分是法律意见检索栏。
|
||||
|
||||
用户第一次进入小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
|
||||
|
||||
【首页】
|
||||

|
||||
【按国家名维度搜索】
|
||||
|
||||
用户在搜索框中进行检索时采用typeahead,只能下拉选择数据库中有的国家名称。
|
||||

|
||||
【检索结果】
|
||||
|
||||
可根据滚筒切换业务线
|
||||

|
||||

|
||||

|
||||
BIN
tests/data/requirements/pic/1.png
Normal file
BIN
tests/data/requirements/pic/1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 377 KiB |
BIN
tests/data/requirements/pic/2.png
Normal file
BIN
tests/data/requirements/pic/2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 206 KiB |
BIN
tests/data/requirements/pic/3.png
Normal file
BIN
tests/data/requirements/pic/3.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 352 KiB |
BIN
tests/data/requirements/pic/4.png
Normal file
BIN
tests/data/requirements/pic/4.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 84 KiB |
BIN
tests/data/requirements/pic/5.png
Normal file
BIN
tests/data/requirements/pic/5.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 93 KiB |
|
|
@ -0,0 +1,28 @@
|
|||
import pytest
|
||||
|
||||
from metagpt.actions.requirement_analysis.requirement.pic2txt import Pic2Txt
|
||||
from metagpt.const import TEST_DATA_PATH
|
||||
from metagpt.utils.common import aread
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_pic2txt(context):
    """Run Pic2Txt on the sample pictures and requirement texts and expect a non-empty response."""
    requirements_dir = TEST_DATA_PATH / "requirements"
    # The fixture pictures are numbered 1.png .. 5.png.
    images = [requirements_dir / "pic" / f"{number}.png" for number in range(1, 6)]
    textual_user_requirements = await aread(filename=requirements_dir / "1.original_requirement.txt")
    acknowledge = await aread(filename=requirements_dir / "1.acknowledge.md")

    pic2txt = Pic2Txt(context=context)
    response = await pic2txt.run(
        image_paths=images,
        textual_user_requirement=textual_user_requirements,
        acknowledge=acknowledge,
    )
    assert response
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly; "-s" is forwarded to pytest unchanged.
    cli_args = [__file__, "-s"]
    pytest.main(cli_args)
|
||||
Loading…
Add table
Add a link
Reference in a new issue