feat: +pic2txt

This commit is contained in:
莘权 马 2024-06-27 22:13:41 +08:00
parent 742ff0e80a
commit 632452e2a1
12 changed files with 79 additions and 38 deletions

View file

@ -9,6 +9,7 @@
import asyncio
import json
import uuid
from json import JSONDecodeError
from pathlib import Path
from typing import Dict, List
@ -73,7 +74,10 @@ async def develop(
output_dir = Path(output_dir) if output_dir else DEFAULT_WORKSPACE_ROOT / uuid.uuid4().hex
v = await aread(filename=user_requirement_filename)
user_requirements = json.loads(v)
try:
user_requirements = json.loads(v)
except JSONDecodeError:
user_requirements = [v]
v = await aread(filename=actors_filename)
actors = json.loads(v)
technical_constraint = await aread(filename=constraint_filename)

View file

@ -54,7 +54,7 @@ async def save_framework(
output_dir = (
Path(output_dir)
if output_dir
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%S") + uuid.uuid4().hex[0:8])
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%ST") + uuid.uuid4().hex[0:8])
)
output_dir.mkdir(parents=True, exist_ok=True)

View file

@ -99,8 +99,8 @@ Parts not mentioned in the "Legacy TRD" will be handled by other TRDs, therefore
Do the parameters of the interface of the external system used in the code comply with its specifications in 'Acknowledge'?
Is there a lack of necessary configuration files?
Return a markdown JSON object with:
- a "is_pass" key containing a true boolean value if there is not any issue in the "Legacy Outputs";
- an "issues" key containing a string list of natural text about the issues that need to be addressed, found in the "Legacy Outputs" if any exist, each issue found must provide a detailed description and include reasons;
- a "conclusion" key containing the evaluation conclusion;
- a "misalignment" key containing the judgement detail of the natural text string list about the misalignment with "Legacy TRD";
- a "is_pass" key containing a true boolean value if there is not any issue in the "Legacy Outputs";
"""

View file

@ -5,6 +5,8 @@
@Author : mashenquan
@File : pic2txt.py
"""
import json
from pathlib import Path
from typing import List
from tenacity import retry, stop_after_attempt, wait_random_exponential
@ -31,23 +33,41 @@ class Pic2Txt(Action):
evaluation_conclusion: str = "",
additional_technical_requirements: str = "",
) -> str:
base64_images = [encode_image(i) for i in image_paths]
descriptions = {}
for i in image_paths:
filename = Path(i)
base64_image = encode_image(filename)
rsp = await self._pic2txt(
"Generate a paragraph of text based on the content of the image, the language of the text is consistent with the language in the image.",
base64_image=base64_image,
)
descriptions[filename.name] = rsp
prompt = PROMPT.format(
textual_user_requirement=textual_user_requirement,
acknowledge=to_markdown_code_block(val=acknowledge),
acknowledge=to_markdown_code_block(val=json.dumps(descriptions), type_="json"),
legacy_output=to_markdown_code_block(val=legacy_output),
evaluation_conclusion=evaluation_conclusion,
additional_technical_requirements=to_markdown_code_block(val=additional_technical_requirements),
)
return await self._write(prompt, base64_images=base64_images)
return await self._write(prompt)
@retry(
wait=wait_random_exponential(min=1, max=20),
stop=stop_after_attempt(6),
after=general_after_log(logger),
)
async def _write(self, prompt: str, base64_images: List[str]) -> str:
rsp = await self.llm.aask(prompt, images=base64_images)
async def _write(self, prompt: str) -> str:
rsp = await self.llm.aask(prompt)
return rsp
@retry(
    stop=stop_after_attempt(6),
    wait=wait_random_exponential(min=1, max=20),
    after=general_after_log(logger),
)
async def _pic2txt(self, prompt: str, base64_image: str) -> str:
    """Ask the LLM to answer ``prompt`` with a single image attached.

    The call is wrapped by ``retry``: it stops after 6 attempts, waits a
    random exponential interval (min=1, max=20) between attempts, and
    reports each attempt through ``general_after_log(logger)``.

    Args:
        prompt: Instruction text sent to the LLM.
        base64_image: The image payload forwarded as ``images``; presumably
            a base64-encoded picture, as the parameter name suggests.

    Returns:
        The LLM response, returned unchanged.
    """
    return await self.llm.aask(prompt, images=base64_image)
@ -70,9 +90,9 @@ PROMPT = """
---
You are a tool that generates intact textual user requirements given a few textual fragments of user requirements and some fragments of UI pictures.
The content of "Textual User Requirements" provides a few textual fragments of user requirements;
The content of "Acknowledge" provides additional information related to the user requirements;
The content of "Acknowledge" provides the descriptions of pictures used in "Textual User Requirements";
"Legacy Outputs" contains the intact textual user requirements generated by you last time, which you can improve by addressing the issues raised in "Evaluation Conclusion";
"Additional Technical Requirements" specifies the additional technical requirements that the generated textual user requirements must meet;
你需要将图片中的内容转换成文字描述合并到"Textual User Requirements"以生成完整的用户需求
You need to merge the text content of the corresponding image in the "Acknowledge" into the "Textual User Requirements" to generate a complete, natural and coherent description of the user requirements;
Return the intact textual user requirements according to the given fragments of the user requirement of "Textual User Requirements" and the UI pictures;
"""

View file

@ -107,9 +107,9 @@ If there are interaction events with external systems in "TRD Design", you must
Does the sequence of steps in "Interaction Events" cause performance or cost issues? Please provide detailed descriptions and reasons;
It is problematic if the data stream composed of input/output contains passive or irrelevant data;
Return a markdown JSON object with:
- a "is_pass" key containing a true boolean value if there is not any issue in the "TRD Design";
- an "issues" key containing a string list of natural text about the issues that need to be addressed, found in the "TRD Design" if any exist, each issue found must provide a detailed description and include reasons;
- a "conclusion" key containing the evaluation conclusion;
- a "correspondence_between" key containing the judgement detail of the natural text string list about the correspondence between "Interaction Events" and "TRD Design" steps;
- a "misalignment" key containing the judgement detail of the natural text string list about the misalignment with "User Requirements";
- a "is_pass" key containing a true boolean value if there is not any issue in the "TRD Design";
"""

View file

@ -2,6 +2,8 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional
@ -16,7 +18,7 @@ from metagpt.actions.requirement_analysis.trd import (
EvaluateTRD,
WriteTRD,
)
from metagpt.const import ASSISTANT_ALIAS, TEST_DATA_PATH
from metagpt.const import ASSISTANT_ALIAS, DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
from metagpt.context import Context
from metagpt.logs import ToolLogItem, log_tool_output, logger
from metagpt.tools.tool_registry import register_tool
@ -200,6 +202,12 @@ async def write_framework(
evaluation_conclusion = ""
acknowledgement = await mock_asearch_acknowledgement(use_case_actors) # Replaced by acknowledgement_repo later.
loop_count = 0
output_dir = (
Path(output_dir)
if output_dir
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%ST") + uuid.uuid4().hex[0:8])
)
file_list = []
while not is_pass and (context.cost_manager.total_cost < context.cost_manager.max_budget):
try:
framework = await write_framework.run(
@ -226,9 +234,9 @@ async def write_framework(
logger.info(f"Loop {loop_count}")
if context.cost_manager.total_cost < 1 and loop_count > max_loop:
break
file_list = await save_framework(dir_data=framework, trd=trd, output_dir=output_dir)
logger.info(f"Output:\n{file_list}")
file_list = await save_framework(dir_data=framework, trd=trd, output_dir=output_dir)
logger.info(f"Output:\n{file_list}")
return "## Software Framework" + "".join([f"\n- {i}" for i in file_list])
@ -237,13 +245,12 @@ async def write_trd_and_framework(
use_case_actors: str,
user_requirements: str,
additional_technical_requirements: str,
investment: float = 17.0,
investment: float = 50.0,
output_dir: Optional[str] = "",
context: Optional[Context] = None,
) -> str:
context = context or Context(cost_manager=CostManager(max_budget=investment))
trd = await write_trd(use_case_actors=use_case_actors, user_requirements=user_requirements, context=context)
return await write_framework(
use_case_actors=use_case_actors,
trd=trd,

View file

@ -1,4 +1,5 @@
- Using pure javascript without any third-party package, 法务查询者与国际小超人钉钉小程序之间UI用web
- 法务中台网址:`https://mock.apipark.cn/m1/4717294-4369585-default`;
- 法务中台网址:`https://mock.apipark.cn/m1/4717294-4369585-default`, 只有国际小超人钉钉小程序能访问;
- 写代码时,不要单元测试代码;
- 如果使用了接口 ID 6, 它的返回结果要去重复项;
- 如果使用了接口 ID 6, 它的返回结果要去重复项;
- 不需要实现登录相关操作;

View file

@ -1,30 +1,16 @@
3.1.功能总述
国际小超人钉钉小程序一期支持法务文档一键查看、各国法律意见检索、申请合同模板三个功能。
1、法务文档提供入口用户点击后一键进入语雀查看后续法务在语雀中进行维护。——pc和手机
2、各国法律意见在钉钉端可进行多维度检索和查看。——pc和手机【待定】
3、bd可以在钉钉端申请合同模板提交申请后即可以下载模板。——可以pc和手机申请但是下载只能通过pc
小程序底部有3个tab首页、模板、我的
3.2.首页
首页有两个分区,上面部分是法律意见检索栏。
用户第一次进入小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
法务查询者第一次进入国际小超人钉钉小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
【首页】
![](1.png)
【按国家名维度搜索】
用户在搜索框中进行检索时采用typeahead只能下拉选择数据库中有的国家名称。
法务查询者在国际小超人钉钉小程序的搜索框中进行检索时采用typeahead只能下拉选择法务中台中有的国家名称。
![](2.png)
【检索结果】
可根据滚筒切换业务线
![](3.png)
![](4.png)
![](5.png)
法务查询者可根据国际小超人钉钉小程序UI上的滚筒切换业务线
![](3.png)

View file

@ -0,0 +1,25 @@
## Textual User Requirements
### 3.2. 首页
首页有两个分区,上面部分是法律意见检索栏。
法务查询者第一次进入国际小超人钉钉小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
#### 首页
![首页](1.png)
这是一个名为“法务小超人”的移动应用程序的界面截图。界面顶部显示了应用名称和一个可切换语言的按钮“English”。在界面中间部分有一个标题“法律意见查询”以及一个搜索框提示输入国家名称以查询法律意见。下方显示已收录法律意见8394篇。界面下半部分是“法务 Q&A”部分列出了一些法律相关的选项例如“国际法务接入口人”、“国内法务接入口人”、“国际法律协议合同办理指引”和“国内法律协议合同办理指引”。界面底部有三个导航按钮分别是“首页”、“模板”和“我的”。
#### 按国家名维度搜索
法务查询者在国际小超人钉钉小程序的搜索框中进行检索时采用typeahead只能下拉选择法务中台中有的国家名称。
![按国家名维度搜索](2.png)
在这张图像中,用户正在一个名为“法律意见查询”的应用中进行国家名称的搜索。用户在搜索框中输入国家名称时,系统会提供下拉建议。这些建议基于 typeahead 功能,从数据库中筛选出匹配的国家名称供用户选择。目前,搜索结果包含了“中国”和“菲律宾”两个具体的国家名称,其它显示为“国家名”。用户可以通过下拉菜单快速选择所需的国家名称。
#### 检索结果
法务查询者可根据国际小超人钉钉小程序UI上的滚筒切换业务线
![检索结果](3.png)
这张图片展示了一个移动应用的界面,界面标题为“法律意见详情”。用户可以根据具体情况切换业务线。界面中有多个字段,包括“国家名称”、“国家情况描述”、“业务线”、“产品法规分析”和“签约主体”。第一张截图显示了详细的法律情报信息,包含区域名称、区域情况描述、业务线和产品法规概述等字段。第二张截图显示了“法律意见详情”界面,其中列出了国家名称、国家情况描述、业务线、产品法规分析和签约主体。第三张截图与第二张相似,但显示了选项的可选择状态。最下方有“取消”和“确定”的按钮。
法务查询者从国家详情中的业务线名列表中选出要查看的业务线。
#### 查看法律意见详情
国际小超人钉钉小程序用国家代码和业务代码做参数,查询法律意见详情,然后将结果展示给法务查询者。

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 352 KiB

After

Width:  |  Height:  |  Size: 653 KiB

Before After
Before After

View file

@ -11,8 +11,6 @@ async def test_pic2txt(context):
TEST_DATA_PATH / "requirements/pic/1.png",
TEST_DATA_PATH / "requirements/pic/2.png",
TEST_DATA_PATH / "requirements/pic/3.png",
TEST_DATA_PATH / "requirements/pic/4.png",
TEST_DATA_PATH / "requirements/pic/5.png",
]
textual_user_requirements = await aread(filename=TEST_DATA_PATH / "requirements/1.original_requirement.txt")
acknowledge = await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md")