mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-27 14:25:20 +02:00
feat: +pic2txt
This commit is contained in:
parent
742ff0e80a
commit
632452e2a1
12 changed files with 79 additions and 38 deletions
|
|
@ -9,6 +9,7 @@
|
|||
import asyncio
|
||||
import json
|
||||
import uuid
|
||||
from json import JSONDecodeError
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
|
|
@ -73,7 +74,10 @@ async def develop(
|
|||
output_dir = Path(output_dir) if output_dir else DEFAULT_WORKSPACE_ROOT / uuid.uuid4().hex
|
||||
|
||||
v = await aread(filename=user_requirement_filename)
|
||||
user_requirements = json.loads(v)
|
||||
try:
|
||||
user_requirements = json.loads(v)
|
||||
except JSONDecodeError:
|
||||
user_requirements = [v]
|
||||
v = await aread(filename=actors_filename)
|
||||
actors = json.loads(v)
|
||||
technical_constraint = await aread(filename=constraint_filename)
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ async def save_framework(
|
|||
output_dir = (
|
||||
Path(output_dir)
|
||||
if output_dir
|
||||
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%S") + uuid.uuid4().hex[0:8])
|
||||
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%ST") + uuid.uuid4().hex[0:8])
|
||||
)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
|
|
|||
|
|
@ -99,8 +99,8 @@ Parts not mentioned in the "Legacy TRD" will be handled by other TRDs, therefore
|
|||
Do the parameters of the interface of the external system used in the code comply with its specifications in 'Acknowledge'?
|
||||
Is there a lack of necessary configuration files?
|
||||
Return a markdown JSON object with:
|
||||
- a "is_pass" key containing a true boolean value if there is not any issue in the "Legacy Outputs";
|
||||
- an "issues" key containing a string list of natural text about the issues that need to be addressed, found in the "Legacy Outputs" if any exist, each issue found must provide a detailed description and include reasons;
|
||||
- a "conclusion" key containing the evaluation conclusion;
|
||||
- a "misalignment" key containing the judgement detail of the natural text string list about the misalignment with "Legacy TRD";
|
||||
- a "is_pass" key containing a true boolean value if there is not any issue in the "Legacy Outputs";
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
@Author : mashenquan
|
||||
@File : pic2txt.py
|
||||
"""
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
|
|
@ -31,23 +33,41 @@ class Pic2Txt(Action):
|
|||
evaluation_conclusion: str = "",
|
||||
additional_technical_requirements: str = "",
|
||||
) -> str:
|
||||
base64_images = [encode_image(i) for i in image_paths]
|
||||
descriptions = {}
|
||||
for i in image_paths:
|
||||
filename = Path(i)
|
||||
base64_image = encode_image(filename)
|
||||
rsp = await self._pic2txt(
|
||||
"Generate a paragraph of text based on the content of the image, the language of the text is consistent with the language in the image.",
|
||||
base64_image=base64_image,
|
||||
)
|
||||
descriptions[filename.name] = rsp
|
||||
|
||||
prompt = PROMPT.format(
|
||||
textual_user_requirement=textual_user_requirement,
|
||||
acknowledge=to_markdown_code_block(val=acknowledge),
|
||||
acknowledge=to_markdown_code_block(val=json.dumps(descriptions), type_="json"),
|
||||
legacy_output=to_markdown_code_block(val=legacy_output),
|
||||
evaluation_conclusion=evaluation_conclusion,
|
||||
additional_technical_requirements=to_markdown_code_block(val=additional_technical_requirements),
|
||||
)
|
||||
return await self._write(prompt, base64_images=base64_images)
|
||||
return await self._write(prompt)
|
||||
|
||||
@retry(
|
||||
wait=wait_random_exponential(min=1, max=20),
|
||||
stop=stop_after_attempt(6),
|
||||
after=general_after_log(logger),
|
||||
)
|
||||
async def _write(self, prompt: str, base64_images: List[str]) -> str:
|
||||
rsp = await self.llm.aask(prompt, images=base64_images)
|
||||
async def _write(self, prompt: str) -> str:
|
||||
rsp = await self.llm.aask(prompt)
|
||||
return rsp
|
||||
|
||||
@retry(
|
||||
wait=wait_random_exponential(min=1, max=20),
|
||||
stop=stop_after_attempt(6),
|
||||
after=general_after_log(logger),
|
||||
)
|
||||
async def _pic2txt(self, prompt: str, base64_image: str) -> str:
|
||||
rsp = await self.llm.aask(prompt, images=base64_image)
|
||||
return rsp
|
||||
|
||||
|
||||
|
|
@ -70,9 +90,9 @@ PROMPT = """
|
|||
---
|
||||
You are a tool that generates an intact textual user requirements given a few of textual fragments of user requirements and some fragments of UI pictures.
|
||||
The content of "Textual User Requirements" provides a few of textual fragments of user requirements;
|
||||
The content of "Acknowledge" provides additional information related to the user requirements;
|
||||
The content of "Acknowledge" provides the descriptions of pictures used in "Textual User Requirements";
|
||||
"Legacy Outputs" contains the intact textual user requirements generated by you last time, which you can improve by addressing the issues raised in "Evaluation Conclusion";
|
||||
"Additional Technical Requirements" specifies the additional technical requirements that the generated textual user requirements must meet;
|
||||
你需要将图片中的内容转换成文字描述,合并到"Textual User Requirements",以生成完整的用户需求;
|
||||
You need to merge the text content of the corresponding image in the "Acknowledge" into the "Textual User Requirements" to generate a complete, natural and coherent description of the user requirements;
|
||||
Return the intact textual user requirements according to the given fragments of the user requirement of "Textual User Requirements" and the UI pictures;
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -107,9 +107,9 @@ If there are interaction events with external systems in "TRD Design", you must
|
|||
Does the sequence of steps in "Interaction Events" cause performance or cost issues? Please provide detailed descriptions and reasons;
|
||||
It is problematic if the data stream composed of input/output contains passive or irrelevant data;
|
||||
Return a markdown JSON object with:
|
||||
- a "is_pass" key containing a true boolean value if there is not any issue in the "TRD Design";
|
||||
- an "issues" key containing a string list of natural text about the issues that need to be addressed, found in the "TRD Design" if any exist, each issue found must provide a detailed description and include reasons;
|
||||
- a "conclusion" key containing the evaluation conclusion;
|
||||
- a "correspondence_between" key containing the judgement detail of the natural text string list about the correspondence between "Interaction Events" and "TRD Design" steps;
|
||||
- a "misalignment" key containing the judgement detail of the natural text string list about the misalignment with "User Requirements";
|
||||
- a "is_pass" key containing a true boolean value if there is not any issue in the "TRD Design";
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
|
@ -16,7 +18,7 @@ from metagpt.actions.requirement_analysis.trd import (
|
|||
EvaluateTRD,
|
||||
WriteTRD,
|
||||
)
|
||||
from metagpt.const import ASSISTANT_ALIAS, TEST_DATA_PATH
|
||||
from metagpt.const import ASSISTANT_ALIAS, DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
|
||||
from metagpt.context import Context
|
||||
from metagpt.logs import ToolLogItem, log_tool_output, logger
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
|
|
@ -200,6 +202,12 @@ async def write_framework(
|
|||
evaluation_conclusion = ""
|
||||
acknowledgement = await mock_asearch_acknowledgement(use_case_actors) # Replaced by acknowledgement_repo later.
|
||||
loop_count = 0
|
||||
output_dir = (
|
||||
Path(output_dir)
|
||||
if output_dir
|
||||
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%ST") + uuid.uuid4().hex[0:8])
|
||||
)
|
||||
file_list = []
|
||||
while not is_pass and (context.cost_manager.total_cost < context.cost_manager.max_budget):
|
||||
try:
|
||||
framework = await write_framework.run(
|
||||
|
|
@ -226,9 +234,9 @@ async def write_framework(
|
|||
logger.info(f"Loop {loop_count}")
|
||||
if context.cost_manager.total_cost < 1 and loop_count > max_loop:
|
||||
break
|
||||
file_list = await save_framework(dir_data=framework, trd=trd, output_dir=output_dir)
|
||||
logger.info(f"Output:\n{file_list}")
|
||||
|
||||
file_list = await save_framework(dir_data=framework, trd=trd, output_dir=output_dir)
|
||||
logger.info(f"Output:\n{file_list}")
|
||||
return "## Software Framework" + "".join([f"\n- {i}" for i in file_list])
|
||||
|
||||
|
||||
|
|
@ -237,13 +245,12 @@ async def write_trd_and_framework(
|
|||
use_case_actors: str,
|
||||
user_requirements: str,
|
||||
additional_technical_requirements: str,
|
||||
investment: float = 17.0,
|
||||
investment: float = 50.0,
|
||||
output_dir: Optional[str] = "",
|
||||
context: Optional[Context] = None,
|
||||
) -> str:
|
||||
context = context or Context(cost_manager=CostManager(max_budget=investment))
|
||||
trd = await write_trd(use_case_actors=use_case_actors, user_requirements=user_requirements, context=context)
|
||||
|
||||
return await write_framework(
|
||||
use_case_actors=use_case_actors,
|
||||
trd=trd,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
- Using pure javascript without any third-party package, 法务查询者与国际小超人钉钉小程序之间UI用web;
|
||||
- 法务中台网址:`https://mock.apipark.cn/m1/4717294-4369585-default`;
|
||||
- 法务中台网址:`https://mock.apipark.cn/m1/4717294-4369585-default`, 只有国际小超人钉钉小程序能访问;
|
||||
- 写代码时,不要单元测试代码;
|
||||
- 如果使用了接口 ID 6, 它的返回结果要去重复项;
|
||||
- 如果使用了接口 ID 6, 它的返回结果要去重复项;
|
||||
- 不需要实现登录相关操作;
|
||||
|
|
@ -1,30 +1,16 @@
|
|||
3.1.功能总述
|
||||
|
||||
国际小超人钉钉小程序一期支持法务文档一键查看、各国法律意见检索、申请合同模板三个功能。
|
||||
|
||||
1、法务文档提供入口,用户点击后一键进入语雀查看,后续法务在语雀中进行维护。——pc和手机
|
||||
|
||||
2、各国法律意见在钉钉端可进行多维度检索和查看。——pc和手机【待定】
|
||||
|
||||
3、bd可以在钉钉端申请合同模板,提交申请后即可以下载模板。——可以pc和手机申请,但是下载只能通过pc
|
||||
|
||||
小程序底部有3个tab,首页、模板、我的
|
||||
|
||||
3.2.首页
|
||||
|
||||
首页有两个分区,上面部分是法律意见检索栏。
|
||||
|
||||
用户第一次进入小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
|
||||
法务查询者第一次进入国际小超人钉钉小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
|
||||
|
||||
【首页】
|
||||

|
||||
【按国家名维度搜索】
|
||||
|
||||
用户在搜索框中进行检索时采用typeahead,只能下拉选择数据库中有的国家名称。
|
||||
法务查询者在国际小超人钉钉小程序的搜索框中进行检索时采用typeahead,只能下拉选择法务中台中有的国家名称。
|
||||

|
||||
【检索结果】
|
||||
|
||||
可根据滚筒切换业务线
|
||||

|
||||

|
||||

|
||||
法务查询者可根据国际小超人钉钉小程序UI上的滚筒切换业务线
|
||||

|
||||
25
tests/data/requirements/1.txt
Normal file
25
tests/data/requirements/1.txt
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
## Textual User Requirements
|
||||
|
||||
### 3.2. 首页
|
||||
|
||||
首页有两个分区,上面部分是法律意见检索栏。
|
||||
|
||||
法务查询者第一次进入国际小超人钉钉小程序展示引导页,以后进入不再展示,点击「我知道了」引导页消失。
|
||||
|
||||
#### 首页
|
||||

|
||||
这是一个名为“法务小超人”的移动应用程序的界面截图。界面顶部显示了应用名称和一个可切换语言的按钮“English”。在界面中间部分,有一个标题“法律意见查询”,以及一个搜索框,提示输入国家名称以查询法律意见。下方显示已收录法律意见8394篇。界面下半部分是“法务 Q&A”部分,列出了一些法律相关的选项,例如“国际法务接入口人”、“国内法务接入口人”、“国际法律协议合同办理指引”和“国内法律协议合同办理指引”。界面底部有三个导航按钮,分别是“首页”、“模板”和“我的”。
|
||||
|
||||
#### 按国家名维度搜索
|
||||
法务查询者在国际小超人钉钉小程序的搜索框中进行检索时采用typeahead,只能下拉选择法务中台中有的国家名称。
|
||||

|
||||
在这张图像中,用户正在一个名为“法律意见查询”的应用中进行国家名称的搜索。用户在搜索框中输入国家名称时,系统会提供下拉建议。这些建议基于 typeahead 功能,从数据库中筛选出匹配的国家名称供用户选择。目前,搜索结果包含了“中国”和“菲律宾”两个具体的国家名称,其它显示为“国家名”。用户可以通过下拉菜单快速选择所需的国家名称。
|
||||
|
||||
#### 检索结果
|
||||
法务查询者可根据国际小超人钉钉小程序UI上的滚筒切换业务线
|
||||

|
||||
这张图片展示了一个移动应用的界面,界面标题为“法律意见详情”。用户可以根据具体情况切换业务线。界面中有多个字段,包括“国家名称”、“国家情况描述”、“业务线”、“产品法规分析”和“签约主体”。第一张截图显示了详细的法律情报信息,包含区域名称、区域情况描述、业务线和产品法规概述等字段。第二张截图显示了“法律意见详情”界面,其中列出了国家名称、国家情况描述、业务线、产品法规分析和签约主体。第三张截图与第二张相似,但显示了选项的可选择状态。最下方有“取消”和“确定”的按钮。
|
||||
法务查询者从国家详情中的业务线名列表中选出要查看的业务线。
|
||||
|
||||
#### 查看法律意见详情
|
||||
国际小超人钉钉小程序用国家代码和业务代码做参数,查询法律意见详情,然后将结果展示给法务查询者。
|
||||
BIN
tests/data/requirements/pic/2.1.png
Normal file
BIN
tests/data/requirements/pic/2.1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 180 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 352 KiB After Width: | Height: | Size: 653 KiB |
|
|
@ -11,8 +11,6 @@ async def test_pic2txt(context):
|
|||
TEST_DATA_PATH / "requirements/pic/1.png",
|
||||
TEST_DATA_PATH / "requirements/pic/2.png",
|
||||
TEST_DATA_PATH / "requirements/pic/3.png",
|
||||
TEST_DATA_PATH / "requirements/pic/4.png",
|
||||
TEST_DATA_PATH / "requirements/pic/5.png",
|
||||
]
|
||||
textual_user_requirements = await aread(filename=TEST_DATA_PATH / "requirements/1.original_requirement.txt")
|
||||
acknowledge = await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue