feat: +pic2txt

This commit is contained in:
莘权 马 2024-06-27 22:13:41 +08:00
parent 742ff0e80a
commit 632452e2a1
12 changed files with 79 additions and 38 deletions

View file

@ -54,7 +54,7 @@ async def save_framework(
output_dir = (
Path(output_dir)
if output_dir
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%S") + uuid.uuid4().hex[0:8])
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%ST") + uuid.uuid4().hex[0:8])
)
output_dir.mkdir(parents=True, exist_ok=True)

View file

@ -99,8 +99,8 @@ Parts not mentioned in the "Legacy TRD" will be handled by other TRDs, therefore
Do the parameters of the interface of the external system used in the code comply with its specifications in 'Acknowledge'?
Is there a lack of necessary configuration files?
Return a markdown JSON object with:
- a "is_pass" key containing a true boolean value if there is not any issue in the "Legacy Outputs";
- an "issues" key containing a string list of natural text about the issues that need to be addressed, found in the "Legacy Outputs" if any exist, each issue found must provide a detailed description and include reasons;
- a "conclusion" key containing the evaluation conclusion;
- a "misalignment" key containing the judgement detail of the natural text string list about the misalignment with "Legacy TRD";
- a "is_pass" key containing a true boolean value if there is not any issue in the "Legacy Outputs";
"""

View file

@ -5,6 +5,8 @@
@Author : mashenquan
@File : pic2txt.py
"""
import json
from pathlib import Path
from typing import List
from tenacity import retry, stop_after_attempt, wait_random_exponential
@ -31,23 +33,41 @@ class Pic2Txt(Action):
evaluation_conclusion: str = "",
additional_technical_requirements: str = "",
) -> str:
base64_images = [encode_image(i) for i in image_paths]
descriptions = {}
for i in image_paths:
filename = Path(i)
base64_image = encode_image(filename)
rsp = await self._pic2txt(
"Generate a paragraph of text based on the content of the image, the language of the text is consistent with the language in the image.",
base64_image=base64_image,
)
descriptions[filename.name] = rsp
prompt = PROMPT.format(
textual_user_requirement=textual_user_requirement,
acknowledge=to_markdown_code_block(val=acknowledge),
acknowledge=to_markdown_code_block(val=json.dumps(descriptions), type_="json"),
legacy_output=to_markdown_code_block(val=legacy_output),
evaluation_conclusion=evaluation_conclusion,
additional_technical_requirements=to_markdown_code_block(val=additional_technical_requirements),
)
return await self._write(prompt, base64_images=base64_images)
return await self._write(prompt)
@retry(
wait=wait_random_exponential(min=1, max=20),
stop=stop_after_attempt(6),
after=general_after_log(logger),
)
async def _write(self, prompt: str, base64_images: List[str]) -> str:
rsp = await self.llm.aask(prompt, images=base64_images)
async def _write(self, prompt: str) -> str:
rsp = await self.llm.aask(prompt)
return rsp
@retry(
    wait=wait_random_exponential(min=1, max=20),
    stop=stop_after_attempt(6),
    after=general_after_log(logger),
)
async def _pic2txt(self, prompt: str, base64_image: str) -> str:
    """Send ``prompt`` to the LLM together with a single image and return its reply.

    The call is wrapped in a tenacity ``retry`` policy: randomized exponential
    wait configured with ``min=1`` and ``max=20``, giving up after 6 attempts,
    with ``general_after_log(logger)`` registered as the ``after`` callback.

    Args:
        prompt: Instruction text passed to ``self.llm.aask``.
        base64_image: Image payload forwarded as the ``images`` argument
            (the caller produces it with ``encode_image``).

    Returns:
        The response returned by ``self.llm.aask``.
    """
    return await self.llm.aask(prompt, images=base64_image)
@ -70,9 +90,9 @@ PROMPT = """
---
You are a tool that generates an intact textual user requirements given a few of textual fragments of user requirements and some fragments of UI pictures.
The content of "Textual User Requirements" provides a few of textual fragments of user requirements;
The content of "Acknowledge" provides additional information related to the user requirements;
The content of "Acknowledge" provides the descriptions of pictures used in "Textual User Requirements";
"Legacy Outputs" contains the intact textual user requirements generated by you last time, which you can improve by addressing the issues raised in "Evaluation Conclusion";
"Additional Technical Requirements" specifies the additional technical requirements that the generated textual user requirements must meet;
你需要将图片中的内容转换成文字描述合并到"Textual User Requirements"以生成完整的用户需求
You need to merge the text content of the corresponding image in the "Acknowledge" into the "Textual User Requirements" to generate a complete, natural and coherent description of the user requirements;
Return the intact textual user requirements according to the given fragments of the user requirement of "Textual User Requirements" and the UI pictures;
"""

View file

@ -107,9 +107,9 @@ If there are interaction events with external systems in "TRD Design", you must
Does the sequence of steps in "Interaction Events" cause performance or cost issues? Please provide detailed descriptions and reasons;
It is problematic if the data stream composed of input/output contains passive or irrelevant data;
Return a markdown JSON object with:
- a "is_pass" key containing a true boolean value if there is not any issue in the "TRD Design";
- an "issues" key containing a string list of natural text about the issues that need to be addressed, found in the "TRD Design" if any exist, each issue found must provide a detailed description and include reasons;
- a "conclusion" key containing the evaluation conclusion;
- a "correspondence_between" key containing the judgement detail of the natural text string list about the correspondence between "Interaction Events" and "TRD Design" steps;
- a "misalignment" key containing the judgement detail of the natural text string list about the misalignment with "User Requirements";
- a "is_pass" key containing a true boolean value if there is not any issue in the "TRD Design";
"""

View file

@ -2,6 +2,8 @@
# -*- coding: utf-8 -*-
from __future__ import annotations
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional
@ -16,7 +18,7 @@ from metagpt.actions.requirement_analysis.trd import (
EvaluateTRD,
WriteTRD,
)
from metagpt.const import ASSISTANT_ALIAS, TEST_DATA_PATH
from metagpt.const import ASSISTANT_ALIAS, DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
from metagpt.context import Context
from metagpt.logs import ToolLogItem, log_tool_output, logger
from metagpt.tools.tool_registry import register_tool
@ -200,6 +202,12 @@ async def write_framework(
evaluation_conclusion = ""
acknowledgement = await mock_asearch_acknowledgement(use_case_actors) # Replaced by acknowledgement_repo later.
loop_count = 0
output_dir = (
Path(output_dir)
if output_dir
else DEFAULT_WORKSPACE_ROOT / (datetime.now().strftime("%Y%m%d%H%M%ST") + uuid.uuid4().hex[0:8])
)
file_list = []
while not is_pass and (context.cost_manager.total_cost < context.cost_manager.max_budget):
try:
framework = await write_framework.run(
@ -226,9 +234,9 @@ async def write_framework(
logger.info(f"Loop {loop_count}")
if context.cost_manager.total_cost < 1 and loop_count > max_loop:
break
file_list = await save_framework(dir_data=framework, trd=trd, output_dir=output_dir)
logger.info(f"Output:\n{file_list}")
file_list = await save_framework(dir_data=framework, trd=trd, output_dir=output_dir)
logger.info(f"Output:\n{file_list}")
return "## Software Framework" + "".join([f"\n- {i}" for i in file_list])
@ -237,13 +245,12 @@ async def write_trd_and_framework(
use_case_actors: str,
user_requirements: str,
additional_technical_requirements: str,
investment: float = 17.0,
investment: float = 50.0,
output_dir: Optional[str] = "",
context: Optional[Context] = None,
) -> str:
context = context or Context(cost_manager=CostManager(max_budget=investment))
trd = await write_trd(use_case_actors=use_case_actors, user_requirements=user_requirements, context=context)
return await write_framework(
use_case_actors=use_case_actors,
trd=trd,