feat: + pic2txt

This commit is contained in:
莘权 马 2024-06-28 13:50:47 +08:00
parent 5f55590a57
commit dcb76de45e
6 changed files with 55 additions and 19 deletions

View file

@ -20,7 +20,7 @@ from metagpt.utils.common import encode_image, general_after_log, to_markdown_co
@register_tool(include_functions=["run"])
class Pic2Txt(Action):
"""Pic2Txt deal with the following situations:
1. Given a picture about the user requirements, write out the textual user requirements.
Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements.
"""
async def run(
@ -28,11 +28,36 @@ class Pic2Txt(Action):
*,
image_paths: List[str],
textual_user_requirement: str = "",
acknowledge: str = "",
legacy_output: str = "",
evaluation_conclusion: str = "",
additional_technical_requirements: str = "",
) -> str:
"""
Given some pictures depicting user requirements alongside a contextual description, write out the intact textual user requirements.
Args:
image_paths (List[str]): A list of file paths to the input image(s) depicting user requirements.
textual_user_requirement (str, optional): Textual user requirement that accompanies the given images, if any.
legacy_output (str, optional): The intact textual user requirements generated by you last time, if any.
evaluation_conclusion (str, optional): Conclusion or evaluation based on the processed requirements.
additional_technical_requirements (str, optional): Any supplementary technical details relevant to the process.
Returns:
str: Textual representation of user requirements extracted from the provided image(s).
Raises:
ValueError: If image_paths list is empty.
OSError: If there is an issue accessing or reading the image files.
Example:
>>> images = ["requirements/pic/1.png", "requirements/pic/2.png", "requirements/pic/3.png"]
>>> textual_user_requirements = "User requirement paragraph 1 ..., ![](1.png). paragraph 2...![](2.png)..."
>>> action = Pic2Txt()
>>> intact_textual_user_requirements = await action.run(image_paths=images, textual_user_requirement=textual_user_requirements)
>>> print(intact_textual_user_requirements)
"User requirement paragraph 1 ..., ![...](1.png) This picture describes... paragraph 2...![...](2.png)..."
"""
descriptions = {}
for i in image_paths:
filename = Path(i)