feat: + pic2txt

2026-07-05 16:02:14 +02:00 · 2024-06-28 13:50:47 +08:00 · 2024-06-28 13:50:47 +08:00 · dcb76de45e
commit dcb76de45e
parent 5f55590a57
6 changed files with 55 additions and 19 deletions
--- a/metagpt/actions/requirement_analysis/requirement/pic2txt.py
+++ b/metagpt/actions/requirement_analysis/requirement/pic2txt.py
@ -20,7 +20,7 @@ from metagpt.utils.common import encode_image, general_after_log, to_markdown_co
@register_tool(include_functions=["run"])
 class Pic2Txt(Action):
    """Pic2Txt deal with the following situations:
-    1. Given a picture about the user requirements, write out the textual user requirements.
+    Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements.
    """

    async def run(
@ -28,11 +28,36 @@ class Pic2Txt(Action):
        *,
        image_paths: List[str],
        textual_user_requirement: str = "",
-        acknowledge: str = "",
        legacy_output: str = "",
        evaluation_conclusion: str = "",
        additional_technical_requirements: str = "",
    ) -> str:
+        """
+        Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements
+
+        Args:
+            image_paths (List[str]): A list of file paths to the input image(s) depicting user requirements.
+            textual_user_requirement (str, optional): Textual user requirement that accompanies the given images, if any.
+            legacy_output (str, optional): The intact textual user requirements generated by you last time, if any.
+            evaluation_conclusion (str, optional): Conclusion or evaluation based on the processed requirements.
+            additional_technical_requirements (str, optional): Any supplementary technical details relevant to the process.
+
+        Returns:
+            str: Textual representation of user requirements extracted from the provided image(s).
+
+        Raises:
+            ValueError: If image_paths list is empty.
+            OSError: If there is an issue accessing or reading the image files.
+
+        Example:
+            >>> images = ["requirements/pic/1.png", "requirements/pic/2.png", "requirements/pic/3.png"]
+            >>> textual_user_requirements = "User requirement paragraph 1 ..., ![](1.png). paragraph 2...![](2.png)..."
+            >>> action = Pic2Txt()
+            >>> intact_textual_user_requirements = await action.run(image_paths=images, textual_user_requirement=textual_user_requirements)
+            >>> print(intact_textual_user_requirements)
+            "User requirement paragraph 1 ..., ![...](1.png) This picture describes... paragraph 2...![...](2.png)..."
+
+        """
        descriptions = {}
        for i in image_paths:
            filename = Path(i)