From 99c143e8f301f89738eccdb4988552fc0a4a8cec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= <mashenquan@fuzhi.ai>
Date: Fri, 18 Aug 2023 20:09:06 +0800
Subject: [PATCH] feat: +metagpt text to image

---
 .gitignore                                    |   1 +
 .well-known/metagpt_oas3_api.yaml             |  47 +++++++-
 metagpt/tools/metagpt_text_to_image.py        | 112 ++++++++++++++++++
 ...bedding.py => openai_text_to_embedding.py} |   6 +-
 ...ext_2_image.py => openai_text_to_image.py} |   6 +-
 5 files changed, 164 insertions(+), 8 deletions(-)
 create mode 100644 metagpt/tools/metagpt_text_to_image.py
 rename metagpt/tools/{openai_text_2_embedding.py => openai_text_to_embedding.py} (94%)
 rename metagpt/tools/{openai_text_2_image.py => openai_text_to_image.py} (94%)

diff --git a/.gitignore b/.gitignore
index c4c79c733..2cba27484 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,3 +163,4 @@ workspace/*
 *.mmd
 tmp
 output.wav
+tmp.png
diff --git a/.well-known/metagpt_oas3_api.yaml b/.well-known/metagpt_oas3_api.yaml
index 7ae10579c..a226181a5 100644
--- a/.well-known/metagpt_oas3_api.yaml
+++ b/.well-known/metagpt_oas3_api.yaml
@@ -71,7 +71,7 @@ paths:
   /txt2img/openai:
     post:
       summary: "Convert Text to Base64-encoded Image Data Stream"
-      operationId: openai_text_2_image.oas3_openai_text_2_image
+      operationId: openai_text_to_image.oas3_openai_text_to_image
       requestBody:
         required: true
         content:
@@ -109,7 +109,7 @@ paths:
   /txt2embedding/openai:
     post:
       summary: Text to embedding
-      operationId: openai_text_2_embedding.oas3_openai_text_2_embedding
+      operationId: openai_text_to_embedding.oas3_openai_text_to_embedding
       description: Retrieve an embedding for the provided text using the OpenAI API.
       requestBody:
         content:
@@ -144,6 +144,49 @@ paths:
             application/json:
               schema:
                 $ref: "#/components/schemas/Error"
+
+  /txt2image/metagpt:
+    post:
+      summary: "Text to Image"
+      description: "Generate an image from the provided text using the MetaGPT Text-to-Image API."
+      operationId: metagpt_text_to_image.oas3_metagpt_text_to_image
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - text
+              properties:
+                text:
+                  type: string
+                  description: "The text used for image conversion."
+                size_type:
+                  type: string
+                  enum: ["512x512", "512x768"]
+                  default: "512x512"
+                  description: "Size of the generated image."
+                model_url:
+                  type: string
+                  description: "Model REST API URL for text-to-image."
+                  default: ""
+      responses:
+        '200':
+          description: "Base64-encoded image data."
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  image_data:
+                    type: string
+                    format: base64
+        '400':
+          description: "Bad Request"
+        '500':
+          description: "Internal Server Error"
+
 components:
   schemas:
     Embedding:
diff --git a/metagpt/tools/metagpt_text_to_image.py b/metagpt/tools/metagpt_text_to_image.py
new file mode 100644
index 000000000..393215df0
--- /dev/null
+++ b/metagpt/tools/metagpt_text_to_image.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/18
+@Author  : mashenquan
+@File    : metagpt_text_to_image.py
+@Desc    : MetaGPT Text-to-Image OAS3 api, which provides text-to-image functionality.
+"""
+import base64
+import os
+import sys
+from pathlib import Path
+from typing import List, Dict
+
+import requests
+from pydantic import BaseModel
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent))  # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initialize_environment
+from metagpt.logs import logger
+
+
+class MetaGPTText2Image:
+    def __init__(self, model_url):
+        """Store the endpoint URL that `text_2_image` posts to.
+        :param model_url: Model REST API URL; when falsy, env var METAGPT_TEXT_TO_IMAGE_MODEL is used instead.
+        """
+        self.model_url = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')
+
+    def text_2_image(self, text, size_type="512x512"):
+        """Request an image for `text` by POSTing a JSON payload to `self.model_url`.
+
+        :param text: The text used for image conversion (sent as the payload's "prompt").
+        :param size_type: "<width>x<height>" string; one of ['512x512', '512x768']
+        :return: First item of the response's `images` list (Base64 image data); "" if it is empty or the request fails.
+        """
+
+        headers = {
+            "Content-Type": "application/json"
+        }
+        dims = size_type.split("x")
+        data = {
+            "prompt": text,
+            "negative_prompt": "(easynegative:0.8),black, dark,Low resolution",
+            "override_settings": {"sd_model_checkpoint": "galaxytimemachinesGTM_photoV20"},
+            "seed": -1,
+            "batch_size": 1,
+            "n_iter": 1,
+            "steps": 20,
+            "cfg_scale": 11,
+            "width": int(dims[0]),
+            "height": int(dims[1]),  # second component of size_type, e.g. 512 or 768
+            "restore_faces": False,
+            "tiling": False,
+            "do_not_save_samples": False,
+            "do_not_save_grid": False,
+            "enable_hr": False,
+            "hr_scale": 2,
+            "hr_upscaler": "Latent",
+            "hr_second_pass_steps": 0,
+            "hr_resize_x": 0,
+            "hr_resize_y": 0,
+            "hr_upscale_to_x": 0,
+            "hr_upscale_to_y": 0,
+            "truncate_x": 0,
+            "truncate_y": 0,
+            "applied_old_hires_behavior_to": None,
+            "eta": None,
+            "sampler_index": "DPM++ SDE Karras",
+            "alwayson_scripts": {},
+        }
+
+        class ImageResult(BaseModel):
+            images: List
+            parameters: Dict
+
+        try:
+            response = requests.post(self.model_url, headers=headers, json=data)
+            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
+            result = ImageResult(**response.json())
+            if len(result.images) == 0:
+                return ""
+            return result.images[0]
+        except requests.exceptions.RequestException as e:
+            logger.error(f"An error occurred:{e}")
+        return ""
+
+
+# Export
+def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""):
+    """Convert text to an image through the MetaGPT text-to-image model endpoint.
+
+    :param text: The text used for image conversion; empty text yields "".
+    :param model_url: Model REST API URL; falls back to env var METAGPT_TEXT_TO_IMAGE_MODEL.
+    :param size_type: One of ['512x512', '512x768']
+    :return: The image data is returned in Base64 encoding.
+    """
+    if not text:
+        return ""
+    endpoint = model_url or os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')
+    converter = MetaGPTText2Image(endpoint)
+    return converter.text_2_image(text, size_type=size_type)
+
+
+if __name__ == "__main__":
+    initialize_environment()
+    # Manual smoke test: call the exported function and save the decoded bytes to tmp.png.
+    v = oas3_metagpt_text_to_image("Panda emoji")
+    data = base64.b64decode(v)
+    with open("tmp.png", mode="wb") as writer:
+        writer.write(data)
+    print(v)
diff --git a/metagpt/tools/openai_text_2_embedding.py b/metagpt/tools/openai_text_to_embedding.py
similarity index 94%
rename from metagpt/tools/openai_text_2_embedding.py
rename to metagpt/tools/openai_text_to_embedding.py
index eb90a1ea9..9eddd5bc1 100644
--- a/metagpt/tools/openai_text_2_embedding.py
+++ b/metagpt/tools/openai_text_to_embedding.py
@@ -3,7 +3,7 @@
 """
 @Time    : 2023/8/18
 @Author  : mashenquan
-@File    : openai_text_2_embedding.py
+@File    : openai_text_to_embedding.py
 @Desc    : OpenAI Text-to-Embedding OAS3 api, which provides text-to-embedding functionality.
             For more details, checkout: `https://platform.openai.com/docs/api-reference/embeddings/object`
 """
@@ -70,7 +70,7 @@ class OpenAIText2Embedding:
 
 
 # Export
-def oas3_openai_text_2_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
+def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
     """Text to embedding
 
     :param text: The text used for embedding.
@@ -88,5 +88,5 @@ def oas3_openai_text_2_embedding(text, model="text-embedding-ada-002", openai_ap
 if __name__ == "__main__":
     initialize_environment()
 
-    v = oas3_openai_text_2_embedding("Panda emoji")
+    v = oas3_openai_text_to_embedding("Panda emoji")
     print(v)
diff --git a/metagpt/tools/openai_text_2_image.py b/metagpt/tools/openai_text_to_image.py
similarity index 94%
rename from metagpt/tools/openai_text_2_image.py
rename to metagpt/tools/openai_text_to_image.py
index 50c007626..6ec96d166 100644
--- a/metagpt/tools/openai_text_2_image.py
+++ b/metagpt/tools/openai_text_to_image.py
@@ -3,7 +3,7 @@
 """
 @Time    : 2023/8/17
 @Author  : mashenquan
-@File    : openai_text_2_image.py
+@File    : openai_text_to_image.py
 @Desc    : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
 """
 import base64
@@ -78,7 +78,7 @@ class OpenAIText2Image:
 
 
 # Export
-def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=""):
+def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""):
     """Text to image
 
     :param text: The text used for image conversion.
@@ -96,5 +96,5 @@ def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=
 if __name__ == "__main__":
     initialize_environment()
 
-    v = oas3_openai_text_2_image("Panda emoji")
+    v = oas3_openai_text_to_image("Panda emoji")
     print(v)