From 40f5d5e40efda6cafe1f809c43fbf28fab0d8479 Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Fri, 12 Jan 2024 18:30:48 +0800
Subject: [PATCH 1/8] add vision tool for code_interpreter
---
metagpt/prompts/tool_type.py | 6 ++
metagpt/tools/__init__.py | 7 ++
metagpt/tools/functions/libs/vision.py | 81 ++++++++++++++++++++++
metagpt/tools/functions/schemas/vision.yml | 20 ++++++
4 files changed, 114 insertions(+)
create mode 100644 metagpt/tools/functions/libs/vision.py
create mode 100644 metagpt/tools/functions/schemas/vision.yml
diff --git a/metagpt/prompts/tool_type.py b/metagpt/prompts/tool_type.py
index ec848bbe4..43ead78a6 100644
--- a/metagpt/prompts/tool_type.py
+++ b/metagpt/prompts/tool_type.py
@@ -37,3 +37,9 @@ The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""
+
+# Prompt for using tools of "vision" type
+VISION_PROMPT = """
+The current task is about converting image into webpage code. please note the following:
+- Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
+"""
\ No newline at end of file
diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py
index 4b3528795..045ede622 100644
--- a/metagpt/tools/__init__.py
+++ b/metagpt/tools/__init__.py
@@ -17,6 +17,7 @@ from metagpt.prompts.tool_type import (
FEATURE_ENGINEERING_PROMPT,
MODEL_TRAIN_PROMPT,
MODEL_EVALUATE_PROMPT,
+ VISION_PROMPT
)
@@ -71,6 +72,12 @@ TOOL_TYPE_MAPPINGS = {
desc="Only for evaluating model.",
usage_prompt=MODEL_EVALUATE_PROMPT,
),
+ "vision": ToolType(
+ name="vision",
+ module=str(TOOL_LIBS_PATH / "vision"),
+ desc="Only for converting image into webpage code.",
+ usage_prompt=VISION_PROMPT,
+ ),
"other": ToolType(
name="other",
module="",
diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/functions/libs/vision.py
new file mode 100644
index 000000000..b653c9300
--- /dev/null
+++ b/metagpt/tools/functions/libs/vision.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/01/12
+@Author : mannaandpoem
+@File : vision.py
+"""
+import requests
+
+import base64
+
+OPENAI_API_BASE = "..."
+API_KEY = "sk-..."
+MODEL = "..."
+MAX_TOKENS = 4096
+
+
+class Vision:
+ def __init__(self):
+ self.api_key = API_KEY
+ self.model = MODEL
+ self.max_tokens = MAX_TOKENS
+
+ def analyze_layout(
+ self,
+ image_path,
+ prompt="You are now a UI/UX, please generate layout information for this image: \n\n"
+ "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design."
+ "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
+ ):
+ print(f"analyze_layout: {image_path}")
+ return self.get_result(image_path, prompt)
+
+ def generate_web_pages(
+ self,
+ image_path,
+ prompt="You are now a UI/UX and Web Developer. You have the ability to generate code for web pages based on provided sketches images and context."
+ "Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript. "
+ "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design. "
+ "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
+ "\n\nNow, please generate the corresponding webpage code including HTML, CSS and JavaScript:"
+ ):
+ layout = self.analyze_layout(image_path)
+ prompt += "\n\n # Context\n The layout information of the sketch image is: \n" + layout
+ return self.get_result(image_path, prompt)
+
+ def get_result(self, image_path, prompt):
+ base64_image = self.encode_image(image_path)
+ headers = {
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {self.api_key}"
+ }
+ payload = {
+ "model": self.model,
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": prompt},
+ {
+ "type": "image_url",
+ "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
+ }
+ ]
+ }
+ ],
+ "max_tokens": self.max_tokens,
+ }
+ response = requests.post(f"{OPENAI_API_BASE}/chat/completions", headers=headers, json=payload)
+ return response.json()["choices"][0]["message"]["content"]
+
+ @staticmethod
+ def encode_image(image_path):
+ with open(image_path, "rb") as image_file:
+ return base64.b64encode(image_file.read()).decode('utf-8')
+
+
+if __name__ == "__main__":
+ vision = Vision()
+ rsp = vision.generate_web_pages(image_path="./img.png")
+ print(rsp)
\ No newline at end of file
diff --git a/metagpt/tools/functions/schemas/vision.yml b/metagpt/tools/functions/schemas/vision.yml
new file mode 100644
index 000000000..795854e75
--- /dev/null
+++ b/metagpt/tools/functions/schemas/vision.yml
@@ -0,0 +1,20 @@
+Vision:
+ type: class
+ description: "Class for generating web pages at once."
+ methods:
+ __init__:
+ description: "Initialize Vision class with default values."
+
+ generate_web_pages:
+ description: "Generate web pages including all code(HTML, CSS and JavaScript) in one go based on the image."
+ parameters:
+ properties:
+ image_path:
+ type: str
+ description: "The path of the image file"
+
+ required:
+ - image_path
+ returns:
+ type: str
+ description: "Generated web page content."
\ No newline at end of file
From f45a368be2cf9860c2046656767b6c4f1bc0f53a Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 11:13:35 +0800
Subject: [PATCH 2/8] 1. add vision config in config.yaml 2. add
imitate_webpage.py in example 3. update vision.py
---
config/config.yaml | 14 +++++++
examples/imitate_webpage.py | 25 +++++++++++++
metagpt/tools/functions/libs/vision.py | 51 +++++++++++++-------------
3 files changed, 65 insertions(+), 25 deletions(-)
create mode 100644 examples/imitate_webpage.py
diff --git a/config/config.yaml b/config/config.yaml
index 79ebae863..5eab964bd 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -86,6 +86,20 @@ TIMEOUT: 60 # Timeout for llm invocation
#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
#AZURE_TTS_REGION: "eastus"
+#### for OPENAI VISION
+
+OPENAI_VISION_URL: "https://openai-forward.metadl.com/v1"
+OPENAI_VISION_KEY: "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"
+OPENAI_VISION_MODEL: "gpt-4-vision-preview"
+VISION_MAX_TOKENS: 4096
+
+#### for AZURE VISION
+
+#AZURE_VISION_URL: "YOUR_AZURE_ENDPOINT"
+#AZURE_VISION_KEY: "YOUR_API_KEY"
+#AZURE_VISION_REGION: "YOUR_VISION_REGION_NAME"
+#VISION_MAX_TOKENS: 4096
+
#### for Stable Diffusion
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
#SD_URL: "YOUR_SD_URL"
diff --git a/examples/imitate_webpage.py b/examples/imitate_webpage.py
new file mode 100644
index 000000000..47fcd251f
--- /dev/null
+++ b/examples/imitate_webpage.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/01/15
+@Author : mannaandpoem
+@File : imitate_webpage.py
+"""
+from metagpt.roles.code_interpreter import CodeInterpreter
+
+
+async def main():
+ prompt = """This is a URL of webpage: https://cn.bing.com/
+Firstly, utilize Selenium and WebDriver for rendering.
+Secondly, convert image to a webpage including HTML, CSS and JS in one go.
+Finally, save webpage in a text file.
+Note: All required dependencies and environments have been fully installed and configured."""
+ ci = CodeInterpreter(goal=prompt, use_tools=True)
+
+ await ci.run(prompt)
+
+
+if __name__ == '__main__':
+ import asyncio
+
+ asyncio.run(main())
diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/functions/libs/vision.py
index b653c9300..e6924b9bc 100644
--- a/metagpt/tools/functions/libs/vision.py
+++ b/metagpt/tools/functions/libs/vision.py
@@ -9,39 +9,40 @@ import requests
import base64
-OPENAI_API_BASE = "..."
-API_KEY = "sk-..."
-MODEL = "..."
-MAX_TOKENS = 4096
+from metagpt.config import CONFIG
+
+OPENAI_API_BASE = CONFIG.OPENAI_VISION_URL
+API_KEY = CONFIG.OPENAI_VISION_KEY
+MODEL = CONFIG.OPENAI_VISION_MODEL
+MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
+
+ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
+
+NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
+As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry. """
+
+GENERATE_PROMPT = """You are now a UI/UX and Web Developer. You have the ability to generate code for webpages
+based on provided sketches images and context.
+Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript.
+
+NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
+As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry.
+
+Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
class Vision:
def __init__(self):
self.api_key = API_KEY
self.model = MODEL
- self.max_tokens = MAX_TOKENS
+ self.max_tokens = 4096
- def analyze_layout(
- self,
- image_path,
- prompt="You are now a UI/UX, please generate layout information for this image: \n\n"
- "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design."
- "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
- ):
- print(f"analyze_layout: {image_path}")
- return self.get_result(image_path, prompt)
+ def analyze_layout(self, image_path):
+ return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
- def generate_web_pages(
- self,
- image_path,
- prompt="You are now a UI/UX and Web Developer. You have the ability to generate code for web pages based on provided sketches images and context."
- "Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript. "
- "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design. "
- "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
- "\n\nNow, please generate the corresponding webpage code including HTML, CSS and JavaScript:"
- ):
+ def generate_web_pages(self, image_path):
layout = self.analyze_layout(image_path)
- prompt += "\n\n # Context\n The layout information of the sketch image is: \n" + layout
+ prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
return self.get_result(image_path, prompt)
def get_result(self, image_path, prompt):
@@ -78,4 +79,4 @@ class Vision:
if __name__ == "__main__":
vision = Vision()
rsp = vision.generate_web_pages(image_path="./img.png")
- print(rsp)
\ No newline at end of file
+ print(rsp)
From 2678413c51345299252f95050206e4e2083a823a Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 11:19:09 +0800
Subject: [PATCH 3/8] update config.yaml
---
config/config.yaml | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/config/config.yaml b/config/config.yaml
index 5eab964bd..412da8b15 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -88,16 +88,16 @@ TIMEOUT: 60 # Timeout for llm invocation
#### for OPENAI VISION
-OPENAI_VISION_URL: "https://openai-forward.metadl.com/v1"
-OPENAI_VISION_KEY: "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"
-OPENAI_VISION_MODEL: "gpt-4-vision-preview"
-VISION_MAX_TOKENS: 4096
+#OPENAI_VISION_URL: "YOUR_OPENAI_ENDPOINT"
+#OPENAI_VISION_KEY: "YOUR_API_KEY"
+#OPENAI_VISION_MODEL: "YOUR_VISION_MODEL_NAME"
+#VISION_MAX_TOKENS: 4096
#### for AZURE VISION
#AZURE_VISION_URL: "YOUR_AZURE_ENDPOINT"
#AZURE_VISION_KEY: "YOUR_API_KEY"
-#AZURE_VISION_REGION: "YOUR_VISION_REGION_NAME"
+#AZURE_VISION_REGION: "YOUR_VISION_MODEL_NAME"
#VISION_MAX_TOKENS: 4096
#### for Stable Diffusion
From 38929dc1248140bfd6246238f5ab946af7aa483d Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 11:47:36 +0800
Subject: [PATCH 4/8] update imitate_webpage.py
---
examples/imitate_webpage.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/examples/imitate_webpage.py b/examples/imitate_webpage.py
index b4610d5e0..da46af0a6 100644
--- a/examples/imitate_webpage.py
+++ b/examples/imitate_webpage.py
@@ -9,7 +9,8 @@ from metagpt.roles.code_interpreter import CodeInterpreter
async def main():
- prompt = """This is a URL of webpage: 'https://www.baidu.com/' .
+ web_url = 'https://www.baidu.com/'
+ prompt = f"""This is a URL of webpage: '{web_url}' .
Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Finally, save webpage in a text file.
From 9eee30bf65d1bccc5226a7e5abae033a0e9acd51 Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 12:57:36 +0800
Subject: [PATCH 5/8] update config.yaml and vision.py for configuration of
vision
---
config/config.yaml | 9 ---------
metagpt/tools/functions/libs/vision.py | 7 ++++---
2 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/config/config.yaml b/config/config.yaml
index 412da8b15..d8fab693e 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -88,18 +88,9 @@ TIMEOUT: 60 # Timeout for llm invocation
#### for OPENAI VISION
-#OPENAI_VISION_URL: "YOUR_OPENAI_ENDPOINT"
-#OPENAI_VISION_KEY: "YOUR_API_KEY"
#OPENAI_VISION_MODEL: "YOUR_VISION_MODEL_NAME"
#VISION_MAX_TOKENS: 4096
-#### for AZURE VISION
-
-#AZURE_VISION_URL: "YOUR_AZURE_ENDPOINT"
-#AZURE_VISION_KEY: "YOUR_API_KEY"
-#AZURE_VISION_REGION: "YOUR_VISION_MODEL_NAME"
-#VISION_MAX_TOKENS: 4096
-
#### for Stable Diffusion
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
#SD_URL: "YOUR_SD_URL"
diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/functions/libs/vision.py
index e6924b9bc..8c29b0567 100644
--- a/metagpt/tools/functions/libs/vision.py
+++ b/metagpt/tools/functions/libs/vision.py
@@ -11,8 +11,8 @@ import base64
from metagpt.config import CONFIG
-OPENAI_API_BASE = CONFIG.OPENAI_VISION_URL
-API_KEY = CONFIG.OPENAI_VISION_KEY
+OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
+API_KEY = CONFIG.OPENAI_API_KEY
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
@@ -77,6 +77,7 @@ class Vision:
if __name__ == "__main__":
+ image_path = "image.png"
vision = Vision()
- rsp = vision.generate_web_pages(image_path="./img.png")
+ rsp = vision.generate_web_pages(image_path=image_path)
print(rsp)
From 841f69d5edc063ab2d9bf340654dd63ba12465db Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 12:57:36 +0800
Subject: [PATCH 6/8] update config.yaml and vision.py for configuration of
vision
---
config/config.yaml | 9 ---------
examples/imitate_webpage.py | 2 +-
metagpt/tools/functions/libs/vision.py | 7 ++++---
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/config/config.yaml b/config/config.yaml
index 412da8b15..d8fab693e 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -88,18 +88,9 @@ TIMEOUT: 60 # Timeout for llm invocation
#### for OPENAI VISION
-#OPENAI_VISION_URL: "YOUR_OPENAI_ENDPOINT"
-#OPENAI_VISION_KEY: "YOUR_API_KEY"
#OPENAI_VISION_MODEL: "YOUR_VISION_MODEL_NAME"
#VISION_MAX_TOKENS: 4096
-#### for AZURE VISION
-
-#AZURE_VISION_URL: "YOUR_AZURE_ENDPOINT"
-#AZURE_VISION_KEY: "YOUR_API_KEY"
-#AZURE_VISION_REGION: "YOUR_VISION_MODEL_NAME"
-#VISION_MAX_TOKENS: 4096
-
#### for Stable Diffusion
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
#SD_URL: "YOUR_SD_URL"
diff --git a/examples/imitate_webpage.py b/examples/imitate_webpage.py
index da46af0a6..6c12c7eda 100644
--- a/examples/imitate_webpage.py
+++ b/examples/imitate_webpage.py
@@ -9,7 +9,7 @@ from metagpt.roles.code_interpreter import CodeInterpreter
async def main():
- web_url = 'https://www.baidu.com/'
+ web_url = 'https://pytorch.org/'
prompt = f"""This is a URL of webpage: '{web_url}' .
Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/functions/libs/vision.py
index e6924b9bc..8c29b0567 100644
--- a/metagpt/tools/functions/libs/vision.py
+++ b/metagpt/tools/functions/libs/vision.py
@@ -11,8 +11,8 @@ import base64
from metagpt.config import CONFIG
-OPENAI_API_BASE = CONFIG.OPENAI_VISION_URL
-API_KEY = CONFIG.OPENAI_VISION_KEY
+OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
+API_KEY = CONFIG.OPENAI_API_KEY
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
@@ -77,6 +77,7 @@ class Vision:
if __name__ == "__main__":
+ image_path = "image.png"
vision = Vision()
- rsp = vision.generate_web_pages(image_path="./img.png")
+ rsp = vision.generate_web_pages(image_path=image_path)
print(rsp)
From 7f1584db9e5bd153f5f78f2f79d3d16970f20f0c Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 17:26:35 +0800
Subject: [PATCH 7/8] 1. add test_vision.py 2. add save_webpages function in
vision.py and vision.yml
---
metagpt/tools/functions/libs/vision.py | 64 +++++++++++++++++---
metagpt/tools/functions/schemas/vision.yml | 20 +++++-
tests/metagpt/tools/functions/test_vision.py | 40 ++++++++++++
3 files changed, 113 insertions(+), 11 deletions(-)
create mode 100644 tests/metagpt/tools/functions/test_vision.py
diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/functions/libs/vision.py
index 8c29b0567..b10ad7608 100644
--- a/metagpt/tools/functions/libs/vision.py
+++ b/metagpt/tools/functions/libs/vision.py
@@ -5,6 +5,8 @@
@Author : mannaandpoem
@File : vision.py
"""
+from pathlib import Path
+
import requests
import base64
@@ -34,8 +36,9 @@ Now, please generate the corresponding webpage code including HTML, CSS and Java
class Vision:
def __init__(self):
self.api_key = API_KEY
+ self.api_base = OPENAI_API_BASE
self.model = MODEL
- self.max_tokens = 4096
+ self.max_tokens = MAX_TOKENS
def analyze_layout(self, image_path):
return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
@@ -43,7 +46,8 @@ class Vision:
def generate_web_pages(self, image_path):
layout = self.analyze_layout(image_path)
prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
- return self.get_result(image_path, prompt)
+ result = self.get_result(image_path, prompt)
+ return result
def get_result(self, image_path, prompt):
base64_image = self.encode_image(image_path)
@@ -67,17 +71,59 @@ class Vision:
],
"max_tokens": self.max_tokens,
}
- response = requests.post(f"{OPENAI_API_BASE}/chat/completions", headers=headers, json=payload)
- return response.json()["choices"][0]["message"]["content"]
+ response = requests.post(f"{self.api_base}/chat/completions", headers=headers, json=payload)
+
+ if response.status_code != 200:
+ raise ValueError(f"Request failed with status {response.status_code}, {response.text}")
+ else:
+ return response.json()["choices"][0]["message"]["content"]
@staticmethod
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
+    @staticmethod
+    def save_webpages(image_path, webpages) -> Path:
+        """Split a model reply into html/css/js files and save them beside the image.
+
+        Args:
+            image_path: Path of the source image; output goes to a "webpages" folder next to it.
+            webpages: Reply text holding ```html, ```css and ```javascript fenced blocks.
+
+        Returns:
+            Path of the "webpages" directory.
+
+        Raises:
+            ValueError: The html fence, or a css/js fence the html refers to, is missing.
+            FileNotFoundError: The output files cannot be created.
+        """
+        webpages_path = Path(image_path).parent / "webpages"
+        webpages_path.mkdir(parents=True, exist_ok=True)
-if __name__ == "__main__":
-    image_path = "image.png"
-    vision = Vision()
-    rsp = vision.generate_web_pages(image_path=image_path)
-    print(rsp)
+
+        def fenced(tag):
+            return webpages.split(f"```{tag}")[1].split("```")[0]
+
+        try:
+            files = {"index.html": fenced("html")}
+        except IndexError as e:
+            raise ValueError("No html code found in the result, please check your image and try again.") from e
+
+        try:
+            # css/js are saved only under a file name that the html actually references.
+            for tag, names in (("css", ("styles.css", "style.css")), ("javascript", ("scripts.js", "script.js"))):
+                name = next((n for n in names if n in files["index.html"]), None)
+                if name:
+                    files[name] = fenced(tag)
+        except IndexError as e:
+            raise ValueError("No css or js code found in the result, please check your image and try again.") from e
+
+        try:
+            for name, content in files.items():
+                # Explicit utf-8: the platform default encoding may not cover non-ASCII page text.
+                (webpages_path / name).write_text(content, encoding="utf-8")
+        except FileNotFoundError as e:
+            raise FileNotFoundError(f"Cannot save the webpages to {str(webpages_path)}") from e
+
+        return webpages_path
diff --git a/metagpt/tools/functions/schemas/vision.yml b/metagpt/tools/functions/schemas/vision.yml
index 795854e75..4cb247419 100644
--- a/metagpt/tools/functions/schemas/vision.yml
+++ b/metagpt/tools/functions/schemas/vision.yml
@@ -12,9 +12,25 @@ Vision:
image_path:
type: str
description: "The path of the image file"
-
required:
- image_path
returns:
type: str
- description: "Generated web page content."
\ No newline at end of file
+ description: "Generated webpages content."
+
+ save_webpages:
+ description: "Save webpages including all code(HTML, CSS and JavaScript) at once"
+ parameters:
+ properties:
+ image_path:
+ type: str
+ description: "The path of the image file"
+ webpages:
+ type: str
+ description: "The generated webpages content"
+ required:
+ - image_path
+ - webpages
+ returns:
+ type: Path
+ description: "The path of the saved webpages"
\ No newline at end of file
diff --git a/tests/metagpt/tools/functions/test_vision.py b/tests/metagpt/tools/functions/test_vision.py
new file mode 100644
index 000000000..0359f14f1
--- /dev/null
+++ b/tests/metagpt/tools/functions/test_vision.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/01/15
+@Author : mannaandpoem
+@File : test_vision.py
+"""
+import base64
+from unittest.mock import AsyncMock
+
+from pytest_mock import mocker
+
+from metagpt import logs
+from metagpt.tools.functions.libs.vision import Vision
+
+
+def test_vision_generate_web_pages():
+ image_path = "./image.png"
+ vision = Vision()
+ rsp = vision.generate_web_pages(image_path=image_path)
+ logs.logger.info(rsp)
+ assert "html" in rsp
+ assert "css" in rsp
+ assert "javascript" in rsp
+
+
+def test_save_webpages():
+ image_path = "./image.png"
+ vision = Vision()
+ webpages = """```html: \n
+ \n```
+ "```css: .class { ... } ```\n ```javascript: function() { ... }```"""
+ webpages_dir = vision.save_webpages(image_path=image_path, webpages=webpages)
+ logs.logger.info(webpages_dir)
+ assert webpages_dir.exists()
+ assert (webpages_dir / "index.html").exists()
+ assert (webpages_dir / "style.css").exists() or (webpages_dir / "styles.css").exists()
+ assert (webpages_dir / "script.js").exists() or (webpages_dir / "scripts.js").exists()
+
+
From 66db86ae2a66ebd532bbdc67f03a89c8a638cfee Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 15 Jan 2024 18:19:57 +0800
Subject: [PATCH 8/8] update test_vision.py for mock
---
.../tools/functions/libs/test_vision.py | 48 +++++++++++++++++++
tests/metagpt/tools/functions/test_vision.py | 40 ----------------
2 files changed, 48 insertions(+), 40 deletions(-)
create mode 100644 tests/metagpt/tools/functions/libs/test_vision.py
delete mode 100644 tests/metagpt/tools/functions/test_vision.py
diff --git a/tests/metagpt/tools/functions/libs/test_vision.py b/tests/metagpt/tools/functions/libs/test_vision.py
new file mode 100644
index 000000000..f4f97c46a
--- /dev/null
+++ b/tests/metagpt/tools/functions/libs/test_vision.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/01/15
+@Author : mannaandpoem
+@File : test_vision.py
+"""
+import pytest
+
+from metagpt import logs
+from metagpt.tools.functions.libs.vision import Vision
+
+
+@pytest.fixture
+def mock_webpages():
+ return """```html\n\n
+\n\n```\n
+```css\n.class { ... }\n```\n
+```javascript\nfunction() { ... }\n```\n"""
+
+
+def test_vision_generate_webpages(mocker, mock_webpages):
+    """generate_web_pages should return the model reply of the final request."""
+    # Patch get_result (the method doing the file and network I/O) rather than
+    # generate_web_pages itself, so the real method under test actually runs.
+    get_result = mocker.patch(
+        "metagpt.tools.functions.libs.vision.Vision.get_result",
+        return_value=mock_webpages
+    )
+    rsp = Vision().generate_web_pages(image_path="image.png")
+    logs.logger.info(rsp)
+    assert get_result.call_count == 2  # one layout request, one code request
+    assert all(lang in rsp for lang in ("html", "css", "javascript"))
+
+
+def test_save_webpages(tmp_path, mock_webpages):
+    """Check that save_webpages writes index.html beside the given image path."""
+    # Work inside pytest's tmp_path so the test leaves no "webpages" folder in
+    # the current working directory; save_webpages only uses the image's parent.
+    image_path = tmp_path / "image.png"
+    webpages_dir = Vision.save_webpages(image_path=image_path, webpages=mock_webpages)
+    logs.logger.info(webpages_dir)
+    assert webpages_dir == tmp_path / "webpages"
+    assert webpages_dir.exists()
+    # The mocked reply always carries an html fence, so index.html must be written.
+    assert (webpages_dir / "index.html").exists()
+
+
diff --git a/tests/metagpt/tools/functions/test_vision.py b/tests/metagpt/tools/functions/test_vision.py
deleted file mode 100644
index 0359f14f1..000000000
--- a/tests/metagpt/tools/functions/test_vision.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-@Time : 2024/01/15
-@Author : mannaandpoem
-@File : test_vision.py
-"""
-import base64
-from unittest.mock import AsyncMock
-
-from pytest_mock import mocker
-
-from metagpt import logs
-from metagpt.tools.functions.libs.vision import Vision
-
-
-def test_vision_generate_web_pages():
- image_path = "./image.png"
- vision = Vision()
- rsp = vision.generate_web_pages(image_path=image_path)
- logs.logger.info(rsp)
- assert "html" in rsp
- assert "css" in rsp
- assert "javascript" in rsp
-
-
-def test_save_webpages():
- image_path = "./image.png"
- vision = Vision()
- webpages = """```html: \n
- \n```
- "```css: .class { ... } ```\n ```javascript: function() { ... }```"""
- webpages_dir = vision.save_webpages(image_path=image_path, webpages=webpages)
- logs.logger.info(webpages_dir)
- assert webpages_dir.exists()
- assert (webpages_dir / "index.html").exists()
- assert (webpages_dir / "style.css").exists() or (webpages_dir / "styles.css").exists()
- assert (webpages_dir / "script.js").exists() or (webpages_dir / "scripts.js").exists()
-
-