Merge branch 'code_intepreter_add_vision' into 'code_intepreter'

add vision tool for code_interpreter

See merge request agents/data_agents_opt!47
This commit is contained in:
林义章 2024-01-17 10:05:48 +00:00
commit 42a106ca26
7 changed files with 257 additions and 0 deletions

View file

@ -86,6 +86,11 @@ TIMEOUT: 60 # Timeout for llm invocation
#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
#AZURE_TTS_REGION: "eastus"
#### for OPENAI VISION
#OPENAI_VISION_MODEL: "YOUR_VISION_MODEL_NAME"
#VISION_MAX_TOKENS: 4096
#### for Stable Diffusion
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
#SD_URL: "YOUR_SD_URL"

View file

@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/15
@Author : mannaandpoem
@File : imitate_webpage.py
"""
from metagpt.roles.code_interpreter import CodeInterpreter
async def main():
    """Ask a tool-enabled CodeInterpreter to render a webpage and rebuild it as code.

    The task description below is passed both as the interpreter's goal and as
    the message given to ``run``.
    """
    target_url = 'https://pytorch.org/'
    task_description = f"""This is a URL of webpage: '{target_url}' .
Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Finally, save webpage in a text file.
Note: All required dependencies and environments have been fully installed and configured."""
    interpreter = CodeInterpreter(goal=task_description, use_tools=True)
    await interpreter.run(task_description)
# Script entry point: only runs when this file is executed directly, not on import.
if __name__ == '__main__':
    # asyncio is needed only here, to drive the async main() to completion.
    import asyncio
    asyncio.run(main())

View file

@ -37,3 +37,9 @@ The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""
# Usage prompt for tools of the "vision" type. It instructs the model to produce
# the complete webpage (HTML, CSS and JavaScript) in a single generation step
# instead of splitting the work across several steps.
VISION_PROMPT = """
The current task is about converting image into webpage code. please note the following:
- Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
"""

View file

@ -16,6 +16,7 @@ from metagpt.prompts.tool_type import (
FEATURE_ENGINEERING_PROMPT,
MODEL_TRAIN_PROMPT,
MODEL_EVALUATE_PROMPT,
VISION_PROMPT
)
@ -76,6 +77,12 @@ TOOL_TYPE_MAPPINGS = {
desc="Related to text2image, image2image using stable diffusion model.",
usage_prompt="",
),
"vision": ToolType(
name="vision",
module=str(TOOL_LIBS_PATH / "vision"),
desc="Only for converting image into webpage code.",
usage_prompt=VISION_PROMPT,
),
"other": ToolType(
name="other",
module="",

View file

@ -0,0 +1,129 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/12
@Author : mannaandpoem
@File : vision.py
"""
from pathlib import Path
import requests
import base64
from metagpt.config import CONFIG
# Endpoint settings for the vision model; all four are read from CONFIG once,
# when this module is imported.
OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
API_KEY = CONFIG.OPENAI_API_KEY
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
# Prompt sent together with the image to obtain a textual description of its layout.
ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry. """
# Prompt sent together with the image to obtain the webpage code; the layout
# description is appended to it as extra context before the request is made.
GENERATE_PROMPT = """You are now a UI/UX and Web Developer. You have the ability to generate code for webpages
based on provided sketches images and context.
Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript.
NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry.
Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
class Vision:
    """Turn a sketch/screenshot image into webpage code via an OpenAI-compatible vision chat endpoint.

    Connection settings (API key, base URL, model name and token budget) are
    copied from the module-level constants when an instance is created.
    """

    # Seconds to wait for the vision endpoint. Without a timeout ``requests``
    # blocks forever when the server never answers.
    request_timeout = 300

    def __init__(self):
        self.api_key = API_KEY
        self.api_base = OPENAI_API_BASE
        self.model = MODEL
        self.max_tokens = MAX_TOKENS

    def analyze_layout(self, image_path):
        """Return the model's textual layout description of the image at *image_path*."""
        return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)

    def generate_web_pages(self, image_path):
        """Return the model's webpage code (HTML, CSS and JavaScript) for the image.

        Two requests are made: the first obtains a layout description, the
        second asks for the code with that description appended as context.
        """
        layout = self.analyze_layout(image_path)
        prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
        return self.get_result(image_path, prompt)

    def get_result(self, image_path, prompt):
        """Send *prompt* plus the base64-encoded image to the chat completions endpoint.

        Args:
            image_path: Path of the image file to attach.
            prompt: Text part of the user message.

        Returns:
            The ``content`` of the first choice in the JSON response.

        Raises:
            ValueError: If the endpoint answers with a status other than 200.
            requests.exceptions.Timeout: If no answer arrives within
                ``request_timeout`` seconds.
        """
        base64_image = self.encode_image(image_path)
        # Label the data URL with the real image type instead of always claiming JPEG.
        mime_type = self._guess_image_mime(image_path)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                        }
                    ]
                }
            ],
            "max_tokens": self.max_tokens,
        }
        response = requests.post(
            f"{self.api_base}/chat/completions",
            headers=headers,
            json=payload,
            timeout=self.request_timeout,
        )
        if response.status_code != 200:
            raise ValueError(f"Request failed with status {response.status_code}, {response.text}")
        return response.json()["choices"][0]["message"]["content"]

    @staticmethod
    def encode_image(image_path):
        """Return the content of the file at *image_path* as a base64 text string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    @staticmethod
    def _guess_image_mime(image_path):
        """Return the image MIME type implied by the file extension, or JPEG when unknown."""
        import mimetypes  # local import keeps the class usable without touching the file's import block

        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    @staticmethod
    def _extract_fenced(text, *languages):
        """Return the body of the first code fence tagged with one of *languages*, or ``None``.

        The body is everything between the opening tag and the next fence
        marker. Tags are tried in the given order.
        """
        import re  # local import, see _guess_image_mime

        for language in languages:
            # The look-ahead stops e.g. the "js" tag from matching a "json" fence.
            opening = re.search("```" + re.escape(language) + r"(?![0-9A-Za-z])", text)
            if opening:
                return text[opening.end():].split("```", 1)[0]
        return None

    @staticmethod
    def _first_referenced(html, candidates):
        """Return the first name in *candidates* that occurs in *html*, or ``None``."""
        return next((name for name in candidates if name in html), None)

    @staticmethod
    def save_webpages(image_path, webpages) -> Path:
        """Split the model reply into html/css/js files and write them to disk.

        The files go into a folder named ``webpages`` next to the image.
        A stylesheet or script is written only when ``index.html`` references
        ``styles.css``/``style.css`` or ``scripts.js``/``script.js``.

        Args:
            image_path: Path of the source image; only its parent directory is used.
            webpages: Model reply containing fenced ``html`` and, optionally,
                ``css`` and ``javascript`` (or ``js``) code blocks.

        Returns:
            Path of the ``webpages`` directory.

        Raises:
            ValueError: If the html block, or a referenced css/js block, is missing.
            FileNotFoundError: If the output directory or files cannot be created.
        """
        index = Vision._extract_fenced(webpages, "html")
        if index is None:
            raise ValueError("No html code found in the result, please check your image and try again.")

        style_name = Vision._first_referenced(index, ("styles.css", "style.css"))
        js_name = Vision._first_referenced(index, ("scripts.js", "script.js"))
        style = Vision._extract_fenced(webpages, "css") if style_name else ""
        js = Vision._extract_fenced(webpages, "javascript", "js") if js_name else ""
        if style is None or js is None:
            raise ValueError("No css or js code found in the result, please check your image and try again.")

        outputs = {"index.html": index}
        if style_name:
            outputs[style_name] = style
        if js_name:
            outputs[js_name] = js

        # Parse first and create the folder afterwards, so that a reply without
        # code does not leave an empty "webpages" directory behind.
        webpages_path = Path(image_path).parent / "webpages"
        try:
            webpages_path.mkdir(exist_ok=True)
            for file_name, content in outputs.items():
                # Explicit UTF-8: generated pages may contain non-ASCII text and
                # the platform default encoding is not guaranteed to handle it.
                with open(webpages_path / file_name, "w", encoding="utf-8") as f:
                    f.write(content)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Cannot save the webpages to {str(webpages_path)}") from e
        return webpages_path

View file

@ -0,0 +1,36 @@
# Tool schema for the Vision class.
# Only __init__, generate_web_pages and save_webpages are described here; the
# class's other methods (analyze_layout, get_result, encode_image) are not listed.
Vision:
  type: class
  description: "Class for generating web pages at once."
  methods:
    __init__:
      description: "Initialize Vision class with default values."
    # Takes an image path and returns the generated code as one string.
    generate_web_pages:
      description: "Generate web pages including all code(HTML, CSS and JavaScript) in one go based on the image."
      parameters:
        properties:
          image_path:
            type: str
            description: "The path of the image file"
        required:
          - image_path
      returns:
        type: str
        description: "Generated webpages content."
    # Takes the image path and the string returned by generate_web_pages.
    save_webpages:
      description: "Save webpages including all code(HTML, CSS and JavaScript) at once"
      parameters:
        properties:
          image_path:
            type: str
            description: "The path of the image file"
          webpages:
            type: str
            description: "The generated webpages content"
        required:
          - image_path
          - webpages
      returns:
        type: Path
        description: "The path of the saved webpages"

View file

@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/15
@Author : mannaandpoem
@File : test_vision.py
"""
import pytest
from metagpt import logs
from metagpt.tools.functions.libs.vision import Vision
@pytest.fixture
def mock_webpages():
    """Return a canned model reply with fenced html, css and javascript blocks.

    The html block references ``scripts.js`` and ``styles.css``.
    """
    return """```html\n<html>\n<script src="scripts.js"></script>
<link rel="stylesheet" href="styles.css">\n</html>\n```\n
```css\n.class { ... }\n```\n
```javascript\nfunction() { ... }\n```\n"""
def test_vision_generate_webpages(mocker, mock_webpages):
    """generate_web_pages should return the model reply without any network access."""
    # Stub only the request layer. Patching generate_web_pages itself would make
    # the assertions below check the mock rather than the real method.
    mocker.patch(
        "metagpt.tools.functions.libs.vision.Vision.get_result",
        return_value=mock_webpages
    )
    image_path = "image.png"
    vision = Vision()
    rsp = vision.generate_web_pages(image_path=image_path)
    logs.logger.info(rsp)
    assert "html" in rsp
    assert "css" in rsp
    assert "javascript" in rsp
def test_save_webpages(tmp_path, mock_webpages):
    """save_webpages should write index.html plus the referenced css and js files."""
    # Work inside pytest's per-test temporary directory so the test does not
    # leave a "webpages" folder in the current working directory.
    image_path = tmp_path / "image.png"
    webpages_dir = Vision.save_webpages(image_path=image_path, webpages=mock_webpages)
    logs.logger.info(webpages_dir)
    assert webpages_dir == tmp_path / "webpages"
    assert webpages_dir.exists()
    assert (webpages_dir / "index.html").exists()
    assert (webpages_dir / "styles.css").exists()
    assert (webpages_dir / "scripts.js").exists()