mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-05 22:02:38 +02:00
Merge branch 'code_intepreter_add_vision' into 'code_intepreter'
add vision tool for code_interpreter See merge request agents/data_agents_opt!47
This commit is contained in:
commit
42a106ca26
7 changed files with 257 additions and 0 deletions
|
|
@ -86,6 +86,11 @@ TIMEOUT: 60 # Timeout for llm invocation
|
|||
#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
|
||||
#AZURE_TTS_REGION: "eastus"
|
||||
|
||||
#### for OPENAI VISION
|
||||
|
||||
#OPENAI_VISION_MODEL: "YOUR_VISION_MODEL_NAME"
|
||||
#VISION_MAX_TOKENS: 4096
|
||||
|
||||
#### for Stable Diffusion
|
||||
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
|
||||
#SD_URL: "YOUR_SD_URL"
|
||||
|
|
|
|||
26
examples/imitate_webpage.py
Normal file
26
examples/imitate_webpage.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2024/01/15
|
||||
@Author : mannaandpoem
|
||||
@File : imitate_webpage.py
|
||||
"""
|
||||
from metagpt.roles.code_interpreter import CodeInterpreter
|
||||
|
||||
|
||||
async def main():
    """Hand the "imitate a webpage" task to a tool-enabled CodeInterpreter."""
    target_url = 'https://pytorch.org/'
    # One entry per line of the instruction text sent to the interpreter.
    steps = [
        f"This is a URL of webpage: '{target_url}' .",
        "Firstly, utilize Selenium and WebDriver for rendering.",
        "Secondly, convert image to a webpage including HTML, CSS and JS in one go.",
        "Finally, save webpage in a text file.",
        "Note: All required dependencies and environments have been fully installed and configured.",
    ]
    prompt = "\n".join(steps)

    # The same text is used both as the role's goal and as the run message.
    interpreter = CodeInterpreter(goal=prompt, use_tools=True)
    await interpreter.run(prompt)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # asyncio is only used by this script entry point, so it is imported here.
    import asyncio

    # Run the main() coroutine to completion.
    asyncio.run(main())
|
||||
|
|
@ -37,3 +37,9 @@ The current task is about evaluating a model, please note the following:
|
|||
- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
|
||||
- Use trained model from previous task result directly, do not mock or reload model yourself.
|
||||
"""
|
||||
|
||||
# Usage prompt for tools of the "vision" type (image -> webpage code).
# It instructs the model to produce HTML, CSS and JavaScript in a single step
# instead of splitting the generation across several steps.
VISION_PROMPT = """
The current task is about converting image into webpage code. please note the following:
- Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
"""
|
||||
|
|
@ -16,6 +16,7 @@ from metagpt.prompts.tool_type import (
|
|||
FEATURE_ENGINEERING_PROMPT,
|
||||
MODEL_TRAIN_PROMPT,
|
||||
MODEL_EVALUATE_PROMPT,
|
||||
VISION_PROMPT
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -76,6 +77,12 @@ TOOL_TYPE_MAPPINGS = {
|
|||
desc="Related to text2image, image2image using stable diffusion model.",
|
||||
usage_prompt="",
|
||||
),
|
||||
"vision": ToolType(
|
||||
name="vision",
|
||||
module=str(TOOL_LIBS_PATH / "vision"),
|
||||
desc="Only for converting image into webpage code.",
|
||||
usage_prompt=VISION_PROMPT,
|
||||
),
|
||||
"other": ToolType(
|
||||
name="other",
|
||||
module="",
|
||||
|
|
|
|||
129
metagpt/tools/functions/libs/vision.py
Normal file
129
metagpt/tools/functions/libs/vision.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2024/01/12
|
||||
@Author : mannaandpoem
|
||||
@File : vision.py
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
import base64
|
||||
|
||||
from metagpt.config import CONFIG
|
||||
|
||||
# Endpoint and credentials, read from the global CONFIG when this module is imported.
OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
API_KEY = CONFIG.OPENAI_API_KEY
# Vision-specific settings (OPENAI_VISION_MODEL / VISION_MAX_TOKENS in the config file).
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
|
||||
|
||||
# First-pass prompt: sent together with the image to obtain a textual layout description.
ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:

NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry. """

# Second-pass prompt: sent together with the image to obtain the webpage code.
# Vision.generate_web_pages appends the layout description to it as context.
GENERATE_PROMPT = """You are now a UI/UX and Web Developer. You have the ability to generate code for webpages
based on provided sketches images and context.
Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript.

NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry.

Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
|
||||
|
||||
|
||||
class Vision:
    """Convert a design image into webpage code via a chat-completions vision endpoint.

    ``generate_web_pages`` first asks the model for a layout description of the
    image, then asks again for HTML/CSS/JavaScript with that description added
    as context. ``save_webpages`` splits the fenced code blocks of the answer
    into files inside a ``webpages`` folder next to the image.
    """

    # Seconds passed to ``requests`` as the timeout of the HTTP call. Without a
    # timeout a stalled connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 600

    def __init__(self):
        """Copy endpoint, credentials, model name and token limit from the module-level settings."""
        self.api_key = API_KEY
        self.api_base = OPENAI_API_BASE
        self.model = MODEL
        self.max_tokens = MAX_TOKENS

    def analyze_layout(self, image_path):
        """Return the model's layout description for the image at ``image_path``."""
        return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)

    def generate_web_pages(self, image_path):
        """Return the model's webpage code (HTML, CSS, JavaScript) for the image.

        Two requests are made: one for the layout description, one for the code
        with that description appended to the generation prompt.
        """
        layout = self.analyze_layout(image_path)
        prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
        return self.get_result(image_path, prompt)

    def get_result(self, image_path, prompt):
        """Send ``prompt`` plus the image to ``{api_base}/chat/completions`` and return the reply text.

        Args:
            image_path: Path of the image file; it is embedded as a base64 data URL.
            prompt: Text part of the user message.

        Returns:
            The ``content`` of the first choice's message in the JSON response.

        Raises:
            ValueError: If the response status code is not 200.
        """
        base64_image = self.encode_image(image_path)
        mime_type = self._guess_image_mime(image_path)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                        },
                    ],
                }
            ],
            "max_tokens": self.max_tokens,
        }
        response = requests.post(
            f"{self.api_base}/chat/completions",
            headers=headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )

        if response.status_code != 200:
            raise ValueError(f"Request failed with status {response.status_code}, {response.text}")
        return response.json()["choices"][0]["message"]["content"]

    @staticmethod
    def encode_image(image_path):
        """Return the bytes of the file at ``image_path`` as a base64 ``str``."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    @staticmethod
    def _guess_image_mime(image_path):
        """Return the image MIME type implied by the file name, or ``image/jpeg`` if unknown."""
        # Local import keeps this class self-contained with respect to the module's import block.
        import mimetypes

        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        # Previous hard-coded value, kept as the fallback.
        return "image/jpeg"

    @staticmethod
    def _fenced_block(text, language):
        """Return the text between the first ```<language> fence and the following ``` marker.

        Raises:
            IndexError: If ``text`` contains no such fence.
        """
        return text.split(f"```{language}")[1].split("```")[0]

    @staticmethod
    def _referenced_file(index, directory, candidates):
        """Return ``directory / name`` for the first candidate name occurring in ``index``, else ``None``."""
        for name in candidates:
            if name in index:
                return directory / name
        return None

    @staticmethod
    def save_webpages(image_path, webpages) -> Path:
        """Write the code blocks found in ``webpages`` to files and return their folder.

        ``index.html`` is always written. A stylesheet / script file is written
        only when the HTML mentions ``styles.css``/``style.css`` or
        ``scripts.js``/``script.js``; the file gets that same name.

        Args:
            image_path: Path of the source image; output goes to ``<image dir>/webpages``.
            webpages: Model answer containing fenced ``html`` (and optionally
                ``css`` / ``javascript``) code blocks.

        Returns:
            Path of the ``webpages`` folder.

        Raises:
            ValueError: If the html block is missing, or a css/js file is
                referenced by the HTML but its code block is missing.
            FileNotFoundError: If a target file cannot be created.
        """
        # Create a "webpages" folder beside the image to hold the html, css and js files.
        webpages_path = Path(image_path).parent / "webpages"
        # parents=True: the image's own directory may not exist yet.
        webpages_path.mkdir(parents=True, exist_ok=True)

        index_path = webpages_path / "index.html"
        try:
            index = Vision._fenced_block(webpages, "html")
        except IndexError as e:
            raise ValueError("No html code found in the result, please check your image and try again.") from e

        # Plural names are checked first, matching the original lookup order.
        style_path = Vision._referenced_file(index, webpages_path, ("styles.css", "style.css"))
        js_path = Vision._referenced_file(index, webpages_path, ("scripts.js", "script.js"))
        try:
            style = Vision._fenced_block(webpages, "css") if style_path else ""
            js = Vision._fenced_block(webpages, "javascript") if js_path else ""
        except IndexError as e:
            raise ValueError("No css or js code found in the result, please check your image and try again.") from e

        try:
            # Explicit UTF-8 so the output does not depend on the platform's default encoding.
            index_path.write_text(index, encoding="utf-8")
            if style_path:
                style_path.write_text(style, encoding="utf-8")
            if js_path:
                js_path.write_text(js, encoding="utf-8")
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Cannot save the webpages to {str(webpages_path)}") from e

        return webpages_path
|
||||
36
metagpt/tools/functions/schemas/vision.yml
Normal file
36
metagpt/tools/functions/schemas/vision.yml
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
Vision:
|
||||
type: class
|
||||
description: "Class for generating web pages at once."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize Vision class with default values."
|
||||
|
||||
generate_web_pages:
|
||||
description: "Generate web pages including all code (HTML, CSS and JavaScript) in one go based on the image."
|
||||
parameters:
|
||||
properties:
|
||||
image_path:
|
||||
type: str
|
||||
description: "The path of the image file"
|
||||
required:
|
||||
- image_path
|
||||
returns:
|
||||
type: str
|
||||
description: "Generated webpages content."
|
||||
|
||||
save_webpages:
|
||||
description: "Save webpages including all code (HTML, CSS and JavaScript) at once"
|
||||
parameters:
|
||||
properties:
|
||||
image_path:
|
||||
type: str
|
||||
description: "The path of the image file"
|
||||
webpages:
|
||||
type: str
|
||||
description: "The generated webpages content"
|
||||
required:
|
||||
- image_path
|
||||
- webpages
|
||||
returns:
|
||||
type: Path
|
||||
description: "The path of the saved webpages"
|
||||
48
tests/metagpt/tools/functions/libs/test_vision.py
Normal file
48
tests/metagpt/tools/functions/libs/test_vision.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2024/01/15
|
||||
@Author : mannaandpoem
|
||||
@File : test_vision.py
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from metagpt import logs
|
||||
from metagpt.tools.functions.libs.vision import Vision
|
||||
|
||||
|
||||
@pytest.fixture
def mock_webpages():
    """Canned model answer containing one fenced html, css and javascript block each.

    The html references ``scripts.js`` and ``styles.css`` so that
    ``Vision.save_webpages`` writes all three files.
    """
    return """```html\n<html>\n<script src="scripts.js"></script>
<link rel="stylesheet" href="styles.css">\n</html>\n```\n
```css\n.class { ... }\n```\n
```javascript\nfunction() { ... }\n```\n"""
|
||||
|
||||
|
||||
def test_vision_generate_webpages(mocker, mock_webpages):
    """With generate_web_pages patched to the canned answer, all three fence tags must be present."""
    mocker.patch(
        "metagpt.tools.functions.libs.vision.Vision.generate_web_pages",
        return_value=mock_webpages,
    )

    rsp = Vision().generate_web_pages(image_path="image.png")
    logs.logger.info(rsp)

    for keyword in ("html", "css", "javascript"):
        assert keyword in rsp
|
||||
|
||||
|
||||
def test_save_webpages(mocker, mock_webpages, tmp_path):
    """save_webpages should write the page files into a "webpages" folder beside the image."""
    mocker.patch(
        "metagpt.tools.functions.libs.vision.Vision.generate_web_pages",
        return_value=mock_webpages
    )
    # Place the (never read) image under pytest's tmp_path so the output folder
    # is created there instead of in the current working directory.
    image_path = str(tmp_path / "image.png")
    vision = Vision()
    webpages = vision.generate_web_pages(image_path)
    webpages_dir = vision.save_webpages(image_path=image_path, webpages=webpages)
    logs.logger.info(webpages_dir)
    assert webpages_dir.exists()
    assert webpages_dir == tmp_path / "webpages"
    # The canned answer references scripts.js and styles.css, so all three files are expected.
    for filename in ("index.html", "styles.css", "scripts.js"):
        assert (webpages_dir / filename).exists()
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue