mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-05 13:52:38 +02:00
add vision tool for code_interpreter
This commit is contained in:
parent
a46d3d5b1c
commit
40f5d5e40e
4 changed files with 114 additions and 0 deletions
|
|
@ -17,6 +17,7 @@ from metagpt.prompts.tool_type import (
|
|||
FEATURE_ENGINEERING_PROMPT,
|
||||
MODEL_TRAIN_PROMPT,
|
||||
MODEL_EVALUATE_PROMPT,
|
||||
VISION_PROMPT
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -71,6 +72,12 @@ TOOL_TYPE_MAPPINGS = {
|
|||
desc="Only for evaluating model.",
|
||||
usage_prompt=MODEL_EVALUATE_PROMPT,
|
||||
),
|
||||
"vision": ToolType(
|
||||
name="vision",
|
||||
module=str(TOOL_LIBS_PATH / "vision"),
|
||||
desc="Only for converting image into webpage code.",
|
||||
usage_prompt=VISION_PROMPT,
|
||||
),
|
||||
"other": ToolType(
|
||||
name="other",
|
||||
module="",
|
||||
|
|
|
|||
81
metagpt/tools/functions/libs/vision.py
Normal file
81
metagpt/tools/functions/libs/vision.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2024/01/12
|
||||
@Author : mannaandpoem
|
||||
@File : vision.py
|
||||
"""
|
||||
import base64
import mimetypes
import os

import requests
|
||||
|
||||
# Endpoint and credentials for the OpenAI-compatible chat-completions API.
# Environment variables take precedence so that real secrets never have to be
# written into this file; the literals are the original placeholder values and
# are used only when the corresponding variable is unset.
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "...")
API_KEY = os.getenv("OPENAI_API_KEY", "sk-...")
MODEL = "..."  # placeholder: name of the chat model that receives the image
MAX_TOKENS = 4096  # sent as "max_tokens" with every request
|
||||
|
||||
|
||||
class Vision:
    """Convert a sketch image into web page code through a chat-completions API.

    The image is base64-encoded, embedded in the request as a data URL and
    posted together with a text prompt to ``{OPENAI_API_BASE}/chat/completions``.
    """

    def __init__(self, api_key=None, model=None, max_tokens=None, timeout=600):
        """Store the request settings.

        Args:
            api_key: Bearer token for the API. Defaults to the module-level ``API_KEY``.
            model: Model name sent in the payload. Defaults to the module-level ``MODEL``.
            max_tokens: Value of the ``max_tokens`` payload field. Defaults to
                the module-level ``MAX_TOKENS``.
            timeout: Seconds passed to ``requests.post`` as its timeout, so a
                stalled connection cannot block the caller forever.
        """
        # Module defaults are looked up here (not in the signature) so that
        # changes made to them after import are still honoured.
        self.api_key = API_KEY if api_key is None else api_key
        self.model = MODEL if model is None else model
        self.max_tokens = MAX_TOKENS if max_tokens is None else max_tokens
        self.timeout = timeout

    def analyze_layout(
        self,
        image_path,
        prompt="You are now a UI/UX, please generate layout information for this image: \n\n"
               "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design."
               "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
    ):
        """Ask the model to describe the layout of the image.

        Args:
            image_path: Path of the image file to analyse.
            prompt: Instruction text sent alongside the image.

        Returns:
            The text content of the model's first choice.
        """
        print(f"analyze_layout: {image_path}")
        return self.get_result(image_path, prompt)

    def generate_web_pages(
        self,
        image_path,
        prompt="You are now a UI/UX and Web Developer. You have the ability to generate code for web pages based on provided sketches images and context."
               "Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript. "
               "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design. "
               "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
               "\n\nNow, please generate the corresponding webpage code including HTML, CSS and JavaScript:"
    ):
        """Generate webpage code (HTML, CSS and JavaScript) for the image.

        Two requests are made: the first obtains a layout description via
        ``analyze_layout``; the second sends the image again with that
        description appended to ``prompt`` as context.

        Args:
            image_path: Path of the image file to convert.
            prompt: Instruction text; the layout description is appended to it.

        Returns:
            The text content of the model's first choice for the second request.
        """
        layout = self.analyze_layout(image_path)
        prompt += "\n\n # Context\n The layout information of the sketch image is: \n" + layout
        return self.get_result(image_path, prompt)

    def get_result(self, image_path, prompt):
        """Send one prompt plus the image to the chat-completions endpoint.

        Args:
            image_path: Path of the image file to attach.
            prompt: Text part of the user message.

        Returns:
            ``choices[0].message.content`` from the JSON response.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the request exceeds ``self.timeout``.
        """
        base64_image = self.encode_image(image_path)
        # Label the data URL with the real image type instead of always
        # claiming JPEG, so PNG/WebP/GIF inputs are described correctly.
        mime_type = self._guess_mime_type(image_path)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                        },
                    ],
                }
            ],
            "max_tokens": self.max_tokens,
        }
        response = requests.post(
            f"{OPENAI_API_BASE}/chat/completions",
            headers=headers,
            json=payload,
            timeout=self.timeout,
        )
        # Fail with the HTTP error itself rather than a later KeyError on
        # "choices" when the API rejects the request.
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]

    @staticmethod
    def _guess_mime_type(image_path):
        """Return the image MIME type implied by the file name, or ``image/jpeg``."""
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if mime_type and mime_type.startswith("image/"):
            return mime_type
        # Unknown or non-image extension: keep the previously hard-coded type.
        return "image/jpeg"

    @staticmethod
    def encode_image(image_path):
        """Read the file at ``image_path`` and return its bytes as a base64 string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: convert the sketch at ./img.png and print the generated code.
    print(Vision().generate_web_pages(image_path="./img.png"))
|
||||
20
metagpt/tools/functions/schemas/vision.yml
Normal file
20
metagpt/tools/functions/schemas/vision.yml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
Vision:
|
||||
type: class
|
||||
description: "Class for generating web pages at once."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize Vision class with default values."
|
||||
|
||||
generate_web_pages:
|
||||
description: "Generate web pages including all code (HTML, CSS and JavaScript) in one go based on the image."
|
||||
parameters:
|
||||
properties:
|
||||
image_path:
|
||||
type: str
|
||||
description: "The path of the image file"
|
||||
|
||||
required:
|
||||
- image_path
|
||||
returns:
|
||||
type: str
|
||||
description: "Generated web page content."
|
||||
Loading…
Add table
Add a link
Reference in a new issue