1. add vision config in config.yaml

2. add imitate_webpage.py in example
3. update vision.py
This commit is contained in:
mannaandpoem 2024-01-15 11:13:35 +08:00
parent 40f5d5e40e
commit f45a368be2
3 changed files with 65 additions and 25 deletions

View file

@ -86,6 +86,20 @@ TIMEOUT: 60 # Timeout for llm invocation
#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
#AZURE_TTS_REGION: "eastus"
#### for OPENAI VISION
## SECURITY: never commit a real API key. Keep the real value in a private, untracked
## override or a secret store; any key that was previously committed here must be revoked.
## The URL is the official OpenAI endpoint; only point it at a proxy you operate or trust,
## because the proxy receives the API key with every request.
OPENAI_VISION_URL: "https://api.openai.com/v1"
OPENAI_VISION_KEY: "YOUR_API_KEY"
OPENAI_VISION_MODEL: "gpt-4-vision-preview"
VISION_MAX_TOKENS: 4096
#### for AZURE VISION
#AZURE_VISION_URL: "YOUR_AZURE_ENDPOINT"
#AZURE_VISION_KEY: "YOUR_API_KEY"
#AZURE_VISION_REGION: "YOUR_VISION_REGION_NAME"
#VISION_MAX_TOKENS: 4096
#### for Stable Diffusion
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
#SD_URL: "YOUR_SD_URL"

View file

@ -0,0 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/15
@Author : mannaandpoem
@File : imitate_webpage.py
"""
from metagpt.roles.code_interpreter import CodeInterpreter
async def main():
    """Run a tool-enabled CodeInterpreter on the "imitate a webpage" requirement.

    The same requirement text is used both as the role's ``goal`` and as the
    message passed to ``run``.
    """
    requirement = """This is a URL of webpage: https://cn.bing.com/
Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Finally, save webpage in a text file.
Note: All required dependencies and environments have been fully installed and configured."""
    interpreter = CodeInterpreter(goal=requirement, use_tools=True)
    await interpreter.run(requirement)
if __name__ == "__main__":
    # Script entry point: asyncio is imported lazily, only when the example
    # is executed directly, and drives the main() coroutine to completion.
    import asyncio

    asyncio.run(main())

View file

@ -9,39 +9,40 @@ import requests
import base64
from metagpt.config import CONFIG

# Connection settings for the vision model, read once at import time from the
# project configuration (OPENAI_VISION_* / VISION_MAX_TOKENS entries).
# The former hard-coded placeholder values were dead code: each one was
# immediately overwritten by the assignments below.
OPENAI_API_BASE = CONFIG.OPENAI_VISION_URL
API_KEY = CONFIG.OPENAI_VISION_KEY
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
# Prompt passed to get_result by Vision.analyze_layout to request layout
# information for an image. Runtime text: do not reformat.
ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry. """
# Base prompt used by Vision.generate_web_pages; a "# Context" section holding
# the analyzed layout is appended to it before it is passed to get_result.
GENERATE_PROMPT = """You are now a UI/UX and Web Developer. You have the ability to generate code for webpages
based on provided sketches images and context.
Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript.
NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry.
Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
class Vision:
def __init__(self):
    """Capture the module-level vision settings on the instance.

    ``max_tokens`` comes from ``MAX_TOKENS`` so that the configured limit is
    honoured; a second, hard-coded ``4096`` assignment used to override it.
    """
    self.api_key = API_KEY
    self.model = MODEL
    self.max_tokens = MAX_TOKENS
def analyze_layout(self, image_path):
    """Request layout information for the image at ``image_path``.

    Delegates to ``self.get_result`` with ``ANALYZE_LAYOUT_PROMPT`` and
    returns that call's result unchanged.

    The earlier duplicate definition (inline default prompt plus a debug
    ``print``) was shadowed by this one and has been removed as dead code.

    :param image_path: image to analyze, forwarded as-is to ``get_result``.
    :return: whatever ``get_result`` returns for the layout prompt.
    """
    return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
def generate_web_pages(self, image_path):
    """Generate webpage code (HTML, CSS, JavaScript) for a sketch image.

    Works in two steps: first obtain layout information through
    ``analyze_layout``, then call ``get_result`` with ``GENERATE_PROMPT``
    extended by a "# Context" section containing that layout.

    :param image_path: image to convert, forwarded to both requests.
    :return: whatever ``get_result`` returns for the generation prompt.
    """
    layout = self.analyze_layout(image_path)
    # Build the prompt from the module constant. The stale ``prompt += ...``
    # line referred to a parameter that no longer exists and would raise
    # UnboundLocalError. ``layout`` must be a str for the concatenation.
    prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
    return self.get_result(image_path, prompt)
def get_result(self, image_path, prompt):
@ -78,4 +79,4 @@ class Vision:
if __name__ == "__main__":
    # Manual smoke test: generate a webpage from ./img.png and print the
    # response once (the duplicated print statement has been removed).
    vision = Vision()
    rsp = vision.generate_web_pages(image_path="./img.png")
    print(rsp)