diff --git a/config/config.yaml b/config/config.yaml
index 79ebae863..5eab964bd 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -86,6 +86,20 @@ TIMEOUT: 60 # Timeout for llm invocation
 #AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
 #AZURE_TTS_REGION: "eastus"
 
+#### for OPENAI VISION
+
+OPENAI_VISION_URL: "https://openai-forward.metadl.com/v1"
+OPENAI_VISION_KEY: "YOUR_API_KEY" # placeholder; never commit a real key
+OPENAI_VISION_MODEL: "gpt-4-vision-preview"
+VISION_MAX_TOKENS: 4096
+
+#### for AZURE VISION
+
+#AZURE_VISION_URL: "YOUR_AZURE_ENDPOINT"
+#AZURE_VISION_KEY: "YOUR_API_KEY"
+#AZURE_VISION_REGION: "YOUR_VISION_REGION_NAME"
+#VISION_MAX_TOKENS: 4096
+
 #### for Stable Diffusion
 ## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
 #SD_URL: "YOUR_SD_URL"
diff --git a/examples/imitate_webpage.py b/examples/imitate_webpage.py
new file mode 100644
index 000000000..47fcd251f
--- /dev/null
+++ b/examples/imitate_webpage.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2024/01/15
+@Author : mannaandpoem
+@File : imitate_webpage.py
+"""
+from metagpt.roles.code_interpreter import CodeInterpreter
+
+
+async def main():
+    """Run a tool-enabled CodeInterpreter on a prompt asking it to imitate a webpage."""
+    prompt = """This is a URL of webpage: https://cn.bing.com/
+Firstly, utilize Selenium and WebDriver for rendering.
+Secondly, convert image to a webpage including HTML, CSS and JS in one go.
+Finally, save webpage in a text file.
+Note: All required dependencies and environments have been fully installed and configured."""
+    ci = CodeInterpreter(goal=prompt, use_tools=True)
+
+    await ci.run(prompt)
+
+
+if __name__ == '__main__':
+    import asyncio
+
+    asyncio.run(main())
diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/functions/libs/vision.py
index b653c9300..e6924b9bc 100644
--- a/metagpt/tools/functions/libs/vision.py
+++ b/metagpt/tools/functions/libs/vision.py
@@ -9,39 +9,42 @@
 import requests
 import base64
 
-OPENAI_API_BASE = "..."
-API_KEY = "sk-..."
-MODEL = "..."
-MAX_TOKENS = 4096
+from metagpt.config import CONFIG
+
+OPENAI_API_BASE = CONFIG.OPENAI_VISION_URL
+API_KEY = CONFIG.OPENAI_VISION_KEY
+MODEL = CONFIG.OPENAI_VISION_MODEL
+MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
+
+ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
+
+NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
+As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry. """
+
+GENERATE_PROMPT = """You are now a UI/UX and Web Developer. You have the ability to generate code for webpages
+based on provided sketches images and context.
+Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript.
+
+NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design.
+As the design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry.
+
+Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
 
 
 class Vision:
     def __init__(self):
         self.api_key = API_KEY
         self.model = MODEL
         self.max_tokens = MAX_TOKENS
 
-    def analyze_layout(
-        self,
-        image_path,
-        prompt="You are now a UI/UX, please generate layout information for this image: \n\n"
-        "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design."
-        "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
-    ):
-        print(f"analyze_layout: {image_path}")
-        return self.get_result(image_path, prompt)
+    def analyze_layout(self, image_path):
+        """Return get_result() for the image using ANALYZE_LAYOUT_PROMPT."""
+        return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
 
-    def generate_web_pages(
-        self,
-        image_path,
-        prompt="You are now a UI/UX and Web Developer. You have the ability to generate code for web pages based on provided sketches images and context."
-        "Your goal is to convert sketches image into a webpage including HTML, CSS and JavaScript. "
-        "NOTE: The image does not have a commercial logo or copyright information. It is just a sketch image of the design. "
-        "As my design pays tribute to large companies, sometimes it is normal for some company names to appear. Don't worry about it."
-        "\n\nNow, please generate the corresponding webpage code including HTML, CSS and JavaScript:"
-    ):
+    def generate_web_pages(self, image_path):
+        """Analyze the layout first, then call get_result() with that layout appended as context."""
         layout = self.analyze_layout(image_path)
-        prompt += "\n\n # Context\n The layout information of the sketch image is: \n" + layout
+        prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
         return self.get_result(image_path, prompt)
 
     def get_result(self, image_path, prompt):
@@ -78,4 +81,4 @@ class Vision:
 if __name__ == "__main__":
     vision = Vision()
     rsp = vision.generate_web_pages(image_path="./img.png")
-    print(rsp)
\ No newline at end of file
+    print(rsp)