diff --git a/config/config.yaml b/config/config.yaml index 3bca12afa..fbd9aa81e 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,4 +1,3 @@ - # DO NOT MODIFY THIS FILE, create a new key.yaml, define OPENAI_API_KEY. # The configuration of key.yaml has a higher priority and will not enter git @@ -36,3 +35,8 @@ RPM: 10 #AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY" #AZURE_TTS_REGION: "eastus" + +#### for Stable Diffusion +## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui +SD_URL: "YOUR_SD_URL" +SD_T2I_API: "/sdapi/v1/txt2img" diff --git a/metagpt/actions/action.py b/metagpt/actions/action.py index a390a8350..905b044df 100644 --- a/metagpt/actions/action.py +++ b/metagpt/actions/action.py @@ -13,7 +13,7 @@ from metagpt.actions.action_output import ActionOutput from tenacity import retry, stop_after_attempt, wait_fixed from pydantic import BaseModel from metagpt.utils.common import OutputParser - +from metagpt.logs import logger class Action(ABC): def __init__(self, name: str = '', context=None, llm: LLM = None): @@ -55,8 +55,10 @@ class Action(ABC): system_msgs = [] system_msgs.append(self.prefix) content = await self.llm.aask(prompt, system_msgs) + logger.debug(content) output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping) parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping) + logger.debug(parsed_data) instruct_content = output_class(**parsed_data) return ActionOutput(content, instruct_content) diff --git a/metagpt/actions/write_prd.py b/metagpt/actions/write_prd.py index 449f073c4..957566aab 100644 --- a/metagpt/actions/write_prd.py +++ b/metagpt/actions/write_prd.py @@ -59,6 +59,7 @@ ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. 
AND '## ' SHOULD W ## Requirement Pool: Provided as Python list[str, str], the parameters are requirement description, priority(P0/P1/P2), respectively, comply with PEP standards; no more than 5 requirements and consider to make its difficulty lower +## UI Design draft: Provide as Plain text. Be simple. Describe the elements and functions, also provide a simple style description and layout description. ## Anything UNCLEAR: Provide as Plain text. Make clear here. """ FORMAT_EXAMPLE = """ @@ -105,6 +106,9 @@ The product should be a ... ] ``` +## UI Design draft +Give a basic function description, and a draft + ## Anything UNCLEAR There are no unclear points. --- @@ -117,6 +121,7 @@ OUTPUT_MAPPING = { "Competitive Quadrant Chart": (str, ...), "Requirement Analysis": (str, ...), "Requirement Pool": (List[Tuple[str, str]], ...), + "UI Design draft":(str, ...), "Anything UNCLEAR": (str, ...), } @@ -136,5 +141,6 @@ class WritePRD(Action): prompt = PROMPT_TEMPLATE.format(requirements=requirements, search_information=info, format_example=FORMAT_EXAMPLE) + logger.debug(prompt) prd = await self._aask_v1(prompt, "prd", OUTPUT_MAPPING) return prd diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 36269aed2..de3bb3369 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -114,6 +114,7 @@ class Role: def _set_state(self, state): """Update the current state.""" self._rc.state = state + logger.debug(self._actions) self._rc.todo = self._actions[self._rc.state] def set_env(self, env: 'Environment'): @@ -170,8 +171,11 @@ class Role: if not self._rc.env: return 0 env_msgs = self._rc.env.memory.get() + observed = self._rc.env.memory.get_by_actions(self._rc.watch) + already_observed = self._rc.memory.get() + news: list[Message] = [] for i in observed: if i in already_observed: diff --git a/metagpt/tools/sd_engine.py b/metagpt/tools/sd_engine.py new file mode 100644 index 000000000..e462f1bda --- /dev/null +++ b/metagpt/tools/sd_engine.py @@ -0,0 +1,135 @@ +# -*- 
class SDEngine:
    """Client for the text-to-image endpoint of a Stable Diffusion web service.

    The endpoint URL is built from the ``SD_URL`` and ``SD_T2I_API`` config
    keys. Generated images are written below
    ``WORKSPACE_ROOT / "resources" / "SD_Output"``.
    """

    def __init__(self):
        import copy

        # Initialize the SDEngine with configuration
        self.config = Config()
        self.sd_url = self.config.get('SD_URL')
        self.sd_t2i_url = f"{self.sd_url}{self.config.get('SD_T2I_API')}"
        # Work on a private copy of the module-level template: aliasing it
        # would let one engine's prompt leak into every other engine.
        self.payload = copy.deepcopy(payload)
        logger.info(self.sd_t2i_url)

    def construct_payload(self, prompt, negtive_prompt=default_negative_prompt, width=512, height=512,
                          sd_model="galaxytimemachinesGTM_photoV20"):
        """Fill the request payload and return an independent snapshot of it.

        Args:
            prompt: Positive prompt text.
            negtive_prompt: Negative prompt text (parameter name kept as-is
                for backward compatibility with keyword callers).
            width: Image width in pixels.
            height: Image height in pixels.
            sd_model: Checkpoint name stored under
                ``override_settings.sd_model_checkpoint``.

        Returns:
            A deep copy of ``self.payload`` after the update, so that several
            payloads built in a loop do not all alias the last one.
        """
        import copy

        self.payload["prompt"] = prompt
        # The payload template's field is "negative_prompt"; the former
        # misspelled key left the template's default value in effect.
        self.payload["negative_prompt"] = negtive_prompt
        self.payload["width"] = width
        self.payload["height"] = height
        self.payload["override_settings"]["sd_model_checkpoint"] = sd_model
        logger.info(f"call sd payload is {self.payload}")
        return copy.deepcopy(self.payload)

    def _save(self, imgs, save_name=""):
        """Decode ``imgs`` (base64 strings) and write them as PNG files."""
        save_dir = WORKSPACE_ROOT / "resources"/"SD_Output"
        # exist_ok already covers the "directory is present" case.
        os.makedirs(save_dir, exist_ok=True)
        batch_decode_base64_to_image(imgs, save_dir, save_name=save_name)

    async def run_t2i(self, prompts: List):
        """Post every payload in ``prompts`` to the txt2img URL and save the results.

        Args:
            prompts: Iterable of request payloads (as returned by
                ``construct_payload``). Results of the i-th payload are saved
                under the base name ``output_<i>``.
        """
        # The context manager closes the session even when a request fails.
        async with ClientSession() as session:
            for payload_idx, request_payload in enumerate(prompts):
                results = await self.run(url=self.sd_t2i_url, payload=request_payload, session=session)
                self._save(results, save_name=f"output_{payload_idx}")

    async def run(self, url, payload, session):
        """POST ``payload`` as JSON to ``url`` and return the response's ``images`` entry.

        Raises:
            KeyError: If the decoded JSON response has no ``images`` key.
        """
        async with session.post(url, json=payload, timeout=600) as rsp:
            data = await rsp.read()

        rsp_json = json.loads(data)
        imgs = rsp_json['images']
        logger.info(f"callback rsp json is {rsp_json.keys()}")
        return imgs

    async def run_i2i(self):
        # TODO: add the image-to-image API call
        raise NotImplementedError

    async def run_sam(self):
        # TODO: add the SAM API call
        raise NotImplementedError


def decode_base64_to_image(img, save_name):
    """Decode one base64 image string and save it as ``<save_name>.png``.

    Args:
        img: Base64 text, either bare or prefixed with a ``<header>,`` part
            as in a data URI.
        save_name: Output path without the ``.png`` suffix.

    Returns:
        Tuple ``(pnginfo, image)`` of the (empty) PNG metadata object and the
        opened PIL image.
    """
    # Base64 text never contains a comma, so the last piece is always the
    # data: the whole string when bare, the part after the header otherwise.
    encoded = img.split(",", 1)[-1]
    image = Image.open(io.BytesIO(base64.b64decode(encoded)))
    pnginfo = PngImagePlugin.PngInfo()
    logger.info(save_name)
    image.save(f"{save_name}.png", pnginfo=pnginfo)
    return pnginfo, image


def batch_decode_base64_to_image(imgs, save_dir="", save_name=""):
    """Decode and save every image in ``imgs`` below ``save_dir``.

    The first image keeps the base name ``save_name``; any further image gets
    an ``_<idx>`` suffix so images of one batch no longer overwrite each other.
    """
    for idx, _img in enumerate(imgs):
        stem = save_name if idx == 0 else f"{save_name}_{idx}"
        # Build the path from the untouched base name each time; rebinding
        # save_name inside the loop used to nest save_dir repeatedly.
        decode_base64_to_image(_img, save_name=join(save_dir, stem))
Complete interface boundary" + + engine.construct_payload(prompt) + + event_loop = asyncio.get_event_loop() + event_loop.run_until_complete(engine.run_t2i(prompt)) diff --git a/tests/metagpt/actions/test_ui_design.py b/tests/metagpt/actions/test_ui_design.py new file mode 100644 index 000000000..d284b20f2 --- /dev/null +++ b/tests/metagpt/actions/test_ui_design.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +# @Date : 2023/7/22 02:40 +# @Author : stellahong (stellahong@fuzhi.ai) +# +from tests.metagpt.roles.ui_role import UIDesign + +llm_resp= ''' + # UI Design Description +```The user interface for the snake game will be designed in a way that is simple, clean, and intuitive. The main elements of the game such as the game grid, snake, food, score, and game over message will be clearly defined and easy to understand. The game grid will be centered on the screen with the score displayed at the top. The game controls will be intuitive and easy to use. The design will be modern and minimalist with a pleasing color scheme.``` + +## Selected Elements + +Game Grid: The game grid will be a rectangular area in the center of the screen where the game will take place. It will be defined by a border and will have a darker background color. + +Snake: The snake will be represented by a series of connected blocks that move across the grid. The color of the snake will be different from the background color to make it stand out. + +Food: The food will be represented by small objects that are a different color from the snake and the background. The food will be randomly placed on the grid. + +Score: The score will be displayed at the top of the screen. The score will increase each time the snake eats a piece of food. + +Game Over: When the game is over, a message will be displayed in the center of the screen. The player will be given the option to restart the game. + +## HTML Layout +```html + + + + + + Snake Game + + + +
Score: 0
+
+ +
+
Game Over
+ + +``` + +## CSS Styles (styles.css) +```css +body { + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + height: 100vh; + margin: 0; + background-color: #f0f0f0; +} + +.score { + font-size: 2em; + margin-bottom: 1em; +} + +.game-grid { + width: 400px; + height: 400px; + display: grid; + grid-template-columns: repeat(20, 1fr); + grid-template-rows: repeat(20, 1fr); + gap: 1px; + background-color: #222; + border: 1px solid #555; +} + +.snake-segment { + background-color: #00cc66; +} + +.food { + background-color: #cc3300; +} + +.control-panel { + display: flex; + justify-content: space-around; + width: 400px; + margin-top: 1em; +} + +.control-button { + padding: 1em; + font-size: 1em; + border: none; + background-color: #555; + color: #fff; + cursor: pointer; +} + +.game-over { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + font-size: 3em; + ''' + +def test_ui_design_parse_css(): + ui_design_work = UIDesign(name="UI design action") + + css = ''' + body { + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + height: 100vh; + margin: 0; + background-color: #f0f0f0; +} + +.score { + font-size: 2em; + margin-bottom: 1em; +} + +.game-grid { + width: 400px; + height: 400px; + display: grid; + grid-template-columns: repeat(20, 1fr); + grid-template-rows: repeat(20, 1fr); + gap: 1px; + background-color: #222; + border: 1px solid #555; +} + +.snake-segment { + background-color: #00cc66; +} + +.food { + background-color: #cc3300; +} + +.control-panel { + display: flex; + justify-content: space-around; + width: 400px; + margin-top: 1em; +} + +.control-button { + padding: 1em; + font-size: 1em; + border: none; + background-color: #555; + color: #fff; + cursor: pointer; +} + +.game-over { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + font-size: 3em; + ''' + assert ui_design_work.parse_css_code(context=llm_resp)==css + + 
+def test_ui_design_parse_html(): + ui_design_work = UIDesign(name="UI design action") + + html = ''' + + + + + + Snake Game + + + +
Score: 0
+
+ +
+
Game Over
+ + + ''' + assert ui_design_work.parse_css_code(context=llm_resp)==html + + + diff --git a/tests/metagpt/roles/test_ui.py b/tests/metagpt/roles/test_ui.py new file mode 100644 index 000000000..285bff323 --- /dev/null +++ b/tests/metagpt/roles/test_ui.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# @Date : 2023/7/22 02:40 +# @Author : stellahong (stellahong@fuzhi.ai) +# +from metagpt.software_company import SoftwareCompany +from metagpt.roles import ProductManager + +from tests.metagpt.roles.ui_role import UI + + +def test_add_ui(): + ui = UI() + assert ui.profile == "UI Design" + + +async def test_ui_role(idea: str, investment: float = 3.0, n_round: int = 5): + """Run a startup. Be a boss.""" + company = SoftwareCompany() + company.hire([ProductManager(), UI()]) + company.invest(investment) + company.start_project(idea) + await company.run(n_round=n_round) diff --git a/tests/metagpt/roles/ui_role.py b/tests/metagpt/roles/ui_role.py new file mode 100644 index 000000000..101be9c69 --- /dev/null +++ b/tests/metagpt/roles/ui_role.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- +# @Date : 2023/7/15 16:40 +# @Author : stellahong (stellahong@fuzhi.ai) +# @Desc : +import re +import os +from importlib import import_module +from functools import wraps + +from metagpt.logs import logger +from metagpt.actions import Action, ActionOutput +from metagpt.roles import ProductManager, Role +from metagpt.schema import Message +from metagpt.const import WORKSPACE_ROOT + +from metagpt.actions import WritePRD +from metagpt.software_company import SoftwareCompany +from metagpt.tools.sd_engine import SDEngine + +PROMPT_TEMPLATE = ''' +# Context +{context} + +## Format example +{format_example} +----- +Role: You are a UserInterface Designer; the goal is to finish a UI design according to PRD, give a design description, and select specified elements and UI style. 
+Requirements: Based on the context, fill in the following missing information, provide detailed HTML and CSS code +Attention: Use '##' to split sections, not '#', and '## ' SHOULD WRITE BEFORE the code and triple quote. + +## UI Design Description:Provide as Plain text, place the design objective here +## Selected Elements:Provide as Plain text, up to 5 specified elements, clear and simple +## HTML Layout:Provide as Plain text, use standard HTML code +## CSS Styles (styles.css):Provide as Plain text,use standard css code +## Anything UNCLEAR:Provide as Plain text. Make clear here. + +''' + +FORMAT_EXAMPLE = ''' + +## UI Design Description +```Snake games are classic and addictive games with simple yet engaging elements. Here are the main elements commonly found in snake games ``` + +## Selected Elements + +Game Grid: The game grid is a rectangular... + +Snake: The player controls a snake that moves across the grid... + +Food: Food items (often represented as small objects or differently colored blocks) + +Score: The player's score increases each time the snake eats a piece of food. The longer the snake becomes, the higher the score. + +Game Over: The game ends when the snake collides with itself or an obstacle. At this point, the player's final score is displayed, and they are given the option to restart the game. + + +## HTML Layout + + + + + + Snake Game + + + +
+ +
+
+ +
def load_engine(func):
    """Decorator that turns an engine locator into an engine factory.

    The decorated function must return a ``(file_name, engine_name)`` pair.
    The wrapper imports ``file_name`` (relative names are resolved against the
    ``metagpt`` package), looks up ``engine_name`` in that module and calls it
    without arguments.

    Returns:
        The wrapped callable. Calling it yields the constructed engine, or
        ``None`` when the constructor raises an ordinary exception.

    Raises:
        ImportError / AttributeError: From the wrapper, when the module or the
            attribute cannot be resolved (these were never suppressed).
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        file_name, engine_name = func(*args, **kwargs)
        engine_file = import_module(file_name, package='metagpt')
        ip_module_cls = getattr(engine_file, engine_name)
        try:
            engine = ip_module_cls()
        except Exception:
            # Best-effort construction: a failing engine yields None. A bare
            # ``except:`` would also swallow SystemExit / KeyboardInterrupt.
            engine = None

        return engine

    return wrapper
class UIDesign(Action):
    """Action that asks the LLM for a UI design, saves its HTML/CSS and draws icons.

    The ``parse_*`` methods return ``(context, pattern)``; the ``parse``
    decorator applies the pattern and hands back the first capture group, or
    the whole context when nothing matches.
    """

    def __init__(self, name, context=None, llm=None):
        super().__init__(name, context, llm)  # the LLM is needed to further enrich the UI design prompt

    @parse
    def parse_requirement(self, context: str):
        """Parse UI Design draft from the context using regex."""
        pattern = r"## UI Design draft.*?\n(.*?)## Anything UNCLEAR"
        return context, pattern

    @parse
    def parse_ui_elements(self, context: str):
        """Parse Selected Elements from the context using regex."""
        pattern = r"## Selected Elements.*?\n(.*?)## HTML Layout"
        return context, pattern

    @parse
    def parse_css_code(self, context: str):
        """Parse the body of the first css code fence from the context."""
        # Stop at the closing fence so it does not end up in the stylesheet;
        # fall back to the next section header or the end of the text when
        # the model's answer was cut off before the fence was closed.
        pattern = r"```css.*?\n(.*?)(?:```|## Anything UNCLEAR|\Z)"
        return context, pattern

    @parse
    def parse_html_code(self, context: str):
        """Parse the body of the first html code fence from the context."""
        pattern = r"```html.*?\n(.*?)```"
        return context, pattern

    async def draw_icons(self, context, *args, **kwargs):
        """Draw icons using SDEngine.

        Every non-blank line of the "Selected Elements" section becomes one
        text-to-image prompt.
        """
        import copy

        engine = SDEngine()
        icon_prompts = self.parse_ui_elements(context)
        icons = [line for line in icon_prompts.split("\n") if line.strip()]
        prompts_batch = []
        for icon_prompt in icons:
            # FIXME: add an icon LoRA
            prompt = engine.construct_payload(icon_prompt + ".")
            # Snapshot the payload: if the engine hands back its own mutable
            # dict, every batch entry would otherwise alias the last prompt.
            prompts_batch.append(copy.deepcopy(prompt))
        await engine.run_t2i(prompts_batch)
        logger.info("Finish icon design using StableDiffusion API")

    async def _save(self, css_content, html_content):
        """Write the CSS and HTML text to ``WORKSPACE_ROOT/resources/codes``."""
        save_dir = WORKSPACE_ROOT / "resources" / 'codes'
        os.makedirs(save_dir, exist_ok=True)
        # Save CSS and HTML content to files
        css_file_path = save_dir / "ui_design.css"
        html_file_path = save_dir / "ui_design.html"

        # Explicit UTF-8: model output may contain characters that the
        # platform default encoding cannot represent.
        with open(css_file_path, 'w', encoding="utf-8") as css_file:
            css_file.write(css_content)
        with open(html_file_path, 'w', encoding="utf-8") as html_file:
            html_file.write(html_content)

    async def run(self, requirements: list[Message], *args, **kwargs) -> ActionOutput:
        """Run the UI Design action.

        Uses the content of the last message in ``requirements`` as context,
        queries the LLM, stores the parsed CSS/HTML and triggers icon drawing.

        Returns:
            The ``ActionOutput`` produced by ``_aask_v1``.
        """
        # FIXME: update prompt (refine the prompt according to the requirements)
        context = requirements[-1].content
        ui_design_draft = self.parse_requirement(context=context)
        # TODO: parse requirements str
        prompt = PROMPT_TEMPLATE.format(context=ui_design_draft, format_example=FORMAT_EXAMPLE)
        logger.info(prompt)
        ui_describe = await self._aask_v1(prompt, "ui_design", OUTPUT_MAPPING)
        logger.info(ui_describe.content)
        logger.info(ui_describe.instruct_content)
        css = self.parse_css_code(context=ui_describe.content)
        html = self.parse_html_code(context=ui_describe.content)
        await self._save(css_content=css, html_content=html)
        await self.draw_icons(ui_describe.content)
        return ui_describe
async def test_sd_engine_run_t2i():
    """Run one text-to-image request and check that the PNG file was written."""
    # NOTE(review): as a coroutine test this presumably needs an asyncio
    # pytest plugin/marker to be executed - confirm against the project setup.
    sd_engine = SDEngine()
    # run_t2i posts each list element verbatim as the JSON body, so it must
    # receive a full payload dict rather than the bare prompt string.
    request_payload = sd_engine.construct_payload(prompt="test")
    await sd_engine.run_t2i(prompts=[request_payload])
    img_path = WORKSPACE_ROOT / "resources" / "SD_Output" / "output_0.png"
    assert os.path.exists(img_path)