Merge pull request #63 from stellaHSR/main

add UI role
This commit is contained in:
geekan 2023-07-23 00:09:09 +08:00 committed by GitHub
commit cfd5749456
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 667 additions and 2 deletions

View file

@ -1,4 +1,3 @@
# DO NOT MODIFY THIS FILE, create a new key.yaml, define OPENAI_API_KEY.
# The configuration of key.yaml has a higher priority and will not enter git
@ -36,3 +35,8 @@ RPM: 10
#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
#AZURE_TTS_REGION: "eastus"
#### for Stable Diffusion
## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
SD_URL: "YOUR_SD_URL"
SD_T2I_API: "/sdapi/v1/txt2img"

View file

@ -13,7 +13,7 @@ from metagpt.actions.action_output import ActionOutput
from tenacity import retry, stop_after_attempt, wait_fixed
from pydantic import BaseModel
from metagpt.utils.common import OutputParser
from metagpt.logs import logger
class Action(ABC):
def __init__(self, name: str = '', context=None, llm: LLM = None):
@ -55,8 +55,10 @@ class Action(ABC):
system_msgs = []
system_msgs.append(self.prefix)
content = await self.llm.aask(prompt, system_msgs)
logger.debug(content)
output_class = ActionOutput.create_model_class(output_class_name, output_data_mapping)
parsed_data = OutputParser.parse_data_with_mapping(content, output_data_mapping)
logger.debug(parsed_data)
instruct_content = output_class(**parsed_data)
return ActionOutput(content, instruct_content)

View file

@ -59,6 +59,7 @@ ATTENTION: Use '##' to SPLIT SECTIONS, not '#'. AND '## <SECTION_NAME>' SHOULD W
## Requirement Pool: Provided as Python list[str, str], the parameters are requirement description, priority(P0/P1/P2), respectively, comply with PEP standards; no more than 5 requirements and consider to make its difficulty lower
## UI Design draft: Provide as Plain text. Be simple. Describe the elements and functions, also provide a simple style description and layout description.
## Anything UNCLEAR: Provide as Plain text. Make clear here.
"""
FORMAT_EXAMPLE = """
@ -105,6 +106,9 @@ The product should be a ...
]
```
## UI Design draft
Give a basic function description, and a draft
## Anything UNCLEAR
There are no unclear points.
---
@ -117,6 +121,7 @@ OUTPUT_MAPPING = {
"Competitive Quadrant Chart": (str, ...),
"Requirement Analysis": (str, ...),
"Requirement Pool": (List[Tuple[str, str]], ...),
"UI Design draft":(str, ...),
"Anything UNCLEAR": (str, ...),
}
@ -136,5 +141,6 @@ class WritePRD(Action):
prompt = PROMPT_TEMPLATE.format(requirements=requirements, search_information=info,
format_example=FORMAT_EXAMPLE)
logger.debug(prompt)
prd = await self._aask_v1(prompt, "prd", OUTPUT_MAPPING)
return prd

View file

@ -114,6 +114,7 @@ class Role:
def _set_state(self, state):
"""Update the current state."""
self._rc.state = state
logger.debug(self._actions)
self._rc.todo = self._actions[self._rc.state]
def set_env(self, env: 'Environment'):
@ -170,8 +171,11 @@ class Role:
if not self._rc.env:
return 0
env_msgs = self._rc.env.memory.get()
observed = self._rc.env.memory.get_by_actions(self._rc.watch)
already_observed = self._rc.memory.get()
news: list[Message] = []
for i in observed:
if i in already_observed:

135
metagpt/tools/sd_engine.py Normal file
View file

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
# @Date : 2023/7/19 16:28
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import os
import asyncio
from os.path import join
from typing import List
import json
import io
import base64
from aiohttp import ClientSession
from PIL import Image, PngImagePlugin
from metagpt.logs import logger
from metagpt.config import Config
from metagpt.const import WORKSPACE_ROOT
config = Config()
# Negative prompt used whenever a caller does not provide one of their own.
default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"

# Default request body for the SD text-to-image API. SDEngine starts from these
# values and overrides prompt, size and model checkpoint per request.
payload = dict(
    prompt="",
    negative_prompt=default_negative_prompt,
    override_settings=dict(sd_model_checkpoint="galaxytimemachinesGTM_photoV20"),
    seed=-1,
    batch_size=1,
    n_iter=1,
    steps=20,
    cfg_scale=7,
    width=512,
    height=768,
    restore_faces=False,
    tiling=False,
    do_not_save_samples=False,
    do_not_save_grid=False,
    enable_hr=False,
    hr_scale=2,
    hr_upscaler="Latent",
    hr_second_pass_steps=0,
    hr_resize_x=0,
    hr_resize_y=0,
    hr_upscale_to_x=0,
    hr_upscale_to_y=0,
    truncate_x=0,
    truncate_y=0,
    applied_old_hires_behavior_to=None,
    eta=None,
    sampler_index="DPM++ SDE Karras",
    alwayson_scripts={},
)
class SDEngine:
    """Async client for the text-to-image endpoint of a Stable Diffusion service.

    The endpoint URL is built from the ``SD_URL`` and ``SD_T2I_API`` config
    entries. Generated images are written below
    ``WORKSPACE_ROOT / "resources" / "SD_Output"``.
    """

    def __init__(self):
        from copy import deepcopy

        # Initialize the SDEngine with configuration
        self.config = Config()
        self.sd_url = self.config.get('SD_URL')
        self.sd_t2i_url = f"{self.sd_url}{self.config.get('SD_T2I_API')}"
        # Start from a private copy of the default payload so that engines do
        # not share (and silently overwrite) one module-level dict.
        self.payload = deepcopy(payload)
        logger.info(self.sd_t2i_url)

    def construct_payload(self, prompt, negtive_prompt=default_negative_prompt, width=512, height=512,
                          sd_model="galaxytimemachinesGTM_photoV20"):
        """Update the engine's payload with the given inputs and return a snapshot.

        Args:
            prompt: Text prompt to render.
            negtive_prompt: Negative prompt (parameter spelling kept for
                existing callers).
            width: Requested image width.
            height: Requested image height.
            sd_model: Model checkpoint placed in ``override_settings``.

        Returns:
            An independent copy of the updated payload, so payloads collected
            for a batch keep their own prompt instead of all aliasing
            ``self.payload``.
        """
        from copy import deepcopy

        self.payload["prompt"] = prompt
        # The default payload uses the key "negative_prompt"; writing to a
        # misspelled key would leave the real setting untouched.
        self.payload["negative_prompt"] = negtive_prompt
        self.payload["width"] = width
        self.payload["height"] = height
        self.payload["override_settings"]["sd_model_checkpoint"] = sd_model
        logger.info(f"call sd payload is {self.payload}")
        return deepcopy(self.payload)

    def _save(self, imgs, save_name=""):
        """Decode ``imgs`` and store them in the SD output directory."""
        save_dir = WORKSPACE_ROOT / "resources" / "SD_Output"
        os.makedirs(save_dir, exist_ok=True)
        batch_decode_base64_to_image(imgs, save_dir, save_name=save_name)

    async def run_t2i(self, prompts: List):
        """Run the text-to-image API once per entry of ``prompts`` and save the results.

        Args:
            prompts: Request payloads (as returned by ``construct_payload``).
                Plain prompt strings are also accepted and are turned into a
                payload with the default settings; a single string is treated
                as a one-element list.

        The images of entry ``i`` are saved under the name ``output_<i>``.
        """
        if isinstance(prompts, str):
            # Iterating a bare string would issue one request per character.
            prompts = [prompts]
        # The context manager closes the session even when a request fails.
        async with ClientSession() as session:
            for request_idx, item in enumerate(prompts):
                request_body = self.construct_payload(item) if isinstance(item, str) else item
                results = await self.run(url=self.sd_t2i_url, payload=request_body, session=session)
                self._save(results, save_name=f"output_{request_idx}")

    async def run(self, url, payload, session):
        """POST ``payload`` as JSON to ``url`` and return the ``images`` entry of the reply.

        Raises:
            KeyError: If the decoded reply has no ``images`` key.
        """
        async with session.post(url, json=payload, timeout=600) as rsp:
            data = await rsp.read()
        rsp_json = json.loads(data)
        # Log the reply keys first so a reply without "images" can be diagnosed.
        logger.info(f"callback rsp json is {rsp_json.keys()}")
        imgs = rsp_json['images']
        return imgs

    async def run_i2i(self):
        """Image-to-image generation (not implemented yet)."""
        # TODO: add the image-to-image API call
        raise NotImplementedError

    async def run_sam(self):
        """SAM invocation (not implemented yet)."""
        # TODO: add the SAM API call
        raise NotImplementedError
def decode_base64_to_image(img, save_name):
    """Decode a base64-encoded image and save it as ``<save_name>.png``.

    Args:
        img: Base64 text of the image, with or without a leading
            ``data:<mime>;base64,`` prefix.
        save_name: Target path without the ``.png`` extension.

    Returns:
        A ``(pnginfo, image)`` tuple: the (empty) PNG metadata object that was
        written and the decoded PIL image.
    """
    # Base64 text contains no comma, so whatever follows the last separator is
    # always the data itself; taking the first piece would decode the header
    # whenever a data-URI prefix is present.
    encoded = img.split(",", 1)[-1]
    image = Image.open(io.BytesIO(base64.b64decode(encoded)))
    pnginfo = PngImagePlugin.PngInfo()
    logger.info(save_name)
    image.save(f"{save_name}.png", pnginfo=pnginfo)
    return pnginfo, image
def batch_decode_base64_to_image(imgs, save_dir="", save_name=""):
    """Decode every image in ``imgs`` and save each one to its own file.

    Args:
        imgs: Iterable of base64-encoded images.
        save_dir: Directory the files are written to.
        save_name: Base file name without extension.

    The first image is stored as ``<save_dir>/<save_name>.png``; any further
    image gets an ``_<index>`` suffix so that it does not overwrite the
    previous one.
    """
    # Build the base path once: re-joining inside the loop would feed the
    # already-joined path back into join() on the next iteration.
    base_path = join(save_dir, save_name)
    for idx, _img in enumerate(imgs):
        target = base_path if idx == 0 else f"{base_path}_{idx}"
        decode_base64_to_image(_img, save_name=target)
if __name__ == "__main__":
    # Manual smoke run: render one sample prompt through the configured SD service.
    engine = SDEngine()
    prompt = "pixel style, game design, a game interface should be minimalistic and intuitive with the score and high score displayed at the top. The snake and its food should be easily distinguishable. The game should have a simple color scheme, with a contrasting color for the snake and its food. Complete interface boundary"
    # run_t2i iterates over request payloads; handing it the raw prompt string
    # would iterate over its characters, so wrap the built payload in a list.
    request_payload = engine.construct_payload(prompt)
    asyncio.run(engine.run_t2i([request_payload]))

View file

@ -0,0 +1,191 @@
# -*- coding: utf-8 -*-
# @Date : 2023/7/22 02:40
# @Author : stellahong (stellahong@fuzhi.ai)
#
from tests.metagpt.roles.ui_role import UIDesign
# Canned LLM reply shared by the parser tests in this module.
# NOTE: the reply is cut off inside the final CSS rule - there is no closing
# brace, no closing code fence and no "## Anything UNCLEAR" section.
llm_resp= '''
# UI Design Description
```The user interface for the snake game will be designed in a way that is simple, clean, and intuitive. The main elements of the game such as the game grid, snake, food, score, and game over message will be clearly defined and easy to understand. The game grid will be centered on the screen with the score displayed at the top. The game controls will be intuitive and easy to use. The design will be modern and minimalist with a pleasing color scheme.```
## Selected Elements
Game Grid: The game grid will be a rectangular area in the center of the screen where the game will take place. It will be defined by a border and will have a darker background color.
Snake: The snake will be represented by a series of connected blocks that move across the grid. The color of the snake will be different from the background color to make it stand out.
Food: The food will be represented by small objects that are a different color from the snake and the background. The food will be randomly placed on the grid.
Score: The score will be displayed at the top of the screen. The score will increase each time the snake eats a piece of food.
Game Over: When the game is over, a message will be displayed in the center of the screen. The player will be given the option to restart the game.
## HTML Layout
```html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Snake Game</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div class="score">Score: 0</div>
<div class="game-grid">
<!-- Snake and food will be dynamically generated here using JavaScript -->
</div>
<div class="game-over">Game Over</div>
</body>
</html>
```
## CSS Styles (styles.css)
```css
body {
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background-color: #f0f0f0;
}
.score {
font-size: 2em;
margin-bottom: 1em;
}
.game-grid {
width: 400px;
height: 400px;
display: grid;
grid-template-columns: repeat(20, 1fr);
grid-template-rows: repeat(20, 1fr);
gap: 1px;
background-color: #222;
border: 1px solid #555;
}
.snake-segment {
background-color: #00cc66;
}
.food {
background-color: #cc3300;
}
.control-panel {
display: flex;
justify-content: space-around;
width: 400px;
margin-top: 1em;
}
.control-button {
padding: 1em;
font-size: 1em;
border: none;
background-color: #555;
color: #fff;
cursor: pointer;
}
.game-over {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
font-size: 3em;
'''
def test_ui_design_parse_css():
    """The CSS parser extracts the stylesheet from the canned LLM reply.

    Both sides are compared with surrounding whitespace stripped: the expected
    literal starts with a newline (it opens right after the triple quote),
    whereas the parser's capture starts on the first CSS line. The fixture is a
    truncated reply (no closing fence or "## Anything UNCLEAR" heading), so
    this also exercises how the parser copes with a missing end marker.
    """
    ui_design_work = UIDesign(name="UI design action")
    css = '''
body {
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background-color: #f0f0f0;
}
.score {
font-size: 2em;
margin-bottom: 1em;
}
.game-grid {
width: 400px;
height: 400px;
display: grid;
grid-template-columns: repeat(20, 1fr);
grid-template-rows: repeat(20, 1fr);
gap: 1px;
background-color: #222;
border: 1px solid #555;
}
.snake-segment {
background-color: #00cc66;
}
.food {
background-color: #cc3300;
}
.control-panel {
display: flex;
justify-content: space-around;
width: 400px;
margin-top: 1em;
}
.control-button {
padding: 1em;
font-size: 1em;
border: none;
background-color: #555;
color: #fff;
cursor: pointer;
}
.game-over {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
font-size: 3em;
'''
    parsed = ui_design_work.parse_css_code(context=llm_resp)
    assert parsed.strip() == css.strip()
def test_ui_design_parse_html():
    """The HTML parser extracts the fenced HTML document from the canned LLM reply.

    Uses ``parse_html_code`` (the HTML test must not go through the CSS parser)
    and compares with surrounding whitespace stripped, because the expected
    literal starts with a newline that the parser's capture does not include.
    """
    ui_design_work = UIDesign(name="UI design action")
    html = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Snake Game</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div class="score">Score: 0</div>
<div class="game-grid">
<!-- Snake and food will be dynamically generated here using JavaScript -->
</div>
<div class="game-over">Game Over</div>
</body>
</html>
'''
    parsed = ui_design_work.parse_html_code(context=llm_resp)
    assert parsed.strip() == html.strip()

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# @Date : 2023/7/22 02:40
# @Author : stellahong (stellahong@fuzhi.ai)
#
from metagpt.software_company import SoftwareCompany
from metagpt.roles import ProductManager
from tests.metagpt.roles.ui_role import UI
def test_add_ui():
    """A UI role built with default arguments reports the "UI Design" profile."""
    expected_profile = "UI Design"
    role = UI()
    assert role.profile == expected_profile
async def test_ui_role(idea: str, investment: float = 3.0, n_round: int = 5):
    """Run a company staffed with a product manager and a UI designer on ``idea``.

    The company is funded with ``investment`` and run for ``n_round`` rounds.
    """
    startup = SoftwareCompany()
    staff = [ProductManager(), UI()]
    startup.hire(staff)
    startup.invest(investment)
    startup.start_project(idea)
    await startup.run(n_round=n_round)

View file

@ -0,0 +1,276 @@
# -*- coding: utf-8 -*-
# @Date : 2023/7/15 16:40
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import re
import os
from importlib import import_module
from functools import wraps
from metagpt.logs import logger
from metagpt.actions import Action, ActionOutput
from metagpt.roles import ProductManager, Role
from metagpt.schema import Message
from metagpt.const import WORKSPACE_ROOT
from metagpt.actions import WritePRD
from metagpt.software_company import SoftwareCompany
from metagpt.tools.sd_engine import SDEngine
# Prompt sent to the LLM by UIDesign.run. `{context}` is filled with the UI
# design draft parsed from the incoming message and `{format_example}` with
# FORMAT_EXAMPLE.
PROMPT_TEMPLATE = '''
# Context
{context}
## Format example
{format_example}
-----
Role: You are a UserInterface Designer; the goal is to finish a UI design according to PRD, give a design description, and select specified elements and UI style.
Requirements: Based on the context, fill in the following missing information, provide detailed HTML and CSS code
Attention: Use '##' to split sections, not '#', and '## <SECTION_NAME>' SHOULD WRITE BEFORE the code and triple quote.
## UI Design Description:Provide as Plain text, place the design objective here
## Selected Elements:Provide as Plain text, up to 5 specified elements, clear and simple
## HTML Layout:Provide as Plain text, use standard HTML code
## CSS Styles (styles.css):Provide as Plain text,use standard css code
## Anything UNCLEAR:Provide as Plain text. Make clear here.
'''
# Example reply inserted into PROMPT_TEMPLATE to show the expected section
# layout; its "##" headings are the same strings used as OUTPUT_MAPPING keys.
# NOTE(review): the HTML and CSS here are not wrapped in code fences, while the
# UIDesign parsers search for "```html" / "```css" - confirm this is intended.
FORMAT_EXAMPLE = '''
## UI Design Description
```Snake games are classic and addictive games with simple yet engaging elements. Here are the main elements commonly found in snake games ```
## Selected Elements
Game Grid: The game grid is a rectangular...
Snake: The player controls a snake that moves across the grid...
Food: Food items (often represented as small objects or differently colored blocks)
Score: The player's score increases each time the snake eats a piece of food. The longer the snake becomes, the higher the score.
Game Over: The game ends when the snake collides with itself or an obstacle. At this point, the player's final score is displayed, and they are given the option to restart the game.
## HTML Layout
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Snake Game</title>
<link rel="stylesheet" href="styles.css">
</head>
<body>
<div class="game-grid">
<!-- Snake will be dynamically generated here using JavaScript -->
</div>
<div class="food">
<!-- Food will be dynamically generated here using JavaScript -->
</div>
</body>
</html>
## CSS Styles (styles.css)
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background-color: #f0f0f0;
}
.game-grid {
width: 400px;
height: 400px;
display: grid;
grid-template-columns: repeat(20, 1fr); /* Adjust to the desired grid size */
grid-template-rows: repeat(20, 1fr);
gap: 1px;
background-color: #222;
border: 1px solid #555;
}
.game-grid div {
width: 100%;
height: 100%;
background-color: #444;
}
.snake-segment {
background-color: #00cc66; /* Snake color */
}
.food {
width: 100%;
height: 100%;
background-color: #cc3300; /* Food color */
position: absolute;
}
/* Optional styles for a simple game over message */
.game-over {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
font-size: 24px;
font-weight: bold;
color: #ff0000;
display: none;
}
## Anything UNCLEAR
There are no unclear points.
'''
# Section headings expected in the LLM reply, each mapped to a `(str, ...)`
# field spec; handed to `_aask_v1` to build and fill the structured output.
OUTPUT_MAPPING = {
    section: (str, ...)
    for section in (
        "UI Design Description",
        "Selected Elements",
        "HTML Layout",
        "CSS Styles (styles.css)",
        "Anything UNCLEAR",
    )
}
def load_engine(func):
    """Decorator that turns a ``(module name, class name)`` provider into an engine loader.

    The wrapped function must return ``(file_name, engine_name)``. The module is
    imported relative to the ``metagpt`` package, the named attribute is looked
    up on it and called without arguments.

    Returns:
        The wrapper returns the constructed engine, or ``None`` when the
        constructor raises an ``Exception`` (best effort: a failing engine must
        not prevent the caller from being created). Import and attribute errors
        still propagate.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        file_name, engine_name = func(*args, **kwargs)
        engine_file = import_module(file_name, package='metagpt')
        ip_module_cls = getattr(engine_file, engine_name)
        try:
            engine = ip_module_cls()
        except Exception as err:
            # Only ordinary errors are swallowed; SystemExit/KeyboardInterrupt
            # must still reach the caller. Record why the engine is missing.
            logger.warning(f"failed to initialise engine {engine_name}: {err}")
            engine = None
        return engine

    return wrapper
def parse(func):
    """Decorator that applies a regex supplied by the wrapped function.

    The wrapped function returns ``(context, pattern)``. The wrapper searches
    ``context`` with ``pattern`` (``re.DOTALL``) and returns the first capture
    group; when nothing matches it returns ``context`` unchanged. The outcome
    is logged either way.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        context, pattern = func(*args, **kwargs)
        found = re.search(pattern, context, re.DOTALL)
        if found is None:
            # No match: fall back to the full text.
            logger.info("未找到匹配的内容")
            return context
        extracted = found.group(1)
        logger.info(extracted)
        return extracted

    return wrapper
class UIDesign(Action):
    """Action that turns the UI design draft of a PRD into HTML, CSS and icon images."""

    def __init__(self, name, context=None, llm=None):
        # The LLM is needed to further enrich the UI design prompt.
        super().__init__(name, context, llm)

    @parse
    def parse_requirement(self, context: str):
        """Parse the "UI Design draft" section (up to "Anything UNCLEAR") from the context."""
        pattern = r"## UI Design draft.*?\n(.*?)## Anything UNCLEAR"
        return context, pattern

    @parse
    def parse_ui_elements(self, context: str):
        """Parse the "Selected Elements" section (up to "HTML Layout") from the context."""
        pattern = r"## Selected Elements.*?\n(.*?)## HTML Layout"
        return context, pattern

    @parse
    def parse_css_code(self, context: str):
        """Parse the body of the fenced ```css block from the context.

        The capture ends at the closing fence. If the fence is missing, it ends
        at the "Anything UNCLEAR" heading, or at the end of the text for a
        truncated reply, so the fence never leaks into the saved stylesheet.
        """
        pattern = r"```css.*?\n(.*?)(?:```|## Anything UNCLEAR|\Z)"
        return context, pattern

    @parse
    def parse_html_code(self, context: str):
        """Parse the body of the fenced ```html block from the context."""
        pattern = r"```html.*?\n(.*?)```"
        return context, pattern

    async def draw_icons(self, context, *args, **kwargs):
        """Draw one image per selected UI element using SDEngine."""
        from copy import deepcopy

        engine = SDEngine()
        icon_prompts = self.parse_ui_elements(context)
        icons = [line for line in icon_prompts.split("\n") if line.strip()]
        prompts_batch = []
        for icon_prompt in icons:
            # FIXME: add a dedicated icon LoRA
            prompt = engine.construct_payload(icon_prompt + ".<lora:WZ0710_AW81e-3_30e3b128d64T32_goon0.5>")
            # Snapshot the payload: the engine may hand back its own mutable
            # dict, in which case every queued entry would end up carrying the
            # last prompt.
            prompts_batch.append(deepcopy(prompt))
        await engine.run_t2i(prompts_batch)
        logger.info("Finish icon design using StableDiffusion API")

    async def _save(self, css_content, html_content):
        """Write the CSS and HTML to ``WORKSPACE_ROOT / "resources" / "codes"``."""
        save_dir = WORKSPACE_ROOT / "resources" / 'codes'
        os.makedirs(save_dir, exist_ok=True)
        # Save CSS and HTML content to files
        css_file_path = save_dir / "ui_design.css"
        html_file_path = save_dir / "ui_design.html"
        # Explicit encoding keeps the output independent of the platform default.
        with open(css_file_path, 'w', encoding="utf-8") as css_file:
            css_file.write(css_content)
        with open(html_file_path, 'w', encoding="utf-8") as html_file:
            html_file.write(html_content)

    async def run(self, requirements: list[Message], *args, **kwargs) -> ActionOutput:
        """Run the UI Design action on the latest requirement message.

        Extracts the UI design draft from the last message, asks the LLM for a
        design, saves the parsed HTML/CSS, draws the icons and returns the
        LLM's ActionOutput.
        """
        # FIXME: update prompt (refine it according to the requirements)
        context = requirements[-1].content
        ui_design_draft = self.parse_requirement(context=context)
        # TODO: parse requirements str
        prompt = PROMPT_TEMPLATE.format(context=ui_design_draft, format_example=FORMAT_EXAMPLE)
        logger.info(prompt)
        ui_describe = await self._aask_v1(prompt, "ui_design", OUTPUT_MAPPING)
        logger.info(ui_describe.content)
        logger.info(ui_describe.instruct_content)
        css = self.parse_css_code(context=ui_describe.content)
        html = self.parse_html_code(context=ui_describe.content)
        await self._save(css_content=css, html_content=html)
        await self.draw_icons(ui_describe.content)
        return ui_describe
class UI(Role):
    """Role that produces a UI design (UIDesign action) whenever a PRD is written."""

    def __init__(self, name="Catherine", profile="UI Design",
                 goal="Finish a workable and good User Interface design based on a product design",
                 constraints="Give clear layout description and use standard icons to finish the design",
                 skills=("SD",)):
        # `skills` defaults to an immutable tuple so the default can never be
        # shared and mutated across instances.
        super().__init__(name, profile, goal, constraints)
        self.load_skills(skills)
        self._init_actions([UIDesign])
        self._watch([WritePRD])

    @load_engine
    def load_sd_engine(self):
        """Load the SDEngine."""
        file_name = ".tools.sd_engine"
        engine_name = "SDEngine"
        return file_name, engine_name

    def load_skills(self, skills):
        """Load the engines for the requested skills.

        Args:
            skills: Iterable of skill names; only "SD" is recognised so far.
        """
        # Always define the attribute, so it exists even without the "SD" skill.
        self.sd_engine = None
        # TODO: add other image-generation engines
        for skill in skills:
            if skill == "SD":
                self.sd_engine = self.load_sd_engine()
                logger.info(f"load skill engine {self.sd_engine}")

View file

@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# @Date : 2023/7/22 02:40
# @Author : stellahong (stellahong@fuzhi.ai)
#
import os
from metagpt.tools.sd_engine import SDEngine, WORKSPACE_ROOT
def test_sd_engine_init():
    """A freshly created engine carries the default seed of -1 in its payload."""
    engine = SDEngine()
    seed = engine.payload["seed"]
    assert seed == -1
def test_sd_engine_generate_prompt():
    """construct_payload stores the given prompt in the engine's payload."""
    expected_prompt = "test"
    engine = SDEngine()
    engine.construct_payload(prompt=expected_prompt)
    assert engine.payload["prompt"] == expected_prompt
async def test_sd_engine_run_t2i():
    """run_t2i renders a prompt and stores the image as ``output_0.png``.

    NOTE(review): this coroutine needs an async-aware test runner and a
    reachable SD service - confirm how it is meant to be executed.
    """
    sd_engine = SDEngine()
    # run_t2i posts each list entry as the request body, so send a full payload
    # rather than the bare prompt text.
    request_payload = sd_engine.construct_payload(prompt="test")
    await sd_engine.run_t2i(prompts=[request_payload])
    img_path = WORKSPACE_ROOT / "resources" / "SD_Output" / "output_0.png"
    assert os.path.exists(img_path)