automated get image

This commit is contained in:
黄伟韬 2024-09-11 15:56:16 +08:00
parent a94e282e7f
commit b262e5df7b
5 changed files with 112 additions and 2 deletions

View file

@ -76,6 +76,8 @@ Note:
19. When the requirement is simple, you don't need to create a plan, just do it right away.
20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code.
21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
22. The default programming languages are HTML (.html), CSS (.css), and Pure JavaScript (.js).
23. When planning, consider whether images are needed. If you are developing a showcase website, start by using ImageGetter.get_image to obtain the necessary images.
"""
CURRENT_STATE = """
The current editor state is:

View file

@ -32,8 +32,10 @@ Note:
9. Do not use the 'end' command when the current task remains unfinished; instead, use the 'finish_current_task' command to indicate completion before switching to the next task.
10. Do not use escape characters in json data, particularly within file paths.
11. Analyze the capabilities of team members and assign tasks to them based on user Requirements. If the requirements ask to ignore certain tasks, follow the requirements.
12. Add default web technologies: HTML (*.html), CSS (*.css), and JavaScript (*.js) to your requirements.If no specific programming language is required, include these technologies in the project requirements. Using instruction to forward this information to your team members.
13. If the the user message is a question. use 'reply to human' to respond to the question, and then end.
12. If the user message is a question, use 'reply to human' to respond to the question, and then end.
13. Instructions and reply must be in the same language.
14. Default technology stack is HTML (.html), CSS (.css), and Pure JavaScript (.js). Web app is the default option when developing software.
15. You are the only one who decides the programming language for the software, so the instruction must contain the programming language.
"""
TL_THOUGHT_GUIDANCE = (
THOUGHT_GUIDANCE

View file

@ -18,6 +18,7 @@ from metagpt.schema import UserMessage
from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE
from metagpt.tools.libs.cr import CodeReview
from metagpt.tools.libs.git import git_create_pull
from metagpt.tools.libs.image_getter import ImageGetter
from metagpt.tools.libs.terminal import Terminal
from metagpt.tools.tool_registry import register_tool
from metagpt.utils.common import CodeParser, awrite
@ -42,6 +43,7 @@ class Engineer2(RoleZero):
"SearchEnhancedQA",
"Engineer2",
"CodeReview",
"ImageGetter",
]
# SWE Agent parameter
run_eval: bool = False
@ -84,10 +86,12 @@ class Engineer2(RoleZero):
)
else:
# Default tool map
image_getter = ImageGetter()
self.tool_execution_map.update(
{
"git_create_pull": git_create_pull,
"Engineer2.write_new_code": self.write_new_code,
"ImageGetter.get_image": image_getter.get_image,
"CodeReview.review": cr.review,
"CodeReview.fix": cr.fix,
"Terminal.run_command": self.terminal.run_command,

View file

@ -1026,6 +1026,21 @@ Thought: Now that the changes have been pushed to the remote repository, due to
}
]
```
## example 11
The requirement is a product website containing some goods, including a cap, a dress, and a T-shirt.
I think the website should contain pictures of the goods, but the user did not provide any, so I will get the images first.
```json
[
{
"command_name": "ImageGetter.get_image",
"args": {
"search_term": "cap",
"save_file_path": "/tmp/workspace/images/cap.png"
}
}
]
```
"""
WEB_SCRAPING_EXAMPLE = """

View file

@ -0,0 +1,87 @@
from __future__ import annotations
import base64
import os
import re
from pathlib import Path
from typing import Optional
from playwright.async_api import Browser as Browser_
from playwright.async_api import BrowserContext, Page, Playwright, async_playwright
from pydantic import BaseModel, ConfigDict, Field
from metagpt.tools.tool_registry import register_tool
from metagpt.utils.proxy_env import get_proxy_from_env
from metagpt.utils.report import BrowserReporter
@register_tool(include_functions=["get_image"])
class ImageGetter(BaseModel):
    """
    A tool to get images.

    Drives a Playwright-controlled Chromium page to search unsplash.com for a
    term and saves the first matching image to disk. The browser is started
    lazily on the first call to ``get_image`` and reused afterwards.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Playwright handles; populated by ``start`` and excluded from serialization.
    playwright: Optional[Playwright] = Field(default=None, exclude=True)
    browser_instance: Optional[Browser_] = Field(default=None, exclude=True)
    browser_ctx: Optional[BrowserContext] = Field(default=None, exclude=True)
    page: Optional[Page] = Field(default=None, exclude=True)
    # Browser launch options.
    headless: bool = Field(default=True)
    proxy: Optional[dict] = Field(default_factory=get_proxy_from_env)
    reporter: BrowserReporter = Field(default_factory=BrowserReporter)

    async def start(self) -> None:
        """Starts Playwright and launches a browser"""
        if self.playwright is None:
            self.playwright = playwright = await async_playwright().start()
            browser = self.browser_instance = await playwright.chromium.launch(headless=self.headless, proxy=self.proxy)
            browser_ctx = self.browser_ctx = await browser.new_context()
            self.page = await browser_ctx.new_page()

    async def get_image(self, search_term: str, save_file_path: str) -> str:
        """
        Get an image related to the search term.

        Args:
            search_term (str): The term to search for the image.
            save_file_path (str): The file path where the image will be saved.
                Missing parent directories are created.

        Returns:
            str: A message saying whether an image was found and, if so, where
                it was saved.
        """
        # Imported locally so this block does not depend on edits to the
        # module-level import section.
        from urllib.parse import quote

        from playwright.async_api import TimeoutError as PlaywrightTimeoutError

        # Selector of the image element on the Unsplash result page; used both
        # for waiting and for reading the image source.
        img_selector = ".zNNw1 > div > img:nth-of-type(2)"
        not_found_msg = f"{search_term} not found. Please broaden the search term."

        # Search image. The term is percent-encoded so that characters such as
        # '/', '?' or '#' cannot alter the URL structure.
        url = f"https://unsplash.com/s/photos/{quote(search_term, safe='')}/"
        if self.page is None:
            await self.start()
        await self.page.goto(url, wait_until="domcontentloaded")

        # Wait for the element. Playwright raises its own TimeoutError, which is
        # not a subclass of the builtin one, so both are caught here.
        try:
            await self.page.wait_for_selector(img_selector)
        except (PlaywrightTimeoutError, TimeoutError):
            return not_found_msg

        # Download the image inside the page and hand it back as a base64
        # data URL ("data:<mime>;base64,<payload>"), or null if unavailable.
        image_base64 = await self.page.evaluate(
            """async (selector) => {
                var img = document.querySelector(selector);
                if (img && img.src) {
                    const response = await fetch(img.src);
                    if (response.ok) {
                        const blob = await response.blob();
                        return await new Promise(resolve => {
                            const reader = new FileReader();
                            reader.onloadend = () => resolve(reader.result);
                            reader.readAsDataURL(blob);
                        });
                    }
                }
                return null;
            }""",
            img_selector,
        )
        if not image_base64:
            return not_found_msg

        # Everything after the first comma is the payload, whatever MIME type
        # the data URL header declares.
        payload = image_base64.split(",", 1)[-1]
        # Decode before touching the filesystem so a malformed payload does not
        # leave an empty file behind.
        image_data = base64.b64decode(payload)

        file_path = Path(save_file_path)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_bytes(image_data)
        return f"{search_term} found. The image is saved in {save_file_path}."