mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
automated get image
This commit is contained in:
parent
a94e282e7f
commit
b262e5df7b
5 changed files with 112 additions and 2 deletions
|
|
@ -76,6 +76,8 @@ Note:
|
|||
19. When the requirement is simple, you don't need to create a plan, just do it right away.
|
||||
20. If the code exists, use the Editor tool's open and edit commands to modify it. Since it is not a new code, do not use write_new_code.
|
||||
21. When using the editor, pay attention to the editor's current directory. When you use editor tools, the paths must be either absolute or relative to the editor's current directory.
|
||||
22. The default programming languages are HTML (.html), CSS (.css), and Pure JavaScript (.js).
|
||||
23. When planning, consider whether images are needed. If you are developing a showcase website, start by using ImageGetter.get_image to obtain the necessary images.
|
||||
"""
|
||||
CURRENT_STATE = """
|
||||
The current editor state is:
|
||||
|
|
|
|||
|
|
@ -32,8 +32,10 @@ Note:
|
|||
9. Do not use the 'end' command when the current task remains unfinished; instead, use the 'finish_current_task' command to indicate completion before switching to the next task.
|
||||
10. Do not use escape characters in json data, particularly within file paths.
|
||||
11. Analyze the capabilities of team members and assign tasks to them based on user Requirements. If the requirements ask to ignore certain tasks, follow the requirements.
|
||||
12. Add default web technologies: HTML (*.html), CSS (*.css), and JavaScript (*.js) to your requirements.If no specific programming language is required, include these technologies in the project requirements. Using instruction to forward this information to your team members.
|
||||
13. If the the user message is a question. use 'reply to human' to respond to the question, and then end.
|
||||
12. If the user message is a question, use 'reply to human' to respond to the question, and then end.
|
||||
13. Instructions and reply must be in the same language.
|
||||
14. Default technology stack is HTML (.html), CSS (.css), and Pure JavaScript (.js). Web app is the default option when developing software.
|
||||
15. You are the only one who decides the programming language for the software, so the instruction must contain the programming language.
|
||||
"""
|
||||
TL_THOUGHT_GUIDANCE = (
|
||||
THOUGHT_GUIDANCE
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ from metagpt.schema import UserMessage
|
|||
from metagpt.strategy.experience_retriever import ENGINEER_EXAMPLE
|
||||
from metagpt.tools.libs.cr import CodeReview
|
||||
from metagpt.tools.libs.git import git_create_pull
|
||||
from metagpt.tools.libs.image_getter import ImageGetter
|
||||
from metagpt.tools.libs.terminal import Terminal
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.common import CodeParser, awrite
|
||||
|
|
@ -42,6 +43,7 @@ class Engineer2(RoleZero):
|
|||
"SearchEnhancedQA",
|
||||
"Engineer2",
|
||||
"CodeReview",
|
||||
"ImageGetter",
|
||||
]
|
||||
# SWE Agent parameter
|
||||
run_eval: bool = False
|
||||
|
|
@ -84,10 +86,12 @@ class Engineer2(RoleZero):
|
|||
)
|
||||
else:
|
||||
# Default tool map
|
||||
image_getter = ImageGetter()
|
||||
self.tool_execution_map.update(
|
||||
{
|
||||
"git_create_pull": git_create_pull,
|
||||
"Engineer2.write_new_code": self.write_new_code,
|
||||
"ImageGetter.get_image": image_getter.get_image,
|
||||
"CodeReview.review": cr.review,
|
||||
"CodeReview.fix": cr.fix,
|
||||
"Terminal.run_command": self.terminal.run_command,
|
||||
|
|
|
|||
|
|
@ -1026,6 +1026,21 @@ Thought: Now that the changes have been pushed to the remote repository, due to
|
|||
}
|
||||
]
|
||||
```
|
||||
|
||||
## example 11
|
||||
The requirement is a product website containing some goods, including a cap, a dress, and a T-shirt.
|
||||
I think the website should contain pictures of the goods, but the user did not provide any, so I will get the images first.
|
||||
```json
|
||||
[
|
||||
{
|
||||
"command_name": "ImageGetter.get_image",
|
||||
"args": {
|
||||
"search_term": "cap",
|
||||
"save_file_path": "/tmp/workspace/images/cap.png",
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
"""
|
||||
|
||||
WEB_SCRAPING_EXAMPLE = """
|
||||
|
|
|
|||
87
metagpt/tools/libs/image_getter.py
Normal file
87
metagpt/tools/libs/image_getter.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
from __future__ import annotations

import base64
import os
import re
from pathlib import Path
from typing import Optional
from urllib.parse import quote

from playwright.async_api import Browser as Browser_
from playwright.async_api import BrowserContext, Page, Playwright, async_playwright
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from pydantic import BaseModel, ConfigDict, Field

from metagpt.tools.tool_registry import register_tool
from metagpt.utils.proxy_env import get_proxy_from_env
from metagpt.utils.report import BrowserReporter
|
||||
|
||||
|
||||
@register_tool(include_functions=["get_image"])
class ImageGetter(BaseModel):
    """
    A tool to get images.

    It drives a Playwright-controlled Chromium page to search Unsplash for a
    term and saves the first matching image to a local file.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Playwright handles; created lazily by start() and excluded from serialization.
    playwright: Optional[Playwright] = Field(default=None, exclude=True)
    browser_instance: Optional[Browser_] = Field(default=None, exclude=True)
    browser_ctx: Optional[BrowserContext] = Field(default=None, exclude=True)
    page: Optional[Page] = Field(default=None, exclude=True)
    # Launch options forwarded to chromium.launch().
    headless: bool = Field(default=True)
    proxy: Optional[dict] = Field(default_factory=get_proxy_from_env)
    reporter: BrowserReporter = Field(default_factory=BrowserReporter)

    async def start(self) -> None:
        """Starts Playwright and launches a browser"""
        if self.playwright is None:
            self.playwright = playwright = await async_playwright().start()
            browser = self.browser_instance = await playwright.chromium.launch(headless=self.headless, proxy=self.proxy)
            browser_ctx = self.browser_ctx = await browser.new_context()
            self.page = await browser_ctx.new_page()

    async def get_image(self, search_term: str, save_file_path: str) -> str:
        """
        Get an image related to the search term.

        Args:
            search_term (str): The term to search for the image.
            save_file_path (str): The file path where the image will be saved.

        Returns:
            str: A message stating whether an image was found and, if so, where it was saved.
        """
        # CSS selector of the image element picked from the Unsplash result page.
        selector = ".zNNw1 > div > img:nth-of-type(2)"

        # Search image. The term is percent-encoded so that spaces, slashes, '?' or '#'
        # cannot break or redirect the request path.
        url = f"https://unsplash.com/s/photos/{quote(search_term, safe='')}/"
        if self.page is None:
            await self.start()
        await self.page.goto(url, wait_until="domcontentloaded")

        # Wait for the element. Playwright raises its own TimeoutError, which is not a
        # subclass of the builtin TimeoutError, so the Playwright class must be caught.
        try:
            await self.page.wait_for_selector(selector)
        except PlaywrightTimeoutError:
            return f"{search_term} not found. Please broaden the search term."

        # Fetch the image inside the page and hand it back as a base64 data URL.
        image_base64 = await self.page.evaluate(
            """async (selector) => {
                var img = document.querySelector(selector);
                if (img && img.src) {
                    const response = await fetch(img.src);
                    if (response.ok) {
                        const blob = await response.blob();
                        return await new Promise(resolve => {
                            const reader = new FileReader();
                            reader.onloadend = () => resolve(reader.result);
                            reader.readAsDataURL(blob);
                        });
                    }
                }
                return null;
            }""",
            selector,
        )
        if not image_base64:
            return f"{search_term} not found. Please broaden the search term."

        # Strip the "data:<media type>;base64," header, anchored at the start so the
        # payload itself is never touched, and decode before opening the file so a
        # decoding failure does not leave an empty file behind.
        imgstr = re.sub(r"^data:[^,]*;base64,", "", image_base64)
        image_data = base64.b64decode(imgstr)

        file_path = Path(save_file_path)
        os.makedirs(file_path.parent, exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(image_data)
        return f"{search_term} found. The image is saved in {save_file_path}."
|
||||
Loading…
Add table
Add a link
Reference in a new issue