Merge branch 'dev' into code_intepreter

This commit is contained in:
yzlin 2024-02-02 20:45:45 +08:00
commit 891e35b92f
108 changed files with 5271 additions and 408 deletions

View file

@ -0,0 +1,77 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : use gpt4v to improve prompt and draw image with dall-e-3
"""set `model: "gpt-4-vision-preview"` in `config2.yaml` first"""
import asyncio
from PIL import Image
from metagpt.actions.action import Action
from metagpt.logs import logger
from metagpt.roles.role import Role
from metagpt.schema import Message
from metagpt.utils.common import encode_image
class GenAndImproveImageAction(Action):
    """Draw an image from a prompt, let a vision LLM refine the prompt, redraw, and compare.

    The whole flow is driven by `run`: it generates a first image with "dall-e-3",
    asks the LLM for a refined prompt based on that image, generates a second image
    from the refined prompt and finally asks the LLM whether the second image is
    better than the first one.
    """

    # When True, `run` writes both generated images into the current working directory.
    save_image: bool = True

    async def generate_image(self, prompt: str) -> Image.Image:
        """Generate an image for `prompt` with the "dall-e-3" model.

        Args:
            prompt: Text description of the picture to draw.

        Returns:
            The first element of the list returned by `self.llm.gen_image`.
        """
        imgs = await self.llm.gen_image(model="dall-e-3", prompt=prompt)
        return imgs[0]

    async def refine_prompt(self, old_prompt: str, image: Image.Image) -> str:
        """Ask the LLM for an improved prompt, given the old prompt and the image it produced.

        Args:
            old_prompt: Prompt that was used to generate `image`.
            image: Image generated from `old_prompt`; it is passed through `encode_image`
                and sent along with the request.

        Returns:
            The LLM answer, which is requested to contain only the new prompt.
        """
        msg = (
            f"You are a creative painter, with the given generated image and old prompt: {old_prompt}, "
            f"please refine the prompt and generate new one. Just output the new prompt."
        )
        b64_img = encode_image(image)
        new_prompt = await self.llm.aask(msg=msg, images=[b64_img])
        return new_prompt

    async def evaluate_images(self, old_prompt: str, images: list[Image.Image]) -> str:
        """Ask the LLM whether the second image is better than the first for the given prompt.

        Args:
            old_prompt: Prompt the images are judged against.
            images: The images to compare, in order (first, second).

        Returns:
            The raw LLM answer; the request asks for "True" or "False".
        """
        # The request talks about "the prompt", so the prompt text has to be part of it;
        # otherwise the model can only compare the two pictures without any reference.
        msg = (
            f"With the prompt: {old_prompt} and two generated image, "
            "to judge if the second one is better than the first one. "
            "If so, just output True else output False"
        )
        b64_imgs = [encode_image(img) for img in images]
        res = await self.llm.aask(msg=msg, images=b64_imgs)
        return res

    async def run(self, messages: list[Message]) -> str:
        """Run the generate / refine / regenerate / evaluate flow.

        Args:
            messages: Message history; the content of the last message is used as the
                initial drawing prompt.

        Returns:
            A sentence embedding the LLM verdict on whether the second image is better.
        """
        prompt = messages[-1].content

        old_img: Image.Image = await self.generate_image(prompt)
        new_prompt = await self.refine_prompt(old_prompt=prompt, image=old_img)
        logger.info(f"original prompt: {prompt}")
        logger.info(f"refined prompt: {new_prompt}")
        new_img: Image.Image = await self.generate_image(new_prompt)

        if self.save_image:
            # Relative paths: the files land in the current working directory.
            old_img.save("./img_by-dall-e_old.png")
            new_img.save("./img_by-dall-e_new.png")

        res = await self.evaluate_images(old_prompt=prompt, images=[old_img, new_img])
        opinion = f"The second generated image is better than the first one: {res}"
        logger.info(f"evaluate opinion: {opinion}")
        return opinion
class Painter(Role):
    """Role named "MaLiang" whose single registered action is `GenAndImproveImageAction`."""

    name: str = "MaLiang"
    profile: str = "Painter"
    goal: str = "to generate fine painting"

    def __init__(self, **data):
        """Initialise the role from keyword data, then register its action."""
        super().__init__(**data)
        # The action class itself (not an instance) is handed to `set_actions`.
        painter_actions = [GenAndImproveImageAction]
        self.set_actions(painter_actions)
async def main():
    """Create a `Painter` and run it once on a sample drawing prompt."""
    painter = Painter()
    await painter.run(with_message="a girl with flowers")


# Script entry point: drive the coroutine with asyncio when executed directly.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -6,9 +6,11 @@
@File : llm_hello_world.py
"""
import asyncio
from pathlib import Path
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.utils.common import encode_image
async def main():
@ -27,6 +29,12 @@ async def main():
if hasattr(llm, "completion"):
logger.info(llm.completion(hello_msg))
# check whether the configured llm supports vision capability. If not, it will throw an error
invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
img_base64 = encode_image(invoice_path)
res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
assert "true" in res.lower()
if __name__ == "__main__":
asyncio.run(main())

View file

@ -5,17 +5,20 @@
import asyncio
from metagpt.roles import Searcher
from metagpt.tools import SearchEngineType
from metagpt.tools.search_engine import SearchEngine, SearchEngineType
# Demo entry point: runs the Searcher role on a sample question.
# NOTE(review): both the `engine=...` and the `search_engine=SearchEngine(...)` call forms
# appear below; this looks like old and new lines of a diff shown together — confirm which
# form is current before relying on either.
async def main():
question = "What are the most interesting human facts?"
# Keyword arguments forwarded to SearchEngine in the calls below; api_key / cse_id are empty here.
kwargs = {"api_key": "", "cse_id": "", "proxy": None}
# Serper API
# await Searcher(engine=SearchEngineType.SERPER_GOOGLE).run(question)
# await Searcher(search_engine=SearchEngine(engine=SearchEngineType.SERPER_GOOGLE, **kwargs)).run(question)
# SerpAPI
await Searcher(engine=SearchEngineType.SERPAPI_GOOGLE).run(question)
# await Searcher(search_engine=SearchEngine(engine=SearchEngineType.SERPAPI_GOOGLE, **kwargs)).run(question)
# Google API
# await Searcher(engine=SearchEngineType.DIRECT_GOOGLE).run(question)
# await Searcher(search_engine=SearchEngine(engine=SearchEngineType.DIRECT_GOOGLE, **kwargs)).run(question)
# DDG API
await Searcher(search_engine=SearchEngine(engine=SearchEngineType.DUCK_DUCK_GO, **kwargs)).run(question)
if __name__ == "__main__":