mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-15 11:02:36 +02:00
feat: merge feature/oas3_skills
This commit is contained in:
commit
6e34656004
14 changed files with 381 additions and 9 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -167,3 +167,5 @@ output.wav
|
|||
|
||||
# output folder
|
||||
output
|
||||
tmp.png
|
||||
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ paths:
|
|||
/txt2img/openai:
|
||||
post:
|
||||
summary: "Convert Text to Base64-encoded Image Data Stream"
|
||||
operationId: openai_text_2_image.oas3_openai_text_2_image
|
||||
operationId: openai_text_to_image.oas3_openai_text_to_image
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
|
|
@ -109,7 +109,7 @@ paths:
|
|||
/txt2embedding/openai:
|
||||
post:
|
||||
summary: Text to embedding
|
||||
operationId: openai_text_2_embedding.oas3_openai_text_2_embedding
|
||||
operationId: openai_text_to_embedding.oas3_openai_text_to_embedding
|
||||
description: Retrieve an embedding for the provided text using the OpenAI API.
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -144,6 +144,49 @@ paths:
|
|||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/txt2image/metagpt:
|
||||
post:
|
||||
summary: "Text to Image"
|
||||
description: "Generate an image from the provided text using the MetaGPT Text-to-Image API."
|
||||
operationId: metagpt_text_to_image.oas3_metagpt_text_to_image
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- text
|
||||
properties:
|
||||
text:
|
||||
type: string
|
||||
description: "The text used for image conversion."
|
||||
size_type:
|
||||
type: string
|
||||
enum: ["512x512", "512x768"]
|
||||
default: "512x512"
|
||||
description: "Size of the generated image."
|
||||
model_url:
|
||||
type: string
|
||||
description: "Model reset API URL for text-to-image."
|
||||
default: ""
|
||||
responses:
|
||||
'200':
|
||||
description: "Base64-encoded image data."
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
image_data:
|
||||
type: string
|
||||
format: base64
|
||||
'400':
|
||||
description: "Bad Request"
|
||||
'500':
|
||||
description: "Internal Server Error"
|
||||
|
||||
components:
|
||||
schemas:
|
||||
Embedding:
|
||||
|
|
|
|||
|
|
@ -70,3 +70,6 @@ SD_T2I_API: "/sdapi/v1/txt2img"
|
|||
### for Research
|
||||
MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo
|
||||
MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
|
||||
|
||||
### Meta Models
|
||||
#METAGPT_TEXT_TO_IMAGE_MODEL: MODEL_URL
|
||||
26
metagpt/learn/text_to_embedding.py
Normal file
26
metagpt/learn/text_to_embedding.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : text_to_embedding.py
|
||||
@Desc : Text-to-Embedding skill, which provides text-to-embedding functionality.
|
||||
"""
|
||||
import os
|
||||
|
||||
from metagpt.tools.openai_text_to_embedding import oas3_openai_text_to_embedding
|
||||
from metagpt.utils.common import initialize_environment
|
||||
|
||||
|
||||
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
    """Convert text to an embedding using the OpenAI API.

    :param text: The text used for embedding.
    :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`.
    :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
    :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`.
    :raises EnvironmentError: If no API key is available, either via the
        `openai_api_key` argument or the `OPENAI_API_KEY` environment variable.
    """
    initialize_environment()
    if os.environ.get("OPENAI_API_KEY") or openai_api_key:
        return oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key)
    # Fail loudly with context instead of a bare, message-less EnvironmentError.
    raise EnvironmentError("OPENAI_API_KEY is not configured and no openai_api_key was provided")
|
||||
30
metagpt/learn/text_to_image.py
Normal file
30
metagpt/learn/text_to_image.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : text_to_image.py
|
||||
@Desc : Text-to-Image skill, which provides text-to-image functionality.
|
||||
"""
|
||||
import os
|
||||
|
||||
from metagpt.tools.metagpt_text_to_image import oas3_metagpt_text_to_image
|
||||
from metagpt.tools.openai_text_to_image import oas3_openai_text_to_image
|
||||
from metagpt.utils.common import initialize_environment
|
||||
|
||||
|
||||
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url=""):
    """Convert text to an image, preferring the MetaGPT backend over OpenAI.

    :param text: The text used for image conversion.
    :param size_type: If using OPENAI, the available size options are ['256x256', '512x512', '1024x1024'], while for MetaGPT, the options are ['512x512', '512x768'].
    :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
    :param model_url: MetaGPT model url
    :return: The image data is returned in Base64 encoding.
    :raises EnvironmentError: If neither a MetaGPT model URL nor an OpenAI API
        key is available (via arguments or environment variables).
    """
    initialize_environment()
    # MetaGPT backend takes precedence when a model URL is configured.
    if os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL") or model_url:
        return oas3_metagpt_text_to_image(text, size_type, model_url)
    if os.environ.get("OPENAI_API_KEY") or openai_api_key:
        return oas3_openai_text_to_image(text, size_type, openai_api_key)
    # Fail loudly with context instead of a bare, message-less EnvironmentError.
    raise EnvironmentError(
        "No text-to-image backend configured: set METAGPT_TEXT_TO_IMAGE_MODEL or OPENAI_API_KEY, "
        "or pass model_url / openai_api_key"
    )
|
||||
35
metagpt/learn/text_to_speech.py
Normal file
35
metagpt/learn/text_to_speech.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/17
|
||||
@Author : mashenquan
|
||||
@File : text_to_speech.py
|
||||
@Desc : Text-to-Speech skill, which provides text-to-speech functionality
|
||||
"""
|
||||
import os
|
||||
|
||||
from metagpt.tools.azure_tts import oas3_azsure_tts
|
||||
from metagpt.utils.common import initialize_environment
|
||||
|
||||
|
||||
def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl",
                   subscription_key="", region=""):
    """Convert text to speech via Azure TTS.

    For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`

    :param text: The text used for voice conversion.
    :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
    :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
    :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
    :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
    :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
    :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
    :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
    :raises EnvironmentError: If no Azure credentials are available, either via
        the arguments or the AZURE_TTS_SUBSCRIPTION_KEY / AZURE_TTS_REGION
        environment variables.
    """
    initialize_environment()
    # Credentials must come as a complete pair: key AND region.
    if (os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")) or \
            (subscription_key and region):
        return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)

    # Fail loudly with context instead of a bare, message-less EnvironmentError.
    raise EnvironmentError(
        "Azure TTS credentials missing: set AZURE_TTS_SUBSCRIPTION_KEY and AZURE_TTS_REGION, "
        "or pass subscription_key and region"
    )
|
||||
|
|
@ -62,7 +62,7 @@ class AzureTTS:
|
|||
|
||||
# Export
|
||||
def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""):
|
||||
"""oas3/tts/azsure
|
||||
"""Text to speech
|
||||
For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
|
||||
|
||||
:param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
|
||||
|
|
|
|||
112
metagpt/tools/metagpt_text_to_image.py
Normal file
112
metagpt/tools/metagpt_text_to_image.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : metagpt_text_to_image.py
|
||||
@Desc : MetaGPT Text-to-Image OAS3 api, which provides text-to-image functionality.
|
||||
"""
|
||||
import base64
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
|
||||
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
|
||||
from metagpt.utils.common import initialize_environment
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class MetaGPTText2Image:
    """Thin client around a Stable-Diffusion-style MetaGPT text-to-image endpoint."""

    def __init__(self, model_url):
        """
        :param model_url: Model reset api url; when empty, falls back to the
            `METAGPT_TEXT_TO_IMAGE_MODEL` environment variable.
        """
        self.model_url = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')

    def text_2_image(self, text, size_type="512x512"):
        """Text to image

        :param text: The text used for image conversion.
        :param size_type: One of ['512x512', '512x768']
        :return: The image data is returned in Base64 encoding.
        """
        dims = size_type.split("x")
        width, height = int(dims[0]), int(dims[1])

        request_headers = {
            "Content-Type": "application/json"
        }
        payload = {
            "prompt": text,
            "negative_prompt": "(easynegative:0.8),black, dark,Low resolution",
            "override_settings": {"sd_model_checkpoint": "galaxytimemachinesGTM_photoV20"},
            "seed": -1,
            "batch_size": 1,
            "n_iter": 1,
            "steps": 20,
            "cfg_scale": 11,
            "width": width,
            "height": height,
            "restore_faces": False,
            "tiling": False,
            "do_not_save_samples": False,
            "do_not_save_grid": False,
            "enable_hr": False,
            "hr_scale": 2,
            "hr_upscaler": "Latent",
            "hr_second_pass_steps": 0,
            "hr_resize_x": 0,
            "hr_resize_y": 0,
            "hr_upscale_to_x": 0,
            "hr_upscale_to_y": 0,
            "truncate_x": 0,
            "truncate_y": 0,
            "applied_old_hires_behavior_to": None,
            "eta": None,
            "sampler_index": "DPM++ SDE Karras",
            "alwayson_scripts": {},
        }

        # Minimal pydantic view of the endpoint response.
        class ImageResult(BaseModel):
            images: List
            parameters: Dict

        try:
            rsp = requests.post(self.model_url, headers=request_headers, json=payload)
            rsp.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            parsed = ImageResult(**rsp.json())
        except requests.exceptions.RequestException as e:
            logger.error(f"An error occurred:{e}")
            return ""
        # First image only; empty string when the endpoint returned none.
        return parsed.images[0] if parsed.images else ""
|
||||
|
||||
|
||||
# Export
|
||||
def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""):
    """Text to image

    :param text: The text used for image conversion.
    :param model_url: Model reset api
    :param size_type: One of ['512x512', '512x768']
    :return: The image data is returned in Base64 encoding.
    """
    # Guard clause: nothing to convert.
    if not text:
        return ""
    endpoint = model_url or os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')
    client = MetaGPTText2Image(endpoint)
    return client.text_2_image(text, size_type=size_type)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    initialize_environment()

    # BUG FIX: the original called `oas3_metagpt_text_2_image`, which does not
    # exist in this module; the exported function is `oas3_metagpt_text_to_image`,
    # so the demo crashed with NameError before any request was made.
    v = oas3_metagpt_text_to_image("Panda emoji")
    data = base64.b64decode(v)
    with open("tmp.png", mode="wb") as writer:
        writer.write(data)
    print(v)
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : openai_text_2_embedding.py
|
||||
@File : openai_text_to_embedding.py
|
||||
@Desc : OpenAI Text-to-Embedding OAS3 api, which provides text-to-embedding functionality.
|
||||
For more details, checkout: `https://platform.openai.com/docs/api-reference/embeddings/object`
|
||||
"""
|
||||
|
|
@ -70,7 +70,7 @@ class OpenAIText2Embedding:
|
|||
|
||||
|
||||
# Export
|
||||
def oas3_openai_text_2_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
|
||||
def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
|
||||
"""Text to embedding
|
||||
|
||||
:param text: The text used for embedding.
|
||||
|
|
@ -88,5 +88,5 @@ def oas3_openai_text_2_embedding(text, model="text-embedding-ada-002", openai_ap
|
|||
if __name__ == "__main__":
|
||||
initialize_environment()
|
||||
|
||||
v = oas3_openai_text_2_embedding("Panda emoji")
|
||||
v = oas3_openai_text_to_embedding("Panda emoji")
|
||||
print(v)
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
"""
|
||||
@Time : 2023/8/17
|
||||
@Author : mashenquan
|
||||
@File : openai_text_2_image.py
|
||||
@File : openai_text_to_image.py
|
||||
@Desc : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
|
||||
"""
|
||||
import base64
|
||||
|
|
@ -78,7 +78,7 @@ class OpenAIText2Image:
|
|||
|
||||
|
||||
# Export
|
||||
def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=""):
|
||||
def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""):
|
||||
"""Text to image
|
||||
|
||||
:param text: The text used for image conversion.
|
||||
|
|
@ -96,5 +96,5 @@ def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=
|
|||
if __name__ == "__main__":
|
||||
initialize_environment()
|
||||
|
||||
v = oas3_openai_text_2_image("Panda emoji")
|
||||
v = oas3_openai_text_to_image("Panda emoji")
|
||||
print(v)
|
||||
0
tests/metagpt/learn/__init__.py
Normal file
0
tests/metagpt/learn/__init__.py
Normal file
40
tests/metagpt/learn/test_text_to_embedding.py
Normal file
40
tests/metagpt/learn/test_text_to_embedding.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : test_text_to_embedding.py
|
||||
@Desc : Unit tests.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.learn.text_to_embedding import text_to_embedding
|
||||
|
||||
|
||||
async def mock_text_to_embedding():
    """Exercise text_to_embedding on sample inputs and validate the result shape."""
    # BUG FIX: `ResultEmbedding` was used below without any import, so this
    # test crashed with NameError. It is the response model documented by
    # text_to_embedding's docstring; imported locally to keep the module
    # header unchanged. NOTE(review): assumed to live next to the OAS3
    # implementation — confirm the module path.
    from metagpt.tools.openai_text_to_embedding import ResultEmbedding

    class Input(BaseModel):
        input: str

    inputs = [
        {"input": "Panda emoji"}
    ]

    for i in inputs:
        seed = Input(**i)
        data = text_to_embedding(seed.input)
        v = ResultEmbedding(**data)
        assert len(v.data) > 0
|
||||
|
||||
|
||||
def test_suite():
    """Synchronous entry point that runs the async embedding mock."""
    # asyncio.get_event_loop()/run_until_complete is deprecated for this
    # pattern; asyncio.run creates and tears down a fresh loop.
    asyncio.run(mock_text_to_embedding())


if __name__ == '__main__':
    test_suite()
|
||||
41
tests/metagpt/learn/test_text_to_image.py
Normal file
41
tests/metagpt/learn/test_text_to_image.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : test_text_to_image.py
|
||||
@Desc : Unit tests.
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.learn.text_to_image import text_to_image
|
||||
|
||||
|
||||
async def mock_text_to_image():
    """Exercise text_to_image and verify it yields decodable Base64 data."""
    class Input(BaseModel):
        input: str
        size_type: str

    inputs = [
        {"input": "Panda emoji", "size_type": "512x512"}
    ]

    for i in inputs:
        seed = Input(**i)
        # BUG FIX: size_type was declared and populated but never passed to
        # text_to_image, so the fixture value was silently ignored.
        base64_data = text_to_image(seed.input, size_type=seed.size_type)
        assert base64_data != ""
        print(f"{seed.input} -> {base64_data}")
        # validate=True rejects non-alphabet characters instead of skipping them.
        assert base64.b64decode(base64_data, validate=True)
|
||||
|
||||
|
||||
def test_suite():
    """Synchronous entry point that runs the async image mock."""
    # asyncio.get_event_loop()/run_until_complete is deprecated for this
    # pattern; asyncio.run creates and tears down a fresh loop.
    asyncio.run(mock_text_to_image())


if __name__ == '__main__':
    test_suite()
|
||||
40
tests/metagpt/learn/test_text_to_speech.py
Normal file
40
tests/metagpt/learn/test_text_to_speech.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/18
|
||||
@Author : mashenquan
|
||||
@File : test_text_to_speech.py
|
||||
@Desc : Unit tests.
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.learn.text_to_speech import text_to_speech
|
||||
|
||||
|
||||
async def mock_text_to_speech():
    """Drive text_to_speech over fixture inputs and check the Base64 payload."""
    class Input(BaseModel):
        input: str

    samples = [
        {"input": "Panda emoji"}
    ]

    for sample in samples:
        seed = Input(**sample)
        base64_data = text_to_speech(seed.input)
        assert base64_data != ""
        print(f"{seed.input} -> {base64_data}")
        assert base64.b64decode(base64_data, validate=True)
|
||||
|
||||
|
||||
def test_suite():
    """Synchronous entry point that runs the async speech mock."""
    # asyncio.get_event_loop()/run_until_complete is deprecated for this
    # pattern; asyncio.run creates and tears down a fresh loop.
    asyncio.run(mock_text_to_speech())


if __name__ == '__main__':
    test_suite()
|
||||
Loading…
Add table
Add a link
Reference in a new issue