feat: merge feature/skills

This commit is contained in:
莘权 马 2023-08-18 20:31:16 +08:00
commit fb137b018c
22 changed files with 1018 additions and 54 deletions

2
.gitignore vendored
View file

@ -167,3 +167,5 @@ output.wav
# output folder
output
tmp.png

View file

@ -0,0 +1,18 @@
{
"schema_version": "v1",
"name_for_model": "text processing tools",
"name_for_human": "MetaGPT Text Plugin",
"description_for_model": "Plugins for text processing, including text-to-speech, text-to-image, text-to-embedding, text summarization, text-to-code, vector similarity calculation, web content crawling, and more.",
"description_for_human": "Plugins for text processing, including text-to-speech, text-to-image, text-to-embedding, text summarization, text-to-code, vector similarity calculation, web content crawling, and more.",
"auth": {
"type": "none"
},
"api": {
"type": "openapi",
"url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/.well-known/metagpt_oas3_api.yaml",
"has_user_authentication": false
},
"logo_url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/docs/resources/MetaGPT-logo.png",
"contact_email": "mashenquan@fuzhi.cn",
"legal_info_url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/docs/README_CN.md"
}

View file

@ -0,0 +1,236 @@
openapi: "3.0.0"
info:
title: "MetaGPT Export OpenAPIs"
version: "1.0"
servers:
- url: "/oas3"
variables:
port:
default: '8080'
description: HTTP service port
paths:
/tts/azsure:
post:
summary: "Convert Text to Base64-encoded .wav File Stream"
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
operationId: azure_tts.oas3_azsure_tts
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- text
properties:
text:
type: string
description: Text to convert
lang:
type: string
description: The language code or locale, e.g., en-US (English - United States)
default: "zh-CN"
voice:
type: string
description: "Voice style, see: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts), [Voice Gallery](https://speech.microsoft.com/portal/voicegallery)"
default: "zh-CN-XiaomoNeural"
style:
type: string
description: "Speaking style to express different emotions. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
default: "affectionate"
role:
type: string
description: "Role to specify age and gender. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
default: "Girl"
subscription_key:
type: string
description: "Key used to access Azure AI service API, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`"
default: ""
region:
type: string
description: "Location (or region) of your resource, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`"
default: ""
responses:
'200':
description: "Base64-encoded .wav file data if successful, otherwise an empty string."
content:
application/json:
schema:
type: object
properties:
wav_data:
type: string
format: base64
'400':
description: "Bad Request"
'500':
description: "Internal Server Error"
/txt2img/openai:
post:
summary: "Convert Text to Base64-encoded Image Data Stream"
operationId: openai_text_to_image.oas3_openai_text_to_image
requestBody:
required: true
content:
application/json:
schema:
type: object
properties:
text:
type: string
description: "The text used for image conversion."
size_type:
type: string
enum: ["256x256", "512x512", "1024x1024"]
default: "1024x1024"
description: "Size of the generated image."
openai_api_key:
type: string
default: ""
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
responses:
'200':
description: "Base64-encoded image data."
content:
application/json:
schema:
type: object
properties:
image_data:
type: string
format: base64
'400':
description: "Bad Request"
'500':
description: "Internal Server Error"
/txt2embedding/openai:
post:
summary: Text to embedding
operationId: openai_text_to_embedding.oas3_openai_text_to_embedding
description: Retrieve an embedding for the provided text using the OpenAI API.
requestBody:
content:
application/json:
schema:
type: object
properties:
input:
type: string
description: The text used for embedding.
model:
type: string
description: "ID of the model to use. For more details, checkout: [models](https://api.openai.com/v1/models)"
enum:
- text-embedding-ada-002
responses:
"200":
description: Successful response
content:
application/json:
schema:
$ref: "#/components/schemas/ResultEmbedding"
"4XX":
description: Client error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"5XX":
description: Server error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/txt2image/metagpt:
post:
summary: "Text to Image"
description: "Generate an image from the provided text using the MetaGPT Text-to-Image API."
operationId: metagpt_text_to_image.oas3_metagpt_text_to_image
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- text
properties:
text:
type: string
description: "The text used for image conversion."
size_type:
type: string
enum: ["512x512", "512x768"]
default: "512x512"
description: "Size of the generated image."
model_url:
type: string
description: "Model reset API URL for text-to-image."
default: ""
responses:
'200':
description: "Base64-encoded image data."
content:
application/json:
schema:
type: object
properties:
image_data:
type: string
format: base64
'400':
description: "Bad Request"
'500':
description: "Internal Server Error"
components:
schemas:
Embedding:
type: object
description: Represents an embedding vector returned by the embedding endpoint.
properties:
object:
type: string
example: embedding
embedding:
type: array
items:
type: number
example: [0.0023064255, -0.009327292, ...]
index:
type: integer
example: 0
Usage:
type: object
properties:
prompt_tokens:
type: integer
example: 8
total_tokens:
type: integer
example: 8
ResultEmbedding:
type: object
properties:
object:
type: string
example: result_embedding
data:
type: array
items:
$ref: "#/components/schemas/Embedding"
model:
type: string
example: text-embedding-ada-002
usage:
$ref: "#/components/schemas/Usage"
Error:
type: object
properties:
error:
type: string
example: An error occurred

35
.well-known/openapi.yaml Normal file
View file

@ -0,0 +1,35 @@
openapi: "3.0.0"
info:
title: Hello World
version: "1.0"
servers:
- url: /openapi
paths:
/greeting/{name}:
post:
summary: Generate greeting
description: Generates a greeting message.
operationId: hello.post_greeting
responses:
200:
description: greeting response
content:
text/plain:
schema:
type: string
example: "hello dave!"
parameters:
- name: name
in: path
description: Name of the person to greet.
required: true
schema:
type: string
example: "dave"
requestBody:
content:
application/json:
schema:
type: object

View file

@ -70,3 +70,6 @@ SD_T2I_API: "/sdapi/v1/txt2img"
### for Research
MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo
MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
### Meta Models
#METAGPT_TEXT_TO_IMAGE_MODEL: MODEL_URL

View file

@ -1,53 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/6/9 22:22
@Author : Leo Xiao
@File : azure_tts.py
"""
from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer
from metagpt.actions.action import Action
from metagpt.config import Config
class AzureTTS(Action):
def __init__(self, name, context=None, llm=None):
super().__init__(name, context, llm)
self.config = Config()
# 参数参考https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles
def synthesize_speech(self, lang, voice, role, text, output_file):
subscription_key = self.config.get('AZURE_TTS_SUBSCRIPTION_KEY')
region = self.config.get('AZURE_TTS_REGION')
speech_config = SpeechConfig(
subscription=subscription_key, region=region)
speech_config.speech_synthesis_voice_name = voice
audio_config = AudioConfig(filename=output_file)
synthesizer = SpeechSynthesizer(
speech_config=speech_config,
audio_config=audio_config)
# if voice=="zh-CN-YunxiNeural":
ssml_string = f"""
<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='{lang}' xmlns:mstts='http://www.w3.org/2001/mstts'>
<voice name='{voice}'>
<mstts:express-as style='affectionate' role='{role}'>
{text}
</mstts:express-as>
</voice>
</speak>
"""
synthesizer.speak_ssml_async(ssml_string).get()
if __name__ == "__main__":
azure_tts = AzureTTS("azure_tts")
azure_tts.synthesize_speech(
"zh-CN",
"zh-CN-YunxiNeural",
"Boy",
"你好,我是卡卡",
"output.wav")

View file

@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : text_to_embedding.py
@Desc : Text-to-Embedding skill, which provides text-to-embedding functionality.
"""
import os
from metagpt.tools.openai_text_to_embedding import oas3_openai_text_to_embedding
from metagpt.utils.common import initialize_environment
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
"""Text to embedding
:param text: The text used for embedding.
:param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`.
:param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
:return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`.
"""
initialize_environment()
if os.environ.get("OPENAI_API_KEY") or openai_api_key:
return oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key)
raise EnvironmentError

View file

@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : text_to_image.py
@Desc : Text-to-Image skill, which provides text-to-image functionality.
"""
import os
from metagpt.tools.metagpt_text_to_image import oas3_metagpt_text_to_image
from metagpt.tools.openai_text_to_image import oas3_openai_text_to_image
from metagpt.utils.common import initialize_environment
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url=""):
"""Text to image
:param text: The text used for image conversion.
:param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
:param size_type: If using OPENAI, the available size options are ['256x256', '512x512', '1024x1024'], while for MetaGPT, the options are ['512x512', '512x768'].
:param model_url: MetaGPT model url
:return: The image data is returned in Base64 encoding.
"""
initialize_environment()
if os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL") or model_url:
return oas3_metagpt_text_to_image(text, size_type, model_url)
if os.environ.get("OPENAI_API_KEY") or openai_api_key:
return oas3_openai_text_to_image(text, size_type, openai_api_key)
raise EnvironmentError

View file

@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17
@Author : mashenquan
@File : text_to_speech.py
@Desc : Text-to-Speech skill, which provides text-to-speech functionality
"""
import os
from metagpt.tools.azure_tts import oas3_azsure_tts
from metagpt.utils.common import initialize_environment
def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl",
subscription_key="", region=""):
"""Text to speech
For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
:param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param text: The text used for voice conversion.
:param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
:param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
:return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
"""
initialize_environment()
if (os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")) or \
(subscription_key and region):
return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
raise EnvironmentError

114
metagpt/tools/azure_tts.py Normal file
View file

@ -0,0 +1,114 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17
@Author : mashenquan
@File : azure_tts.py
@Desc : azure TTS OAS3 api, which provides text-to-speech functionality
"""
from pathlib import Path
from uuid import uuid4
import base64
import sys
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.utils.common import initialize_environment
from metagpt.logs import logger
from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer
import os
class AzureTTS:
"""Azure Text-to-Speech"""
def __init__(self, subscription_key, region):
"""
:param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
:param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
"""
self.subscription_key = subscription_key if subscription_key else os.environ.get('AZURE_TTS_SUBSCRIPTION_KEY')
self.region = region if region else os.environ.get('AZURE_TTS_REGION')
# 参数参考https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles
def synthesize_speech(self, lang, voice, text, output_file):
speech_config = SpeechConfig(
subscription=self.subscription_key, region=self.region)
speech_config.speech_synthesis_voice_name = voice
audio_config = AudioConfig(filename=output_file)
synthesizer = SpeechSynthesizer(
speech_config=speech_config,
audio_config=audio_config)
# More detail: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice
ssml_string = "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' " \
f"xml:lang='{lang}' xmlns:mstts='http://www.w3.org/2001/mstts'>" \
f"<voice name='{voice}'>{text}</voice></speak>"
return synthesizer.speak_ssml_async(ssml_string).get()
@staticmethod
def role_style_text(role, style, text):
return f'<mstts:express-as role="{role}" style="{style}">{text}</mstts:express-as>'
@staticmethod
def role_text(role, text):
return f'<mstts:express-as role="{role}">{text}</mstts:express-as>'
@staticmethod
def style_text(style, text):
return f'<mstts:express-as style="{style}">{text}</mstts:express-as>'
# Export
def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""):
"""Text to speech
For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
:param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param text: The text used for voice conversion.
:param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
:param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
:return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
"""
if not text:
return ""
if not lang:
lang = "zh-CN"
if not voice:
voice = "zh-CN-XiaomoNeural"
if not role:
role = "Girl"
if not style:
style = "affectionate"
if not subscription_key:
subscription_key = os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY")
if not region:
region = os.environ.get("AZURE_TTS_REGION")
xml_value = AzureTTS.role_style_text(role=role, style=style, text=text)
tts = AzureTTS(subscription_key=subscription_key, region=region)
filename = Path(__file__).resolve().parent / (str(uuid4()).replace("-", "") + ".wav")
try:
tts.synthesize_speech(lang=lang, voice=voice, text=xml_value, output_file=str(filename))
with open(str(filename), mode="rb") as reader:
data = reader.read()
base64_string = base64.b64encode(data).decode('utf-8')
filename.unlink()
except Exception as e:
logger.error(f"text:{text}, error:{e}")
return ""
return base64_string
if __name__ == "__main__":
initialize_environment()
v = oas3_azsure_tts("测试test")
print(v)

27
metagpt/tools/hello.py Normal file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/5/2 16:03
@Author : mashenquan
@File : hello.py
@Desc : Implement the OpenAPI Specification 3.0 demo and use the following command to test the HTTP service:
curl -X 'POST' \
'http://localhost:8080/openapi/greeting/dave' \
-H 'accept: text/plain' \
-H 'Content-Type: application/json' \
-d '{}'
"""
import connexion
# openapi implement
def post_greeting(name: str) -> str:
return f"Hello {name}\n"
if __name__ == "__main__":
app = connexion.AioHttpApp(__name__, specification_dir='../../.well-known/')
app.add_api("openapi.yaml", arguments={"title": "Hello World Example"})
app.run(port=8080)

View file

@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17
@Author : mashenquan
@File : metagpt_oas3_api_svc.py
@Desc : MetaGPT OpenAPI Specification 3.0 REST API service
"""
import asyncio
from pathlib import Path
import sys
import connexion
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.utils.common import initialize_environment
def oas_http_svc():
"""Start the OAS 3.0 OpenAPI HTTP service"""
initialize_environment()
app = connexion.FlaskApp(__name__, specification_dir='../../.well-known/')
app.add_api("metagpt_oas3_api.yaml")
app.add_api("openapi.yaml")
app.run(port=8080)
async def async_main():
"""Start the OAS 3.0 OpenAPI HTTP service in the background."""
loop = asyncio.get_event_loop()
loop.run_in_executor(None, oas_http_svc)
# TODO: replace following codes:
while True:
await asyncio.sleep(1)
print("sleep")
def main():
oas_http_svc()
if __name__ == "__main__":
# asyncio.run(async_main())
main()

View file

@ -0,0 +1,112 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : metagpt_text_to_image.py
@Desc : MetaGPT Text-to-Image OAS3 api, which provides text-to-image functionality.
"""
import base64
import os
import sys
from pathlib import Path
from typing import List, Dict
import requests
from pydantic import BaseModel
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.utils.common import initialize_environment
from metagpt.logs import logger
class MetaGPTText2Image:
def __init__(self, model_url):
"""
:param model_url: Model reset api url
"""
self.model_url = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')
def text_2_image(self, text, size_type="512x512"):
"""Text to image
:param text: The text used for image conversion.
:param size_type: One of ['512x512', '512x768']
:return: The image data is returned in Base64 encoding.
"""
headers = {
"Content-Type": "application/json"
}
dims = size_type.split("x")
data = {
"prompt": text,
"negative_prompt": "(easynegative:0.8),black, dark,Low resolution",
"override_settings": {"sd_model_checkpoint": "galaxytimemachinesGTM_photoV20"},
"seed": -1,
"batch_size": 1,
"n_iter": 1,
"steps": 20,
"cfg_scale": 11,
"width": int(dims[0]),
"height": int(dims[1]), # 768,
"restore_faces": False,
"tiling": False,
"do_not_save_samples": False,
"do_not_save_grid": False,
"enable_hr": False,
"hr_scale": 2,
"hr_upscaler": "Latent",
"hr_second_pass_steps": 0,
"hr_resize_x": 0,
"hr_resize_y": 0,
"hr_upscale_to_x": 0,
"hr_upscale_to_y": 0,
"truncate_x": 0,
"truncate_y": 0,
"applied_old_hires_behavior_to": None,
"eta": None,
"sampler_index": "DPM++ SDE Karras",
"alwayson_scripts": {},
}
class ImageResult(BaseModel):
images: List
parameters: Dict
try:
response = requests.post(self.model_url, headers=headers, json=data)
response.raise_for_status() # Raise an exception for 4xx or 5xx responses
result = ImageResult(**response.json())
if len(result.images) == 0:
return ""
return result.images[0]
except requests.exceptions.RequestException as e:
logger.error(f"An error occurred:{e}")
return ""
# Export
def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""):
"""Text to image
:param text: The text used for image conversion.
:param model_url: Model reset api
:param size_type: One of ['512x512', '512x768']
:return: The image data is returned in Base64 encoding.
"""
if not text:
return ""
if not model_url:
model_url = os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')
return MetaGPTText2Image(model_url).text_2_image(text, size_type=size_type)
if __name__ == "__main__":
initialize_environment()
v = oas3_metagpt_text_2_image("Panda emoji")
data = base64.b64decode(v)
with open("tmp.png", mode="wb") as writer:
writer.write(data)
print(v)

View file

@ -0,0 +1,92 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : openai_text_to_embedding.py
@Desc : OpenAI Text-to-Embedding OAS3 api, which provides text-to-embedding functionality.
For more details, checkout: `https://platform.openai.com/docs/api-reference/embeddings/object`
"""
import os
from pathlib import Path
from typing import List
import requests
from pydantic import BaseModel
import sys
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.utils.common import initialize_environment
from metagpt.logs import logger
class Embedding(BaseModel):
"""Represents an embedding vector returned by embedding endpoint."""
object: str # The object type, which is always "embedding".
embedding: List[
float] # The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the embedding guide.
index: int # The index of the embedding in the list of embeddings.
class Usage(BaseModel):
prompt_tokens: int
total_tokens: int
class ResultEmbedding(BaseModel):
object: str
data: List[Embedding]
model: str
usage: Usage
class OpenAIText2Embedding:
def __init__(self, openai_api_key):
"""
:param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
"""
self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY')
def text_2_embedding(self, text, model="text-embedding-ada-002"):
"""Text to embedding
:param text: The text used for embedding.
:param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`.
:return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`.
"""
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.openai_api_key}"
}
data = {"input": text, "model": model}
try:
response = requests.post("https://api.openai.com/v1/embeddings", headers=headers, json=data)
response.raise_for_status() # Raise an exception for 4xx or 5xx responses
return response.json()
except requests.exceptions.RequestException as e:
logger.error(f"An error occurred:{e}")
return {}
# Export
def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
"""Text to embedding
:param text: The text used for embedding.
:param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`.
:param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
:return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`.
"""
if not text:
return ""
if not openai_api_key:
openai_api_key = os.environ.get("OPENAI_API_KEY")
return OpenAIText2Embedding(openai_api_key).text_2_embedding(text, model=model)
if __name__ == "__main__":
initialize_environment()
v = oas3_openai_text_to_embedding("Panda emoji")
print(v)

View file

@ -0,0 +1,100 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17
@Author : mashenquan
@File : openai_text_to_image.py
@Desc : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
"""
import base64
import os
import sys
from pathlib import Path
from typing import List
import requests
from pydantic import BaseModel
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.utils.common import initialize_environment
from metagpt.logs import logger
class OpenAIText2Image:
def __init__(self, openai_api_key):
"""
:param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
"""
self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY')
def text_2_image(self, text, size_type="1024x1024"):
"""Text to image
:param text: The text used for image conversion.
:param size_type: One of ['256x256', '512x512', '1024x1024']
:return: The image data is returned in Base64 encoding.
"""
class ImageUrl(BaseModel):
url: str
class ImageResult(BaseModel):
data: List[ImageUrl]
created: int
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.openai_api_key}"
}
data = {"prompt": text, "n": 1, "size": size_type}
try:
response = requests.post("https://api.openai.com/v1/images/generations", headers=headers, json=data)
response.raise_for_status() # Raise an exception for 4xx or 5xx responses
result = ImageResult(**response.json())
except requests.exceptions.RequestException as e:
logger.error(f"An error occurred:{e}")
return ""
if len(result.data) > 0:
return OpenAIText2Image.get_image_data(result.data[0].url)
return ""
@staticmethod
def get_image_data(url):
"""Fetch image data from a URL and encode it as Base64
:param url: Image url
:return: Base64-encoded image data.
"""
try:
response = requests.get(url)
response.raise_for_status() # Raise an exception for 4xx or 5xx responses
image_data = response.content
base64_image = base64.b64encode(image_data).decode("utf-8")
return base64_image
except requests.exceptions.RequestException as e:
logger.error(f"An error occurred:{e}")
return ""
# Export
def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""):
"""Text to image
:param text: The text used for image conversion.
:param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
:param size_type: One of ['256x256', '512x512', '1024x1024']
:return: The image data is returned in Base64 encoding.
"""
if not text:
return ""
if not openai_api_key:
openai_api_key = os.environ.get("OPENAI_API_KEY")
return OpenAIText2Image(openai_api_key).text_2_image(text, size_type=size_type)
if __name__ == "__main__":
initialize_environment()
v = oas3_openai_text_to_image("Panda emoji")
print(v)

View file

@ -4,14 +4,18 @@
@Time : 2023/4/29 16:07
@Author : alexanderwu
@File : common.py
@Modified By: mashenquan, 2023-8-17, add `initalize_enviroment()` to load `config/config.yaml` to `os.environ`
"""
import ast
import contextlib
import inspect
import os
import re
from pathlib import Path
from typing import List, Tuple
import yaml
from metagpt.logs import logger
@ -254,3 +258,12 @@ def parse_recipient(text):
pattern = r"## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
recipient = re.search(pattern, text)
return recipient.group(1) if recipient else ""
def initialize_environment():
"""Load `config/config.yaml` to `os.environ`"""
yaml_file_path = Path(__file__).resolve().parent.parent.parent / "config/config.yaml"
with open(str(yaml_file_path), "r") as yaml_file:
data = yaml.safe_load(yaml_file)
for k, v in data.items():
os.environ[k] = str(v)

View file

@ -38,4 +38,6 @@ typing_extensions==4.5.0
aiofiles
libcst==1.0.1
qdrant-client==1.4.0
connexion[swagger-ui]
aiohttp_jinja2

View file

View file

@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : test_text_to_embedding.py
@Desc : Unit tests.
"""
import asyncio
import base64
from pydantic import BaseModel
from metagpt.learn.text_to_embedding import text_to_embedding
async def mock_text_to_embedding():
class Input(BaseModel):
input: str
inputs = [
{"input": "Panda emoji"}
]
for i in inputs:
seed = Input(**i)
data = text_to_embedding(seed.input)
v = ResultEmbedding(**data)
assert len(v.data) > 0
def test_suite():
loop = asyncio.get_event_loop()
task = loop.create_task(mock_text_to_embedding())
loop.run_until_complete(task)
if __name__ == '__main__':
test_suite()

View file

@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : test_text_to_image.py
@Desc : Unit tests.
"""
import asyncio
import base64
from pydantic import BaseModel
from metagpt.learn.text_to_image import text_to_image
async def mock_text_to_image():
class Input(BaseModel):
input: str
size_type: str
inputs = [
{"input": "Panda emoji", "size_type": "512x512"}
]
for i in inputs:
seed = Input(**i)
base64_data = text_to_image(seed.input)
assert base64_data != ""
print(f"{seed.input} -> {base64_data}")
assert base64.b64decode(base64_data, validate=True)
def test_suite():
loop = asyncio.get_event_loop()
task = loop.create_task(mock_text_to_image())
loop.run_until_complete(task)
if __name__ == '__main__':
test_suite()

View file

@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/18
@Author : mashenquan
@File : test_text_to_speech.py
@Desc : Unit tests.
"""
import asyncio
import base64
from pydantic import BaseModel
from metagpt.learn.text_to_speech import text_to_speech
async def mock_text_to_speech():
class Input(BaseModel):
input: str
inputs = [
{"input": "Panda emoji"}
]
for i in inputs:
seed = Input(**i)
base64_data = text_to_speech(seed.input)
assert base64_data != ""
print(f"{seed.input} -> {base64_data}")
assert base64.b64decode(base64_data, validate=True)
def test_suite():
loop = asyncio.get_event_loop()
task = loop.create_task(mock_text_to_speech())
loop.run_until_complete(task)
if __name__ == '__main__':
test_suite()

View file

@ -4,8 +4,13 @@
@Time : 2023/7/1 22:50
@Author : alexanderwu
@File : test_azure_tts.py
@Modified By: mashenquan, 2023-8-17, move to `tools` folder.
"""
from metagpt.actions.azure_tts import AzureTTS
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.tools.azure_tts import AzureTTS
def test_azure_tts():