diff --git a/metagpt/learn/text_to_embedding.py b/metagpt/learn/text_to_embedding.py
new file mode 100644
index 000000000..b1395a61a
--- /dev/null
+++ b/metagpt/learn/text_to_embedding.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : text_to_embedding.py
+@Desc : Text-to-Embedding skill, which provides text-to-embedding functionality.
+"""
+
+from metagpt.tools.openai_text_2_embedding import oas3_openai_text_2_embedding
+from metagpt.utils.common import initialize_environment
+
+
+def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
+    """Text to embedding
+
+    :param text: The text used for embedding.
+    :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`.
+    :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
+    :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`.
+    """
+    initialize_environment()
+    return oas3_openai_text_2_embedding(text, model=model, openai_api_key=openai_api_key)
\ No newline at end of file
diff --git a/metagpt/learn/text_to_image.py b/metagpt/learn/text_to_image.py
new file mode 100644
index 000000000..87668a13f
--- /dev/null
+++ b/metagpt/learn/text_to_image.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : text_to_image.py
+@Desc : Text-to-Image skill, which provides text-to-image functionality.
+"""
+
+from metagpt.tools.openai_text_2_image import oas3_openai_text_2_image
+from metagpt.utils.common import initialize_environment
+
+
+def text_to_image(text, size_type: str = "1024x1024", openai_api_key=""):
+    """Text to image
+
+    :param text: The text used for image conversion.
+    :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
+    :param size_type: One of ['256x256', '512x512', '1024x1024']
+    :return: The image data is returned in Base64 encoding.
+    """
+    initialize_environment()
+    return oas3_openai_text_2_image(text, size_type, openai_api_key)
diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py
new file mode 100644
index 000000000..909a9dca1
--- /dev/null
+++ b/metagpt/learn/text_to_speech.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/17
+@Author : mashenquan
+@File : text_to_speech.py
+@Desc : Text-to-Speech skill, which provides text-to-speech functionality
+"""
+
+from metagpt.tools.azure_tts import oas3_azsure_tts
+from metagpt.utils.common import initialize_environment
+
+
+def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl", subscription_key="", region=""):
+    """Text to speech
+    For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
+
+    :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
+    :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
+    :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
+    :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
+    :param text: The text used for voice conversion.
+    :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
+    :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
+    :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
+
+    """
+    initialize_environment()
+    return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py
index 2ec1539ef..21e8f1b6c 100644
--- a/metagpt/tools/azure_tts.py
+++ b/metagpt/tools/azure_tts.py
@@ -62,7 +62,7 @@ class AzureTTS:
 
 # Export
 def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""):
-    """oas3/tts/azsure
+    """Text to speech
     For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
 
     :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
diff --git a/tests/metagpt/learn/__init__.py b/tests/metagpt/learn/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/metagpt/learn/test_text_to_embedding.py b/tests/metagpt/learn/test_text_to_embedding.py
new file mode 100644
index 000000000..c85e5dde8
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_embedding.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : test_text_to_embedding.py
+@Desc : Unit tests.
+"""
+
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_embedding import text_to_embedding
+from metagpt.tools.openai_text_2_embedding import ResultEmbedding
+
+
+async def mock_text_to_embedding():
+    class Input(BaseModel):
+        input: str
+
+    inputs = [
+        {"input": "Panda emoji"}
+    ]
+
+    for i in inputs:
+        seed = Input(**i)
+        data = text_to_embedding(seed.input)
+        v = ResultEmbedding(**data)
+        assert len(v.data) > 0
+
+
+def test_suite():
+    loop = asyncio.get_event_loop()
+    task = loop.create_task(mock_text_to_embedding())
+    loop.run_until_complete(task)
+
+
+if __name__ == '__main__':
+    test_suite()
diff --git a/tests/metagpt/learn/test_text_to_image.py b/tests/metagpt/learn/test_text_to_image.py
new file mode 100644
index 000000000..bfcb1db25
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_image.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : test_text_to_image.py
+@Desc : Unit tests.
+"""
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_image import text_to_image
+
+
+async def mock_text_to_image():
+    class Input(BaseModel):
+        input: str
+        size_type: str
+
+    inputs = [
+        {"input": "Panda emoji", "size_type": "256x256"}
+    ]
+
+    for i in inputs:
+        seed = Input(**i)
+        # Forward the requested size; otherwise the 1024x1024 default is used and `size_type` is never exercised.
+        base64_data = text_to_image(seed.input, size_type=seed.size_type)
+        assert base64_data != ""
+        print(f"{seed.input} -> {base64_data}")
+        assert base64.b64decode(base64_data, validate=True)
+
+
+def test_suite():
+    loop = asyncio.get_event_loop()
+    task = loop.create_task(mock_text_to_image())
+    loop.run_until_complete(task)
+
+
+if __name__ == '__main__':
+    test_suite()
diff --git a/tests/metagpt/learn/test_text_to_speech.py b/tests/metagpt/learn/test_text_to_speech.py
new file mode 100644
index 000000000..dbb599e38
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_speech.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : test_text_to_speech.py
+@Desc : Unit tests.
+"""
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_speech import text_to_speech
+
+
+async def mock_text_to_speech():
+    class Input(BaseModel):
+        input: str
+
+    inputs = [
+        {"input": "Panda emoji"}
+    ]
+
+    for i in inputs:
+        seed = Input(**i)
+        base64_data = text_to_speech(seed.input)
+        assert base64_data != ""
+        print(f"{seed.input} -> {base64_data}")
+        assert base64.b64decode(base64_data, validate=True)
+
+
+def test_suite():
+    loop = asyncio.get_event_loop()
+    task = loop.create_task(mock_text_to_speech())
+    loop.run_until_complete(task)
+
+
+if __name__ == '__main__':
+    test_suite()
\ No newline at end of file