diff --git a/metagpt/learn/text_to_embedding.py b/metagpt/learn/text_to_embedding.py
new file mode 100644
index 000000000..b1395a61a
--- /dev/null
+++ b/metagpt/learn/text_to_embedding.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/18
+@Author  : mashenquan
+@File    : text_to_embedding.py
+@Desc    : Text-to-Embedding skill, which provides text-to-embedding functionality.
+"""
+
+from metagpt.tools.openai_text_2_embedding import oas3_openai_text_2_embedding
+from metagpt.utils.common import initialize_environment
+
+
+def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
+    """Convert text into an embedding through the OpenAI text-to-embedding tool.
+
+    :param text: The text to embed.
+    :param model: ID of the model to use, one of ['text-embedding-ada-002']. See `https://api.openai.com/v1/models`.
+    :param openai_api_key: OpenAI API key. See `https://platform.openai.com/account/api-keys`.
+    :return: A json object of :class:`ResultEmbedding` class on success, otherwise `{}`.
+    """
+    initialize_environment()
+    return oas3_openai_text_2_embedding(text, openai_api_key=openai_api_key, model=model)
\ No newline at end of file
diff --git a/metagpt/learn/text_to_image.py b/metagpt/learn/text_to_image.py
new file mode 100644
index 000000000..87668a13f
--- /dev/null
+++ b/metagpt/learn/text_to_image.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/18
+@Author  : mashenquan
+@File    : text_to_image.py
+@Desc    : Text-to-Image skill, which provides text-to-image functionality.
+"""
+
+from metagpt.tools.openai_text_2_image import oas3_openai_text_2_image
+from metagpt.utils.common import initialize_environment
+
+
+def text_to_image(text, size_type: str = "1024x1024", openai_api_key=""):
+    """Generate an image from text through the OpenAI text-to-image tool.
+
+    :param text: The text used for image conversion.
+    :param size_type: Image size, one of ['256x256', '512x512', '1024x1024'].
+    :param openai_api_key: OpenAI API key. See `https://platform.openai.com/account/api-keys`.
+    :return: The image data, Base64-encoded.
+    """
+    initialize_environment()
+    return oas3_openai_text_2_image(text, size_type, openai_api_key)
diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py
new file mode 100644
index 000000000..909a9dca1
--- /dev/null
+++ b/metagpt/learn/text_to_speech.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/17
+@Author  : mashenquan
+@File    : text_to_speech.py
+@Desc    : Text-to-Speech skill, which provides text-to-speech functionality
+"""
+
+from metagpt.tools.azure_tts import oas3_azsure_tts
+from metagpt.utils.common import initialize_environment
+
+
+def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl", subscription_key="", region=""):
+    """Convert text to speech through the Azure TTS tool.
+    For lang/voice/style/role values, check out: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
+
+    :param text: The text used for voice conversion.
+    :param lang: A language code such as en (English), or a locale such as en-US (English - United States).
+    :param voice: Voice name; see also `https://speech.microsoft.com/portal/voicegallery`.
+    :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm.
+    :param role: With roles, the same voice can act as a different age and gender.
+    :param subscription_key: Key used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
+    :param region: The location (or region) of your resource. You may need to use this field when making calls to this API.
+    :return: The Base64-encoded .wav file data if successful, otherwise an empty string.
+    """
+    initialize_environment()
+    voice_options = (lang, voice, style, role)
+    return oas3_azsure_tts(text, *voice_options, subscription_key, region)
diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py
index 2ec1539ef..21e8f1b6c 100644
--- a/metagpt/tools/azure_tts.py
+++ b/metagpt/tools/azure_tts.py
@@ -62,7 +62,7 @@ class AzureTTS:
 
 # Export
 def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""):
-    """oas3/tts/azsure
+    """Text to speech
     For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
 
     :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
diff --git a/tests/metagpt/learn/__init__.py b/tests/metagpt/learn/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/metagpt/learn/test_text_to_embedding.py b/tests/metagpt/learn/test_text_to_embedding.py
new file mode 100644
index 000000000..c85e5dde8
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_embedding.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/18
+@Author  : mashenquan
+@File    : test_text_to_embedding.py
+@Desc    : Unit tests.
+"""
+
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_embedding import text_to_embedding
+
+
+async def mock_text_to_embedding():
+    """Embed each sample text and check that the response carries embedding data."""
+    class Input(BaseModel):
+        input: str
+
+    inputs = [{"input": "Panda emoji"}]
+
+    for i in inputs:
+        seed = Input(**i)
+        data = text_to_embedding(seed.input)
+        # ResultEmbedding was never imported here (NameError), so check the JSON object directly;
+        # a failed call returns `{}` per text_to_embedding's docstring, which fails the assert.
+        assert data and len(data["data"]) > 0, "expected non-empty `data` in the embedding response"
+
+
+def test_suite():
+    """Drive mock_text_to_embedding to completion on the current asyncio event loop."""
+    event_loop = asyncio.get_event_loop()
+    event_loop.run_until_complete(event_loop.create_task(mock_text_to_embedding()))
+
+
+if __name__ == '__main__':
+    test_suite()  # run the embedding check when this file is executed as a script
diff --git a/tests/metagpt/learn/test_text_to_image.py b/tests/metagpt/learn/test_text_to_image.py
new file mode 100644
index 000000000..bfcb1db25
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_image.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/18
+@Author  : mashenquan
+@File    : test_text_to_image.py
+@Desc    : Unit tests.
+"""
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_image import text_to_image
+
+
+async def mock_text_to_image():
+    """Generate an image per sample case and check that valid, non-empty Base64 comes back."""
+    class Input(BaseModel):
+        input: str
+        size_type: str
+
+    inputs = [{"input": "Panda emoji", "size_type": "256x256"}]
+
+    for i in inputs:
+        seed = Input(**i)
+        # Pass the case's size explicitly; otherwise text_to_image uses its "1024x1024" default.
+        base64_data = text_to_image(seed.input, size_type=seed.size_type)
+        assert base64_data != ""
+        print(f"{seed.input} -> {base64_data}")
+        assert base64.b64decode(base64_data, validate=True)
+
+
+def test_suite():
+    """Drive mock_text_to_image to completion on the current asyncio event loop."""
+    event_loop = asyncio.get_event_loop()
+    event_loop.run_until_complete(event_loop.create_task(mock_text_to_image()))
+
+
+if __name__ == '__main__':
+    test_suite()  # run the image check when this file is executed as a script
diff --git a/tests/metagpt/learn/test_text_to_speech.py b/tests/metagpt/learn/test_text_to_speech.py
new file mode 100644
index 000000000..dbb599e38
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_speech.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/18
+@Author  : mashenquan
+@File    : test_text_to_speech.py
+@Desc    : Unit tests.
+"""
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_speech import text_to_speech
+
+
+async def mock_text_to_speech():
+    """Synthesize each sample text and check that valid, non-empty Base64 comes back."""
+    # Schema for one test case.
+    class Input(BaseModel):
+        input: str
+
+    cases = [{"input": "Panda emoji"}]
+
+    for seed in (Input(**raw) for raw in cases):
+        base64_data = text_to_speech(seed.input)
+        assert base64_data != ""
+        print(f"{seed.input} -> {base64_data}")
+        # validate=True makes b64decode reject characters outside the Base64 alphabet.
+        assert base64.b64decode(base64_data, validate=True)
+
+
+def test_suite():
+    """Drive mock_text_to_speech to completion on the current asyncio event loop."""
+    event_loop = asyncio.get_event_loop()
+    event_loop.run_until_complete(event_loop.create_task(mock_text_to_speech()))
+
+
+if __name__ == '__main__':
+    test_suite()  # run the speech check when this file is executed as a script
\ No newline at end of file