diff --git a/metagpt/actions/skill_action.py b/metagpt/actions/skill_action.py
index 660d785ff..758591fdd 100644
--- a/metagpt/actions/skill_action.py
+++ b/metagpt/actions/skill_action.py
@@ -9,14 +9,12 @@
 from __future__ import annotations
 
 import ast
-import asyncio
 import importlib
 import traceback
 from copy import deepcopy
 
 from metagpt.actions import Action, ActionOutput
-from metagpt.config import CONFIG
-from metagpt.learn.skill_loader import Returns, Skill
+from metagpt.learn.skill_loader import Skill
 from metagpt.logs import logger
 
 
@@ -110,15 +108,3 @@ if __name__ == "__main__":
     ArgumentsParingAction.parse_arguments(
         skill_name="text_to_image", txt='`text_to_image(text="Draw an apple", size_type="512x512")`'
     )
-    CONFIG.set_context({})
-    args = {"text": "hello world", "role": "Girl"}
-    action = SkillAction(
-        skill=Skill(
-            name="text_to_speech", description="", id="", arguments={}, examples=[], returns=Returns(type="string")
-        ),
-        args=args,
-    )
-    loop = asyncio.new_event_loop()
-    t = loop.create_task(action.run())
-    r = loop.run_until_complete(t)
-    print(r)
diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py
index 691aa7f6a..81bc8512b 100644
--- a/metagpt/learn/text_to_speech.py
+++ b/metagpt/learn/text_to_speech.py
@@ -9,7 +9,9 @@
 import openai
 
 from metagpt.config import CONFIG
+from metagpt.const import BASE64_FORMAT
 from metagpt.tools.azure_tts import oas3_azsure_tts
+from metagpt.utils.s3 import S3
 
 
 async def text_to_speech(
@@ -38,10 +40,10 @@ async def text_to_speech(
     audio_declaration = "data:audio/wav;base64,"
     if (CONFIG.AZURE_TTS_SUBSCRIPTION_KEY and CONFIG.AZURE_TTS_REGION) or (subscription_key and region):
         base64_data = await oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
-        # s3 = S3()
-        # url = await s3.cache(data=base64_data, file_ext=".wav", format=BASE64_FORMAT)
-        # if url:
-        #     return f"[{text}]({url})"
+        s3 = S3()
+        url = await s3.cache(data=base64_data, file_ext=".wav", format=BASE64_FORMAT)
+        if url:
+            return f"[{text}]({url})"
         return audio_declaration + base64_data if base64_data else base64_data
 
     raise openai.error.InvalidRequestError(message="AZURE_TTS_SUBSCRIPTION_KEY and AZURE_TTS_REGION error", param={})