diff --git a/data/inference/make_datasets/parse_utils.py b/data/inference/make_datasets/parse_utils.py
index ace137a44..79b6189e1 100644
--- a/data/inference/make_datasets/parse_utils.py
+++ b/data/inference/make_datasets/parse_utils.py
@@ -2,26 +2,37 @@ import re
 
 
 def extract_scripts_from_codetext(codetext: str):
-    script_names = []
-    # 提供的文本内容，可能包含多个 [start of ... .py]
     """
+    Extracts Python script paths from a given text that is divided into per-file sections.
+    A script section runs from '[start of <script_name>.py]' to '[end of <script_name>.py]'; only the start marker is matched.
+
+    Parameters:
+    - codetext (str): Text containing zero or more file sections, each delimited by '[start of ...]' and '[end of ...]' markers.
+
+    Returns:
+    - list: The matched script paths in order of appearance (duplicates are kept); empty if no script is found.
+
+    Example of codetext:
+    '''
     [end of README.rst]
     [start of sklearn/compose/_target.py]
-    ... 文件内容 ...
+    ... file content ...
     [end of sklearn/compose/_target.py]
     [start of another_module/example.py]
-    ... 文件内容 ...
+    ... file content ...
     [end of another_module/example.py]
+    '''
     """
+    script_names = []
 
-    # 使用正则表达式匹配所有 “[start of 任意字符.py]”
+    # Match all occurrences of '[start of <script_name>.py]'
     matches = re.findall(r"\[start of ([^\]]+\.py)\]", codetext)
 
     if matches:
-        # 遍历所有匹配的文件名并打印
         for script_name in matches:
             print("Extracted script name:", script_name)
             script_names.append(script_name)
     else:
         print("No script names found in the text.")
+
     return script_names