rm Chinese comments

This commit is contained in:
stellahsr 2024-03-19 23:51:57 +08:00
parent 6e28eafcd7
commit 3fac156d66

View file

@ -2,26 +2,37 @@ import re
def extract_scripts_from_codetext(codetext: str) -> list:
    """Extract Python script paths announced by '[start of <path>.py]' markers.

    The text is expected to contain sections that open with
    '[start of <path>]' and close with '[end of <path>]'. Only start markers
    whose path ends in '.py' are collected; end markers and markers for other
    file types are ignored.

    Each extracted path is printed to stdout, and a notice is printed when
    nothing matches.

    Args:
        codetext: Text that may contain any number of section markers.

    Returns:
        The matched script paths in order of appearance (duplicates are
        kept). An empty list when no marker matches.

    Example of codetext:
        '''
        [end of README.rst]
        [start of sklearn/compose/_target.py]
        ... file content ...
        [end of sklearn/compose/_target.py]
        [start of another_module/example.py]
        ... file content ...
        [end of another_module/example.py]
        '''
    """
    # The character class excludes ']' so a match cannot run past the closing
    # bracket of its own marker; the group captures only the path.
    matches = re.findall(r"\[start of ([^\]]+\.py)\]", codetext)

    if not matches:
        print("No script names found in the text.")
        return []

    for script_name in matches:
        print("Extracted script name:", script_name)

    # Return a fresh list so callers never share the findall result object.
    return list(matches)