feat: convert functions docstring schema to yaml

This commit is contained in:
刘棒棒 2023-12-18 20:18:20 +08:00
parent ea84fd34cd
commit b5833397a4
2 changed files with 114 additions and 12 deletions

View file

@ -1,9 +1,12 @@
import ast
import os
import re
import yaml
import inspect
import importlib
from pathlib import Path
from typing import Dict, List
from metagpt.logs import logger
def extract_function_signatures(file_path):
@ -12,6 +15,7 @@ def extract_function_signatures(file_path):
tree = ast.parse(source_code)
function_signatures = []
function_returns = []
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
# 只提取用户自定义函数,排除内置函数
@ -30,29 +34,84 @@ def extract_function_signatures(file_path):
'udf_path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}',
'udf_doc': inspect.getdoc(getattr(module, function_name))}
function_signatures.append(function_schema)
return function_signatures
# 获取函数返回变量名
source_lines, _ = inspect.getsourcelines(getattr(module, function_name))
for line in source_lines:
if line.strip().startswith("return "):
function_returns.append({
'udf_name': function_name,
'udf_returns': [var.strip() for var in line.strip()[len("return "):].split(',')]
})
break
return function_signatures, function_returns
def get_function_signatures_in_folder(folder_path):
    """Collect function schemas and return-variable records from a folder.

    Only the ``.py`` files directly inside ``folder_path`` are scanned;
    sub-directories are not visited.

    Args:
        folder_path: Directory containing the Python files to inspect.

    Returns:
        A 2-tuple ``(all_function_signatures, all_function_returns)`` with the
        concatenated results of ``extract_function_signatures`` for each file.
    """
    python_files = [f for f in os.listdir(folder_path) if f.endswith('.py')]
    all_function_signatures = []
    all_function_returns = []
    for file_name in python_files:
        file_path = os.path.join(folder_path, file_name)
        # Scan each file exactly once: extract_function_signatures returns a
        # (signatures, returns) pair, so a second single-value call would only
        # repeat the work and bind the whole tuple to one name.
        function_signatures, function_returns = extract_function_signatures(file_path)
        all_function_signatures.extend(function_signatures)
        all_function_returns.extend(function_returns)
    return all_function_signatures, all_function_returns
# TODO: Create Tools Yaml Style Schema
def docstring_to_yaml(docstring: str, return_vars: List[str] = None) -> Dict:
    """Convert a Google-style docstring into a YAML-ready schema dictionary.

    Args:
        docstring: The function docstring. ``None`` yields an empty dict.
        return_vars: Name(s) of the returned variable(s), paired in order with
            the entries of the ``Returns:`` section. May be a list/tuple of
            names, a single name, or ``None`` when no names are known.

    Returns:
        A dict with the keys ``description``, ``parameters`` (holding
        ``properties`` and ``required``) and ``returns``. ``returns`` only
        contains entries for which both a name and a ``type: description``
        line are available.

    Raises:
        ValueError: If no ``name (type): description`` entry can be found in
            an ``Args:`` section.
    """
    logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n")
    if docstring is None:
        return {}

    # Summary: everything before the first recognised section header.
    description_match = re.search(r'^(.*?)(?:Args:|Returns:|Raises:|$)', docstring, re.DOTALL)
    description = description_match.group(1).strip() if description_match else ""

    # Args section: one "name (type): description" entry per parameter.
    args_match = re.search(r'Args:\s*(.*?)(?:Returns:|Raises:|$)', docstring, re.DOTALL)
    args_section = args_match.group(1).strip() if args_match else ""
    params = re.findall(r'(\w+)\s*\((.*?)\):\s*(.*)', args_section)
    if not params:
        err_msg = (
            "No Args found in docstring as following, Please make sure it is google style"
            f": \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n."
        )
        logger.error(err_msg)
        raise ValueError(err_msg)

    # Returns section: one "type: description" entry per line. MULTILINE lets
    # every line match on its own, and the lazy type group splits on the FIRST
    # colon so a colon inside the description stays in the description.
    returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL)
    returns_section = returns_match.group(1).strip() if returns_match else ""
    return_entries = re.findall(
        r'^[ \t]*(.+?)[ \t]*:[ \t]*(.*?)[ \t]*$', returns_section, re.MULTILINE
    )

    # Normalise the return variable names; a missing value means "no names"
    # rather than a single entry keyed by None.
    if return_vars is None:
        return_names = []
    elif isinstance(return_vars, (list, tuple)):
        return_names = list(return_vars)
    else:
        return_names = [return_vars]

    # Build the YAML dictionary.
    yaml_data = {
        'description': description.strip('.').strip(),
        'parameters': {
            'properties': {
                name: {'type': type_, 'description': desc} for name, type_, desc in params
            },
            'required': [name for name, _, _ in params],
        },
        'returns': {
            name: {'type': type_, 'description': desc}
            for name, (type_, desc) in zip(return_names, return_entries)
        },
    }
    return yaml_data
def extract_function_schema_yaml_in_folder(folder_path: str):
    """Build a YAML document describing every documented function in a folder.

    Args:
        folder_path: Directory whose ``.py`` files are scanned.

    Returns:
        A YAML string mapping each function name to its schema (the result of
        ``docstring_to_yaml`` plus ``type: function``). Functions without a
        docstring are skipped.

    Raises:
        ValueError: Propagated from ``docstring_to_yaml`` when a docstring has
            no parsable ``Args:`` section.
    """
    function_signatures, function_returns = get_function_signatures_in_folder(folder_path)

    # Return records exist only for functions that contain a "return ..." line,
    # so the two lists may differ in length and order. Pair them by function
    # name instead of by position to avoid attaching one function's return
    # variables (and name) to another function's docstring.
    returns_by_name = {item['udf_name']: item['udf_returns'] for item in function_returns}

    function_schema_yaml_data = {}
    for signature in function_signatures:
        docstring = signature['udf_doc']
        if not docstring:
            continue
        name = signature['udf_name']
        # An empty list means "no known return variables" for this function.
        schema = docstring_to_yaml(docstring, returns_by_name.get(name, []))
        schema.update({'type': 'function'})
        function_schema_yaml_data[name] = schema
    return yaml.dump(function_schema_yaml_data, default_flow_style=False)
# Directory containing this module; every ``.py`` file in it is scanned.
folder_path = str(Path(__file__).parent.absolute())

# Scan the folder once. get_function_signatures_in_folder returns a
# (signatures, returns) pair, so it must be unpacked into two names.
function_signatures, function_returns = get_function_signatures_in_folder(folder_path)

# Registered UDF schemas, excluding this module's own scanning helpers.
UDFS = [func for func in function_signatures
        if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))]


# TODO: Create Yaml style UDFS Schema
def udfs2yaml(udfs: List[Dict]) -> Dict:
    """Placeholder for converting UDF schemas to a YAML-style dict.

    Not implemented yet: the body is empty, so the call returns ``None``.
    """
    pass


# YAML document describing every documented function found in the folder.
UDFS_YAML = extract_function_schema_yaml_in_folder(folder_path)

View file

@ -1,9 +1,52 @@
from metagpt.tools.functions.libs.udf import UDFS
import pytest
import yaml
from metagpt.tools.functions.libs.udf import UDFS, docstring_to_yaml, UDFS_YAML
from metagpt.logs import logger
def test_udfs():
    """UDFS is non-empty and its entries expose the ``udf_``-prefixed schema keys."""
    assert len(UDFS) > 0
    # Schema entries use 'udf_name' / 'udf_doc'; the former un-prefixed
    # 'name' / 'doc' assertions no longer match the schema and are dropped.
    assert 'udf_name' in UDFS[0]
    assert 'udf_doc' in UDFS[0]
    logger.info(UDFS)
def test_docstring2yaml():
    """A Google-style docstring yields a schema that lists its documented parameter."""
    google_doc = """Calculate the duration in hours between two datetime columns.
Args:
dataframe (pd.DataFrame): The dataframe containing the datetime columns.
Returns:
pd.DataFrame: The dataframe with an additional column 'duration_hour' added.
"""
    schema = docstring_to_yaml(google_doc, return_vars='dataframe')

    assert 'parameters' in schema
    parameters = schema['parameters']
    assert 'properties' in parameters
    assert 'dataframe' in parameters['properties']
def test_docstring2yaml_error():
    """A docstring with lowercase section headers is rejected with a ValueError."""
    docstring = """Calculate the duration in hours between two datetime columns.
args:
dataframe (pd.DataFrame): The dataframe containing the datetime columns.
returns:
pd.DataFrame: The dataframe with an additional column 'duration_hour' added.
"""
    with pytest.raises(ValueError) as exc_info:
        docstring_to_yaml(docstring, return_vars='dataframe')
    # Check the message after the ``with`` block: statements placed after the
    # raising call inside the block never execute. The message lives on the
    # raised exception, available as ``exc_info.value``.
    assert "No Args found" in str(exc_info.value)
def test_UDFS_YAML():
    """The generated YAML parses and its first entry carries every schema field."""
    assert len(UDFS_YAML) > 0
    logger.info(f"\n\n{UDFS_YAML}")
    function_schema = yaml.load(UDFS_YAML, Loader=yaml.FullLoader)

    # Inspect the first function entry only.
    entry = function_schema[list(function_schema.keys())[0]]
    for top_level_key in ('description', 'type', 'parameters'):
        assert top_level_key in entry
    for parameter_key in ('properties', 'required'):
        assert parameter_key in entry['parameters']
    assert 'returns' in entry