From b5833397a4a12a46f41d02e6f2b44edadd48c3b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 20:18:20 +0800 Subject: [PATCH] feat: convert functions docstring schema to yaml --- metagpt/tools/functions/libs/udf/__init__.py | 77 +++++++++++++++++--- tests/metagpt/tools/functions/test_udf.py | 49 ++++++++++++- 2 files changed, 114 insertions(+), 12 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 5bad9a3a4..0cada9545 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -1,9 +1,12 @@ import ast import os +import re +import yaml import inspect import importlib from pathlib import Path from typing import Dict, List +from metagpt.logs import logger def extract_function_signatures(file_path): @@ -12,6 +15,7 @@ def extract_function_signatures(file_path): tree = ast.parse(source_code) function_signatures = [] + function_returns = [] for node in ast.walk(tree): if isinstance(node, ast.FunctionDef): # 只提取用户自定义函数,排除内置函数 @@ -30,29 +34,84 @@ def extract_function_signatures(file_path): 'udf_path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}', 'udf_doc': inspect.getdoc(getattr(module, function_name))} function_signatures.append(function_schema) - - return function_signatures + # 获取函数返回变量名 + source_lines, _ = inspect.getsourcelines(getattr(module, function_name)) + for line in source_lines: + if line.strip().startswith("return "): + function_returns.append({ + 'udf_name': function_name, + 'udf_returns': [var.strip() for var in line.strip()[len("return "):].split(',')] + }) + break + return function_signatures, function_returns def get_function_signatures_in_folder(folder_path): python_files = [f for f in os.listdir(folder_path) if f.endswith('.py')] all_function_signatures = [] + all_function_returns = [] for file_name in python_files: file_path = os.path.join(folder_path, file_name) - function_signatures = extract_function_signatures(file_path) + function_signatures, function_returns = extract_function_signatures(file_path) all_function_signatures.extend(function_signatures) + all_function_returns.extend(function_returns) + return all_function_signatures, all_function_returns - return all_function_signatures + +# TODO: Create Tools Yaml Style Schema +def docstring_to_yaml(docstring: str, return_vars: List[str] = None): + logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n") + if docstring is None: + return {} + # 匹配简介部分 + description_match = re.search(r'^(.*?)(?:Args:|Returns:|Raises:|$)', docstring, re.DOTALL) + description = description_match.group(1).strip() if description_match else "" + + # 匹配Args部分 + args_match = re.search(r'Args:\s*(.*?)(?:Returns:|Raises:|$)', docstring, re.DOTALL) + _args = args_match.group(1).strip() if args_match else "" + variable_pattern = re.compile(r'(\w+)\s*\((.*?)\):\s*(.*)') + params = variable_pattern.findall(_args) + if not params: + err_msg = f"No Args found in docstring as following, Please make sure it is google style\ + : \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n." + logger.error(err_msg) + raise ValueError(err_msg) + # 匹配Returns部分 + returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL) + returns = returns_match.group(1).strip() if returns_match else "" + return_pattern = re.compile(r'^(.*)\s*:\s*(.*)$') + # 添加返回值变量名 + return_vars = return_vars if isinstance(return_vars, list) else [return_vars] + returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns), return_vars)] + # 构建YAML字典 + yaml_data = { + 'description': description.strip('.').strip(), + 'parameters': { + 'properties': {param[0]: {'type': param[1], 'description': param[2]} for param in params}, + 'required': [param[0] for param in params] + }, + 'returns': {ret[0]: {'type': ret[1], 'description': ret[2]} for ret in returns} + } + return yaml_data + + +def extract_function_schema_yaml_in_folder(folder_path: str): + function_signatures, function_returns = get_function_signatures_in_folder(folder_path) + function_schema_yaml_data = {} + for func_docstring, func_returns in zip(function_signatures, function_returns): + if func_docstring['udf_doc']: + fun_yaml_data = docstring_to_yaml(func_docstring['udf_doc'], func_returns['udf_returns']) + fun_yaml_data.update({'type': 'function'}) + function_schema_yaml_data.update({func_returns['udf_name']: fun_yaml_data}) + return yaml.dump(function_schema_yaml_data, default_flow_style=False) folder_path = str(Path(__file__).parent.absolute()) -function_signatures = get_function_signatures_in_folder(folder_path) +function_signatures, function_returns = get_function_signatures_in_folder(folder_path) UDFS = [func for func in function_signatures if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] - -# TODO: Create Yaml style UDFS Schema -def udfs2yaml(udfs: List[Dict]) -> Dict: - pass +UDFS_YAML = extract_function_schema_yaml_in_folder(folder_path) diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/functions/test_udf.py index b0c921180..89897e548 100644 --- a/tests/metagpt/tools/functions/test_udf.py +++ b/tests/metagpt/tools/functions/test_udf.py @@ -1,9 +1,52 @@ -from metagpt.tools.functions.libs.udf import UDFS +import pytest +import yaml + +from metagpt.tools.functions.libs.udf import UDFS, docstring_to_yaml, UDFS_YAML from metagpt.logs import logger def test_udfs(): assert len(UDFS) > 0 - assert 'name' in UDFS[0] - assert 'doc' in UDFS[0] + assert 'udf_name' in UDFS[0] + assert 'udf_doc' in UDFS[0] logger.info(UDFS) + + +def test_docstring2yaml(): + docstring = """Calculate the duration in hours between two datetime columns. + + Args: + dataframe (pd.DataFrame): The dataframe containing the datetime columns. + + Returns: + pd.DataFrame: The dataframe with an additional column 'duration_hour' added. + """ + + yaml_result = docstring_to_yaml(docstring, return_vars='dataframe') + assert 'parameters' in yaml_result + assert 'properties' in yaml_result['parameters'] + assert 'dataframe' in yaml_result['parameters']['properties'] + + +def test_docstring2yaml_error(): + docstring = """Calculate the duration in hours between two datetime columns. + args: + dataframe (pd.DataFrame): The dataframe containing the datetime columns. + returns: + pd.DataFrame: The dataframe with an additional column 'duration_hour' added. + """ + with pytest.raises(ValueError) as exc_info: + docstring_to_yaml(docstring, return_vars='dataframe') + assert "No Args found" in exc_info + + +def test_UDFS_YAML(): + assert len(UDFS_YAML) > 0 + logger.info(f"\n\n{UDFS_YAML}") + function_schema = yaml.load(UDFS_YAML, Loader=yaml.FullLoader) + assert 'description' in function_schema[list(function_schema.keys())[0]] + assert 'type' in function_schema[list(function_schema.keys())[0]] + assert 'parameters' in function_schema[list(function_schema.keys())[0]] + assert 'properties' in function_schema[list(function_schema.keys())[0]]['parameters'] + assert 'required' in function_schema[list(function_schema.keys())[0]]['parameters'] + assert 'returns' in function_schema[list(function_schema.keys())[0]]