diff --git a/metagpt/prompts/mi/write_analysis_code.py b/metagpt/prompts/mi/write_analysis_code.py index e8ffedce4..e5663d498 100644 --- a/metagpt/prompts/mi/write_analysis_code.py +++ b/metagpt/prompts/mi/write_analysis_code.py @@ -16,7 +16,7 @@ STRUCTUAL_PROMPT = """ - Always prioritize using pre-defined tools for the same functionality. # Output -Output code in the following format: +While some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format: ```python your code ``` diff --git a/metagpt/tools/libs/data_preprocess.py b/metagpt/tools/libs/data_preprocess.py index 3c8ebcc02..aa9070689 100644 --- a/metagpt/tools/libs/data_preprocess.py +++ b/metagpt/tools/libs/data_preprocess.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from typing import Literal import numpy as np import pandas as pd @@ -90,14 +91,16 @@ class FillMissingValue(DataPreprocessTool): Completing missing values with simple strategies. """ - def __init__(self, features: list, strategy: str = "mean", fill_value=None): + def __init__( + self, features: list, strategy: Literal["mean", "median", "most_frequent", "constant"] = "mean", fill_value=None + ): """ Initialize self. Args: features (list): Columns to be processed. - strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only - be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'. + strategy (Literal["mean", "median", "most_frequent", "constant"], optional): The imputation strategy, notice 'mean' and 'median' can only + be used for numeric features. Defaults to 'mean'. fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values. Defaults to None. """ diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py index b837d8288..3d6a49769 100644 --- a/metagpt/tools/tool_convert.py +++ b/metagpt/tools/tool_convert.py @@ -2,6 +2,8 @@ import inspect from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces +PARSER = GoogleDocstringParser + def convert_code_to_tool_schema(obj, include: list[str] = None): docstring = inspect.getdoc(obj) @@ -23,54 +25,31 @@ def convert_code_to_tool_schema(obj, include: list[str] = None): return schema -def function_docstring_to_schema(fn_obj, docstring): +def function_docstring_to_schema(fn_obj, docstring) -> dict: + """ + Converts a function's docstring into a schema dictionary. + + Args: + fn_obj: The function object. + docstring: The docstring of the function. + + Returns: + A dictionary representing the schema of the function's docstring. + The dictionary contains the following keys: + - 'type': The type of the function ('function' or 'async_function'). + - 'description': The first section of the docstring describing the function overall. Provided to LLMs for both recommending and using the function. + - 'signature': The signature of the function, which helps LLMs understand how to call the function. + - 'parameters': Docstring section describing parameters including args and returns, served as extra details for LLM perception. + """ + signature = inspect.signature(fn_obj) + + docstring = remove_spaces(docstring) + + overall_desc, param_desc = PARSER.parse(docstring) + function_type = "function" if not inspect.iscoroutinefunction(fn_obj) else "async_function" - return {"type": function_type, **docstring_to_schema(docstring)} - -def docstring_to_schema(docstring: str): - if docstring is None: - return {} - - parser = GoogleDocstringParser(docstring=docstring) - - # 匹配简介部分 - description = parser.parse_desc() - - # 匹配Args部分 - params = parser.parse_params() - parameter_schema = {"properties": {}, "required": []} - for param in params: - param_name, param_type, param_desc = param - # check required or optional - is_optional, param_type = parser.check_and_parse_optional(param_type) - if not is_optional: - parameter_schema["required"].append(param_name) - # type and desc - param_dict = {"type": param_type, "description": remove_spaces(param_desc)} - # match Default for optional args - has_default_val, default_val = parser.check_and_parse_default_value(param_desc) - if has_default_val: - param_dict["default"] = default_val - # match Enum - has_enum, enum_vals = parser.check_and_parse_enum(param_desc) - if has_enum: - param_dict["enum"] = enum_vals - # add to parameter schema - parameter_schema["properties"].update({param_name: param_dict}) - - # 匹配Returns部分 - returns = parser.parse_returns() - - # 构建YAML字典 - schema = { - "description": description, - "parameters": parameter_schema, - } - if returns: - schema["returns"] = [{"type": ret[0], "description": remove_spaces(ret[1])} for ret in returns] - - return schema + return {"type": function_type, "description": overall_desc, "signature": str(signature), "parameters": param_desc} def get_class_method_docstring(cls, method_name): diff --git a/metagpt/utils/parse_docstring.py b/metagpt/utils/parse_docstring.py index e91be8e75..63c0e6890 100644 --- a/metagpt/utils/parse_docstring.py +++ b/metagpt/utils/parse_docstring.py @@ -1,45 +1,23 @@ import re from typing import Tuple -from pydantic import BaseModel - def remove_spaces(text): return re.sub(r"\s+", " ", text).strip() -class DocstringParser(BaseModel): - docstring: str +class DocstringParser: + @staticmethod + def parse(docstring: str) -> Tuple[str, str]: + """Parse the docstring and return the overall description and the parameter description. - def parse_desc(self) -> str: - """Parse and return the description from the docstring.""" - - def parse_params(self) -> list[Tuple[str, str, str]]: - """Parse and return the parameters from the docstring. + Args: + docstring (str): The docstring to be parsed. Returns: - list[Tuple[str, str, str]]: A list of input paramter info. Each info is a triple of (param name, param type, param description) + Tuple[str, str]: A tuple of (overall description, parameter description) """ - def parse_returns(self) -> list[Tuple[str, str]]: - """Parse and return the output information from the docstring. - - Returns: - list[Tuple[str, str]]: A list of output info. Each info is a tuple of (return type, return description) - """ - - @staticmethod - def check_and_parse_optional(param_type: str) -> Tuple[bool, str]: - """Check if a parameter is optional and return a processed param_type rid of the optionality info if so""" - - @staticmethod - def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]: - """Check if a parameter has a default value and return the default value if so""" - - @staticmethod - def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]: - """Check if a parameter description includes an enum and return enum values if so""" - class reSTDocstringParser(DocstringParser): """A parser for reStructuredText (reST) docstring""" @@ -48,40 +26,18 @@ class reSTDocstringParser(DocstringParser): class GoogleDocstringParser(DocstringParser): """A parser for Google-stype docstring""" - docstring: str - - def parse_desc(self) -> str: - description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", self.docstring, re.DOTALL) - description = remove_spaces(description_match.group(1)) if description_match else "" - return description - - def parse_params(self) -> list[Tuple[str, str, str]]: - args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", self.docstring, re.DOTALL) - _args = args_match.group(1).strip() if args_match else "" - # variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)") - variable_pattern = re.compile( - r"(\w+)\s*\((.*?)\):\s*(.*?)(?=\n\s*\w+\s*\(|\Z)", re.DOTALL - ) # (?=\n\w+\s*\(|\Z) is to assert that what follows is either the start of the next parameter (indicated by a newline, some word characters, and an opening parenthesis) or the end of the string (\Z). - params = variable_pattern.findall(_args) - return params - - def parse_returns(self) -> list[Tuple[str, str]]: - returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", self.docstring, re.DOTALL) - returns = returns_match.group(1).strip() if returns_match else "" - return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$") - returns = return_pattern.findall(returns) - return returns - @staticmethod - def check_and_parse_optional(param_type: str) -> Tuple[bool, str]: - return "optional" in param_type, param_type.replace(", optional", "") + def parse(docstring: str) -> Tuple[str, str]: + if not docstring: + return "", "" - @staticmethod - def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]: - default_val = re.search(r"Defaults to (.+?)\.", param_desc) - return (True, default_val.group(1)) if default_val else (False, "") + docstring = remove_spaces(docstring) - @staticmethod - def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]: - enum_val = re.search(r"Enum: \[(.+?)\]", param_desc) - return (True, [e.strip() for e in enum_val.group(1).split(",")]) if enum_val else (False, []) + if "Args:" in docstring: + overall_desc, param_desc = docstring.split("Args:") + param_desc = "Args:" + param_desc + else: + overall_desc = docstring + param_desc = "" + + return overall_desc, param_desc