diff --git a/.gitignore b/.gitignore index 87c7b3120..a69b3b1c2 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,7 @@ tests/metagpt/utils/file_repo_git *.png htmlcov htmlcov.* +cov.xml *.dot *.pkl *-structure.csv diff --git a/docs/FAQ-EN.md b/docs/FAQ-EN.md index d4a9f6097..145d27be9 100644 --- a/docs/FAQ-EN.md +++ b/docs/FAQ-EN.md @@ -130,7 +130,7 @@ 1. HTML Layout: Outputs the HTML code for the page. 1. CSS Styles (styles.css): Outputs the CSS code for the page. - 1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/sd_engine. + 1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/libs/sd_engine.py. 1. Configuration instructions for SD Skills: The SD interface is currently deployed based on *https://github.com/AUTOMATIC1111/stable-diffusion-webui* **For environmental configurations and model downloads, please refer to the aforementioned GitHub repository. To initiate the SD service that supports API calls, run the command specified in cmd with the parameter nowebui, i.e., diff --git a/metagpt/const.py b/metagpt/const.py index a57464a19..7a19e81d0 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -70,8 +70,8 @@ TMP = METAGPT_ROOT / "tmp" SOURCE_ROOT = METAGPT_ROOT / "metagpt" PROMPT_PATH = SOURCE_ROOT / "prompts" SKILL_DIRECTORY = SOURCE_ROOT / "skills" -TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/functions/schemas" -TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/functions/libs" +TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/schemas" +TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/libs" # REAL CONSTS diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py index 31d754a9e..ff29d5ed4 100644 --- a/metagpt/prompts/ml_engineer.py +++ b/metagpt/prompts/ml_engineer.py @@ -15,7 +15,7 @@ Keep dataset column information updated before model train. # Task Update and print the dataset's column information only if the train or test data has changed. Use the following code: ```python -from metagpt.tools.functions.libs.data_preprocess import get_column_info +from metagpt.tools.libs.data_preprocess import get_column_info column_info = get_column_info(df) print("column_info") @@ -248,7 +248,7 @@ when current task is "do data preprocess, like fill missing value, handle outlie ```python # Step 1: fill missing value # Tools used: ['FillMissingValue'] -from metagpt.tools.functions.libs.data_preprocess import FillMissingValue +from metagpt.tools.libs.data_preprocess import FillMissingValue train_processed = train.copy() test_processed = test.copy() diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py index 4ca46fc89..23b51533d 100644 --- a/metagpt/tools/__init__.py +++ b/metagpt/tools/__init__.py @@ -8,7 +8,7 @@ from enum import Enum from metagpt.tools import tool_types # this registers all tool types -from metagpt.tools.functions import libs # this registers all tools +from metagpt.tools import libs # this registers all tools from metagpt.tools.tool_registry import TOOL_REGISTRY _ = tool_types # Avoid pre-commit error diff --git a/metagpt/tools/functions/__init__.py b/metagpt/tools/functions/__init__.py deleted file mode 100644 index a0a43f507..000000000 --- a/metagpt/tools/functions/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/16 16:32 -# @Author : lidanyang -# @File : __init__.py -# @Desc : diff --git a/metagpt/tools/functions/libs/base.py b/metagpt/tools/functions/libs/base.py deleted file mode 100644 index c39adc66b..000000000 --- a/metagpt/tools/functions/libs/base.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/12/10 20:12 -# @Author : lidanyang -# @File : base -# @Desc : -class MLProcess(object): - def fit(self, df): - raise NotImplementedError - - def transform(self, df): - raise NotImplementedError - - def fit_transform(self, df): - self.fit(df) - return self.transform(df) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py deleted file mode 100644 index 6644565d7..000000000 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ /dev/null @@ -1,126 +0,0 @@ -import ast -import os -import re -import yaml -import inspect -import importlib -from pathlib import Path -from typing import List -from metagpt.logs import logger - - -def extract_function_signatures(file_path): - with open(file_path, "r", encoding="utf-8") as file: - source_code = file.read() - - tree = ast.parse(source_code) - function_signatures = [] - function_returns = [] - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef): - # 只提取用户自定义函数,排除内置函数 - if not (node.name.startswith("__") and node.name.endswith("__")): - # 获取函数名 - function_name = node.name - # 获取参数列表 - args = [arg.arg for arg in node.args.args] - # 获取函数签名 - function_signature = f"{function_name}({', '.join(args)})" - # 导入函数 - module_name = Path(file_path).parts[-1][: -len(Path(file_path).suffix)] - module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}") - # 将函数导入到当前命名空间 - globals().update({function_name: getattr(module, function_name)}) - # 获取函数注释和函数路径 - function_schema = { - "udf_name": function_signature, - "udf_path": f"from metagpt.tools.functions.libs.udf.{module_name} import {function_name}", - "udf_doc": inspect.getdoc(getattr(module, function_name)), - } - function_signatures.append(function_schema) - # 获取函数返回变量名 - source_lines, _ = inspect.getsourcelines(getattr(module, function_name)) - for line in source_lines: - if line.strip().startswith("return "): - function_returns.append( - { - "udf_name": function_name, - "udf_returns": [var.strip() for var in line.strip()[len("return ") :].split(",")], - } - ) - break - - # 没有返回值的函数 - if not function_returns or function_returns[-1]["udf_name"] != function_name: - function_returns.append({"udf_name": function_name, "udf_returns": [None]}) - return function_signatures, function_returns - - -def get_function_signatures_in_folder(folder_path): - python_files = [f for f in os.listdir(folder_path) if f.endswith(".py") and f != "__init__.py"] - all_function_signatures = [] - all_function_returns = [] - - for file_name in python_files: - file_path = os.path.join(folder_path, file_name) - function_signatures, function_returns = extract_function_signatures(file_path) - all_function_signatures.extend(function_signatures) - all_function_returns.extend(function_returns) - return all_function_signatures, all_function_returns - - -# Create Tools Yaml Style Schema -def docstring_to_yaml(docstring: str, return_vars: List[str] = None): - logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n") - if docstring is None: - return {} - # 匹配简介部分 - description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", docstring, re.DOTALL) - description = description_match.group(1).strip() if description_match else "" - - # 匹配Args部分 - args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", docstring, re.DOTALL) - _args = args_match.group(1).strip() if args_match else "" - variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)") - params = variable_pattern.findall(_args) - if not params: - params = ((None, None, None),) - # 匹配Returns部分 - returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", docstring, re.DOTALL) - returns = returns_match.group(1).strip() if returns_match else "" - return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$") - # 添加返回值变量名 - return_vars = return_vars if isinstance(return_vars, list) else [return_vars] - returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns), return_vars)] - # 构建YAML字典 - yaml_data = { - "description": description.strip(".").strip(), - "parameters": { - "properties": { - param[0]: {"type": param[1], "description": param[2]} for param in params if param[0] is not None - }, - "required": [param[0] for param in params if param[0] is not None], - }, - "returns": {ret[0]: {"type": ret[1], "description": ret[2]} for ret in returns}, - } - return yaml_data - - -def extract_function_schema_yaml_in_folder(folder_path: str): - function_signatures, function_returns = get_function_signatures_in_folder(folder_path) - function_schema_yaml_data = {} - for func_docstring, func_returns in zip(function_signatures, function_returns): - if func_docstring["udf_doc"]: - fun_yaml_data = docstring_to_yaml(func_docstring["udf_doc"], func_returns["udf_returns"]) - fun_yaml_data.update({"type": "function"}) - function_schema_yaml_data.update({func_returns["udf_name"]: fun_yaml_data}) - return yaml.dump(function_schema_yaml_data, default_flow_style=False) - - -folder_path = str(Path(__file__).parent.absolute()) -function_signatures, function_returns = get_function_signatures_in_folder(folder_path) - -UDFS = [func for func in function_signatures] - -UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path) -UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, Loader=yaml.FullLoader) diff --git a/metagpt/tools/functions/schemas/data_preprocess.yml b/metagpt/tools/functions/schemas/data_preprocess.yml deleted file mode 100644 index 4de697abd..000000000 --- a/metagpt/tools/functions/schemas/data_preprocess.yml +++ /dev/null @@ -1,306 +0,0 @@ -FillMissingValue: - type: class - description: "Completing missing values with simple strategies" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - strategy: - type: str - description: "the imputation strategy, notice mean/median can only be used for numeric features" - default: mean - enum: - - mean - - median - - most_frequent - - constant - fill_value: - type: int - description: "fill_value is used to replace all occurrences of missing_values" - default: null - required: - - features - fit: - description: "Fit the FillMissingValue model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -MinMaxScale: - type: class - description: "Transform features by scaling each feature to a range, witch is (0, 1)" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - required: - - features - fit: - description: "Fit the MinMaxScale model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -StandardScale: - type: class - description: "Standardize features by removing the mean and scaling to unit variance" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - required: - - features - fit: - description: "Fit the StandardScale model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -MaxAbsScale: - type: class - description: "cale each feature by its maximum absolute value" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - required: - - features - fit: - description: "Fit the MaxAbsScale model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -LabelEncode: - type: class - description: "Apply label encoding to specified categorical columns in-place." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "Categorical columns to be label encoded" - required: - - features - fit: - description: "Fit the LabelEncode model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -OneHotEncode: - type: class - description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "Categorical columns to be one-hot encoded and dropped" - required: - - features - fit: - description: "Fit the OneHotEncoding model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/functions/schemas/feature_engineering.yml b/metagpt/tools/functions/schemas/feature_engineering.yml deleted file mode 100644 index 62e6ad5b3..000000000 --- a/metagpt/tools/functions/schemas/feature_engineering.yml +++ /dev/null @@ -1,548 +0,0 @@ -PolynomialExpansion: - type: class - description: "Add polynomial and interaction features from selected numeric columns to input DataFrame." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - cols: - type: list - description: "Columns for polynomial expansion." - label_col: - type: str - description: "Label column name." - degree: - type: int - description: "The degree of the polynomial features." - default: 2 - required: - - cols - - label_col - fit: - description: "Fit the PolynomialExpansion model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame without duplicated columns." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame without duplicated columns." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -CatCount: - type: class - description: "Add value counts of a categorical column as new feature." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - col: - type: str - description: "Column for value counts." - required: - - col - fit: - description: "Fit the CatCount model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -TargetMeanEncoder: - type: class - description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - col: - type: str - description: "Column to be mean encoded." - label: - type: str - description: "Predicted label column." - required: - - col - - label - fit: - description: "Fit the TargetMeanEncoder model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -KFoldTargetMeanEncoder: - type: class - description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - col: - type: str - description: "Column to be k-fold mean encoded." - label: - type: str - description: "Predicted label column." - n_splits: - type: int - description: "Number of splits for K-fold." - default: 5 - random_state: - type: int - description: "Random seed." - default: 2021 - required: - - col - - label - fit: - description: "Fit the KFoldTargetMeanEncoder model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -CatCross: - type: class - description: "Add pairwise crossed features and convert them to numerical features." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - cols: - type: list - description: "Columns to be pairwise crossed, at least 2 columns." - max_cat_num: - type: int - description: "Maximum unique categories per crossed feature." - default: 100 - required: - - cols - fit: - description: "Fit the CatCross model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -GroupStat: - type: class - description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '__by_'." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - group_col: - type: str - description: "Column used for grouping." - agg_col: - type: str - description: "Column on which aggregation is performed." - agg_funcs: - type: list - description: >- - List of aggregation functions to apply, such as ['mean', 'std']. - Each function must be supported by pandas. - required: - - group_col - - agg_col - - agg_funcs - fit: - description: "Fit the GroupStat model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -SplitBins: - type: class - description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - cols: - type: list - description: "Columns to be binned inplace." - strategy: - type: str - description: "Strategy used to define the widths of the bins." - default: quantile - enum: - - quantile - - uniform - - kmeans - required: - - cols - fit: - description: "Fit the SplitBins model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -GeneralSelection: - type: class - description: "Drop all nan feats and feats with only one unique value." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - label_col: - type: str - description: "Label column name." - required: - - label_col - fit: - description: "Fit the GeneralSelection model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - - -TreeBasedSelection: - type: class - description: "Select features based on tree-based model and remove features with low importance." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - label_col: - type: str - description: "Label column name." - task_type: - type: str - description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression." - enum: - - cls - - mcls - - reg - required: - - label_col - - task_type - fit: - description: "Fit the TreeBasedSelection model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame contain label_col." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame contain label_col." - -VarianceBasedSelection: - type: class - description: "Select features based on variance and remove features with low variance." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - label_col: - type: str - description: "Label column name." - threshold: - type: float - description: "Threshold for variance." - default: 0.0 - required: - - label_col - fit: - description: "Fit the VarianceBasedSelection model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame contain label_col." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame contain label_col." \ No newline at end of file diff --git a/metagpt/tools/functions/libs/__init__.py b/metagpt/tools/libs/__init__.py similarity index 86% rename from metagpt/tools/functions/libs/__init__.py rename to metagpt/tools/libs/__init__.py index f0a61a7d9..3d74674aa 100644 --- a/metagpt/tools/functions/libs/__init__.py +++ b/metagpt/tools/libs/__init__.py @@ -4,7 +4,7 @@ # @Author : lidanyang # @File : __init__.py # @Desc : -from metagpt.tools.functions.libs import ( +from metagpt.tools.libs import ( data_preprocess, feature_engineering, ) diff --git a/metagpt/tools/functions/libs/data_preprocess.py b/metagpt/tools/libs/data_preprocess.py similarity index 96% rename from metagpt/tools/functions/libs/data_preprocess.py rename to metagpt/tools/libs/data_preprocess.py index 019ffd34e..7cc44263d 100644 --- a/metagpt/tools/functions/libs/data_preprocess.py +++ b/metagpt/tools/libs/data_preprocess.py @@ -13,13 +13,24 @@ from sklearn.preprocessing import ( StandardScaler, ) -from metagpt.tools.functions.libs.base import MLProcess from metagpt.tools.tool_data_type import ToolTypeEnum from metagpt.tools.tool_registry import register_tool TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value +class MLProcess(object): + def fit(self, df): + raise NotImplementedError + + def transform(self, df): + raise NotImplementedError + + def fit_transform(self, df): + self.fit(df) + return self.transform(df) + + @register_tool(tool_type_name=TOOL_TYPE) class FillMissingValue(MLProcess): def __init__( diff --git a/metagpt/tools/functions/libs/feature_engineering.py b/metagpt/tools/libs/feature_engineering.py similarity index 99% rename from metagpt/tools/functions/libs/feature_engineering.py rename to metagpt/tools/libs/feature_engineering.py index cd03592a6..ed5c1be72 100644 --- a/metagpt/tools/functions/libs/feature_engineering.py +++ b/metagpt/tools/libs/feature_engineering.py @@ -15,7 +15,7 @@ from sklearn.feature_selection import VarianceThreshold from sklearn.model_selection import KFold from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures -from metagpt.tools.functions.libs.base import MLProcess +from metagpt.tools.libs.data_preprocess import MLProcess from metagpt.tools.tool_data_type import ToolTypeEnum from metagpt.tools.tool_registry import register_tool diff --git a/metagpt/tools/sd_engine.py b/metagpt/tools/libs/sd_engine.py similarity index 98% rename from metagpt/tools/sd_engine.py rename to metagpt/tools/libs/sd_engine.py index 2e3f36ef8..ad63c2505 100644 --- a/metagpt/tools/sd_engine.py +++ b/metagpt/tools/libs/sd_engine.py @@ -53,7 +53,7 @@ payload = { default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution" -@register_tool(tool_type_name=ToolTypeEnum.STABLE_DIFFUSION) +@register_tool(tool_type_name=ToolTypeEnum.STABLE_DIFFUSION.value) class SDEngine: def __init__(self, sd_url=""): # Initialize the SDEngine with configuration diff --git a/metagpt/tools/functions/schemas/__init__.py b/metagpt/tools/schemas/__init__.py similarity index 100% rename from metagpt/tools/functions/schemas/__init__.py rename to metagpt/tools/schemas/__init__.py diff --git a/metagpt/tools/functions/schemas/data_preprocess/FillMissingValue.yml b/metagpt/tools/schemas/data_preprocess/FillMissingValue.yml similarity index 100% rename from metagpt/tools/functions/schemas/data_preprocess/FillMissingValue.yml rename to metagpt/tools/schemas/data_preprocess/FillMissingValue.yml diff --git a/metagpt/tools/functions/schemas/data_preprocess/LabelEncode.yml b/metagpt/tools/schemas/data_preprocess/LabelEncode.yml similarity index 100% rename from metagpt/tools/functions/schemas/data_preprocess/LabelEncode.yml rename to metagpt/tools/schemas/data_preprocess/LabelEncode.yml diff --git a/metagpt/tools/functions/schemas/data_preprocess/MaxAbsScale.yml b/metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml similarity index 100% rename from metagpt/tools/functions/schemas/data_preprocess/MaxAbsScale.yml rename to metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml diff --git a/metagpt/tools/functions/schemas/data_preprocess/MinMaxScale.yml b/metagpt/tools/schemas/data_preprocess/MinMaxScale.yml similarity index 100% rename from metagpt/tools/functions/schemas/data_preprocess/MinMaxScale.yml rename to metagpt/tools/schemas/data_preprocess/MinMaxScale.yml diff --git a/metagpt/tools/functions/schemas/data_preprocess/OneHotEncode.yml b/metagpt/tools/schemas/data_preprocess/OneHotEncode.yml similarity index 100% rename from metagpt/tools/functions/schemas/data_preprocess/OneHotEncode.yml rename to metagpt/tools/schemas/data_preprocess/OneHotEncode.yml diff --git a/metagpt/tools/functions/schemas/data_preprocess/StandardScale.yml b/metagpt/tools/schemas/data_preprocess/StandardScale.yml similarity index 100% rename from metagpt/tools/functions/schemas/data_preprocess/StandardScale.yml rename to metagpt/tools/schemas/data_preprocess/StandardScale.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/CatCount.yml b/metagpt/tools/schemas/feature_engineering/CatCount.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/CatCount.yml rename to metagpt/tools/schemas/feature_engineering/CatCount.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/CatCross.yml b/metagpt/tools/schemas/feature_engineering/CatCross.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/CatCross.yml rename to metagpt/tools/schemas/feature_engineering/CatCross.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/GeneralSelection.yml b/metagpt/tools/schemas/feature_engineering/GeneralSelection.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/GeneralSelection.yml rename to metagpt/tools/schemas/feature_engineering/GeneralSelection.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/GroupStat.yml b/metagpt/tools/schemas/feature_engineering/GroupStat.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/GroupStat.yml rename to metagpt/tools/schemas/feature_engineering/GroupStat.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/KFoldTargetMeanEncoder.yml b/metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/KFoldTargetMeanEncoder.yml rename to metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/PolynomialExpansion.yml b/metagpt/tools/schemas/feature_engineering/PolynomialExpansion.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/PolynomialExpansion.yml rename to metagpt/tools/schemas/feature_engineering/PolynomialExpansion.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/SplitBins.yml b/metagpt/tools/schemas/feature_engineering/SplitBins.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/SplitBins.yml rename to metagpt/tools/schemas/feature_engineering/SplitBins.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/TargetMeanEncoder.yml b/metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/TargetMeanEncoder.yml rename to metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/TreeBasedSelection.yml b/metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/TreeBasedSelection.yml rename to metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml diff --git a/metagpt/tools/functions/schemas/feature_engineering/VarianceBasedSelection.yml b/metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering/VarianceBasedSelection.yml rename to metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml diff --git a/metagpt/tools/functions/schemas/stable_diffusion.yml b/metagpt/tools/schemas/stable_diffusion/SDEngine.yml similarity index 100% rename from metagpt/tools/functions/schemas/stable_diffusion.yml rename to metagpt/tools/schemas/stable_diffusion/SDEngine.yml diff --git a/tests/metagpt/tools/functions/__init__.py b/tests/metagpt/tools/functions/__init__.py deleted file mode 100644 index 7d36f3404..000000000 --- a/tests/metagpt/tools/functions/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/17 10:24 -# @Author : lidanyang -# @File : __init__.py -# @Desc : diff --git a/tests/metagpt/tools/functions/libs/__init__.py b/tests/metagpt/tools/libs/__init__.py similarity index 100% rename from tests/metagpt/tools/functions/libs/__init__.py rename to tests/metagpt/tools/libs/__init__.py diff --git a/tests/metagpt/tools/functions/libs/test_data_preprocess.py b/tests/metagpt/tools/libs/test_data_preprocess.py similarity index 97% rename from tests/metagpt/tools/functions/libs/test_data_preprocess.py rename to tests/metagpt/tools/libs/test_data_preprocess.py index 3c2d661ab..418f8adee 100644 --- a/tests/metagpt/tools/functions/libs/test_data_preprocess.py +++ b/tests/metagpt/tools/libs/test_data_preprocess.py @@ -5,7 +5,7 @@ import numpy.testing as npt import pandas as pd import pytest -from metagpt.tools.functions.libs.data_preprocess import ( +from metagpt.tools.libs.data_preprocess import ( FillMissingValue, LabelEncode, MaxAbsScale, diff --git a/tests/metagpt/tools/functions/libs/test_feature_engineering.py b/tests/metagpt/tools/libs/test_feature_engineering.py similarity index 97% rename from tests/metagpt/tools/functions/libs/test_feature_engineering.py rename to tests/metagpt/tools/libs/test_feature_engineering.py index 5b45aeb0c..3cfd5dacd 100644 --- a/tests/metagpt/tools/functions/libs/test_feature_engineering.py +++ b/tests/metagpt/tools/libs/test_feature_engineering.py @@ -3,7 +3,7 @@ import pandas as pd import pytest from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris -from metagpt.tools.functions.libs.feature_engineering import ( +from metagpt.tools.libs.feature_engineering import ( CatCount, CatCross, ExtractTimeComps, @@ -147,6 +147,7 @@ def test_general_selection(mock_dataset): assert "cat2" not in transformed.columns +@pytest.mark.skip # skip because TreeBasedSelection needs lgb as dependency def test_tree_based_selection(mock_dataset): # regression data = load_sklearn_data("housing") diff --git a/tests/metagpt/tools/functions/test_sd.py b/tests/metagpt/tools/libs/test_sd.py similarity index 93% rename from tests/metagpt/tools/functions/test_sd.py rename to tests/metagpt/tools/libs/test_sd.py index 142101cad..363cf96b9 100644 --- a/tests/metagpt/tools/functions/test_sd.py +++ b/tests/metagpt/tools/libs/test_sd.py @@ -4,7 +4,7 @@ # @Desc : import pytest -from metagpt.tools.sd_engine import SDEngine +from metagpt.tools.libs.sd_engine import SDEngine def test_sd_tools(): diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/libs/test_udf.py similarity index 95% rename from tests/metagpt/tools/functions/test_udf.py rename to tests/metagpt/tools/libs/test_udf.py index 741bd9a9f..19e523448 100644 --- a/tests/metagpt/tools/functions/test_udf.py +++ b/tests/metagpt/tools/libs/test_udf.py @@ -3,7 +3,7 @@ import json import yaml from metagpt.logs import logger -from metagpt.tools.functions.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml +from metagpt.tools.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml def test_udfs():