mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
moving files
This commit is contained in:
parent
d7ab4d315d
commit
c8da839afe
38 changed files with 27 additions and 1022 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -173,6 +173,7 @@ tests/metagpt/utils/file_repo_git
|
|||
*.png
|
||||
htmlcov
|
||||
htmlcov.*
|
||||
cov.xml
|
||||
*.dot
|
||||
*.pkl
|
||||
*-structure.csv
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@
|
|||
1. HTML Layout: Outputs the HTML code for the page.
|
||||
1. CSS Styles (styles.css): Outputs the CSS code for the page.
|
||||
|
||||
1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/sd_engine.
|
||||
1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/libs/sd_engine.py.
|
||||
|
||||
1. Configuration instructions for SD Skills: The SD interface is currently deployed based on *https://github.com/AUTOMATIC1111/stable-diffusion-webui*. **For environmental configurations and model downloads, please refer to the aforementioned GitHub repository. To initiate the SD service that supports API calls, run the command specified in cmd with the parameter nowebui, i.e.,
|
||||
|
||||
|
|
|
|||
|
|
@ -70,8 +70,8 @@ TMP = METAGPT_ROOT / "tmp"
|
|||
SOURCE_ROOT = METAGPT_ROOT / "metagpt"
|
||||
PROMPT_PATH = SOURCE_ROOT / "prompts"
|
||||
SKILL_DIRECTORY = SOURCE_ROOT / "skills"
|
||||
TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/functions/schemas"
|
||||
TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/functions/libs"
|
||||
TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/schemas"
|
||||
TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/libs"
|
||||
|
||||
|
||||
# REAL CONSTS
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ Keep dataset column information updated before model train.
|
|||
# Task
|
||||
Update and print the dataset's column information only if the train or test data has changed. Use the following code:
|
||||
```python
|
||||
from metagpt.tools.functions.libs.data_preprocess import get_column_info
|
||||
from metagpt.tools.libs.data_preprocess import get_column_info
|
||||
|
||||
column_info = get_column_info(df)
|
||||
print("column_info")
|
||||
|
|
@ -248,7 +248,7 @@ when current task is "do data preprocess, like fill missing value, handle outlie
|
|||
```python
|
||||
# Step 1: fill missing value
|
||||
# Tools used: ['FillMissingValue']
|
||||
from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
|
||||
from metagpt.tools.libs.data_preprocess import FillMissingValue
|
||||
|
||||
train_processed = train.copy()
|
||||
test_processed = test.copy()
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
from enum import Enum
|
||||
from metagpt.tools import tool_types # this registers all tool types
|
||||
from metagpt.tools.functions import libs # this registers all tools
|
||||
from metagpt.tools import libs # this registers all tools
|
||||
from metagpt.tools.tool_registry import TOOL_REGISTRY
|
||||
|
||||
_ = tool_types # Avoid pre-commit error
|
||||
|
|
|
|||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2023/11/16 16:32
|
||||
# @Author : lidanyang
|
||||
# @File : __init__.py
|
||||
# @Desc :
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2023/12/10 20:12
|
||||
# @Author : lidanyang
|
||||
# @File : base
|
||||
# @Desc :
|
||||
class MLProcess:
    """Base interface for fit/transform style processing tools.

    Subclasses are expected to override ``fit`` and ``transform``;
    ``fit_transform`` simply chains the two.
    """

    def fit(self, df):
        """Learn any state needed from ``df``.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    def transform(self, df):
        """Apply the processing step to ``df`` and return the result.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    def fit_transform(self, df):
        """Call ``fit(df)`` and then return ``transform(df)``."""
        self.fit(df)
        return self.transform(df)
|
||||
|
|
@ -1,126 +0,0 @@
|
|||
import ast
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
import inspect
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
def extract_function_signatures(file_path):
    """Describe the user-defined functions found in one UDF module file.

    The file is parsed with ``ast``; every ``FunctionDef`` whose name is not a
    dunder is looked up on the imported module
    ``metagpt.tools.functions.libs.udf.<file stem>``. Functions that resolve are
    also bound into this module's globals (side effect kept from the original).

    Args:
        file_path: Path of the Python source file; its stem is used as the module name.

    Returns:
        A tuple ``(function_signatures, function_returns)`` of two parallel lists:
        - ``function_signatures``: dicts with ``udf_name`` (``name(arg, ...)``),
          ``udf_path`` (an import statement) and ``udf_doc`` (``inspect.getdoc``).
        - ``function_returns``: dicts with ``udf_name`` and ``udf_returns``, the
          comma-separated names from the first source line starting with
          ``return ``, or ``[None]`` when there is no such line.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        source_code = file.read()

    tree = ast.parse(source_code)
    # Path.stem drops the extension; the previous slice produced "" for a suffix-less path.
    module_name = Path(file_path).stem
    function_signatures = []
    function_returns = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.FunctionDef):
            continue
        function_name = node.name
        # Only user-defined functions: skip dunder names.
        if function_name.startswith("__") and function_name.endswith("__"):
            continue

        # NOTE(review): the package path is hard-coded; it must track the real location of the udf package.
        module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}")
        if not hasattr(module, function_name):
            # ast.walk also yields methods and nested functions, which are not
            # module attributes; skip them instead of raising AttributeError.
            continue
        function = getattr(module, function_name)

        # Expose the function in the current namespace.
        globals().update({function_name: function})

        # Positional parameter names only, as in the original signature string.
        args = [arg.arg for arg in node.args.args]
        function_signatures.append(
            {
                "udf_name": f"{function_name}({', '.join(args)})",
                "udf_path": f"from metagpt.tools.functions.libs.udf.{module_name} import {function_name}",
                "udf_doc": inspect.getdoc(function),
            }
        )

        # Names of the returned variables, taken from the first `return ...` line.
        source_lines, _ = inspect.getsourcelines(function)
        return_line = next(
            (line.strip() for line in source_lines if line.strip().startswith("return ")),
            None,
        )
        if return_line is None:
            # Function without a return value.
            udf_returns = [None]
        else:
            udf_returns = [var.strip() for var in return_line[len("return ") :].split(",")]
        # Exactly one entry per function keeps both result lists aligned for zip().
        function_returns.append({"udf_name": function_name, "udf_returns": udf_returns})
    return function_signatures, function_returns
|
||||
|
||||
|
||||
def get_function_signatures_in_folder(folder_path):
    """Collect function signatures and return names from every module in a folder.

    Args:
        folder_path: Directory to scan. Only direct children ending in ``.py``
            are considered, and ``__init__.py`` is excluded.

    Returns:
        A tuple ``(all_function_signatures, all_function_returns)`` concatenating
        the per-file results of ``extract_function_signatures``.
    """
    # os.listdir returns entries in arbitrary order; sort so the result is stable across runs.
    python_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".py") and f != "__init__.py")
    all_function_signatures = []
    all_function_returns = []

    for file_name in python_files:
        file_path = os.path.join(folder_path, file_name)
        function_signatures, function_returns = extract_function_signatures(file_path)
        all_function_signatures.extend(function_signatures)
        all_function_returns.extend(function_returns)
    return all_function_signatures, all_function_returns
|
||||
|
||||
|
||||
# Create Tools Yaml Style Schema
|
||||
def docstring_to_yaml(docstring: str, return_vars: List[str] = None):
    """Convert a Google-style docstring into a schema dictionary.

    Args:
        docstring: Docstring text with optional ``Args:``, ``Returns:`` and
            ``Raises:`` sections. ``None`` yields an empty dict.
        return_vars: Names of the returned variables; a non-list value is
            wrapped in a single-element list. Names are paired in order with
            the ``type: description`` lines of the ``Returns:`` section.

    Returns:
        A dict with ``description``, ``parameters`` (``properties`` and
        ``required``) and ``returns``, or ``{}`` when ``docstring`` is ``None``.
    """
    logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n")
    if docstring is None:
        return {}

    # Summary: everything before the first section header.
    description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", docstring, re.DOTALL)
    description = description_match.group(1).strip() if description_match else ""

    # Args section: lines shaped like `name (type): description`.
    args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", docstring, re.DOTALL)
    _args = args_match.group(1).strip() if args_match else ""
    params = re.findall(r"(\w+)\s*\((.*?)\):\s*(.*)", _args)

    # Returns section: one `type: description` entry per line.
    returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", docstring, re.DOTALL)
    returns_text = returns_match.group(1).strip() if returns_match else ""
    # MULTILINE lets every line of a multi-value Returns section match (previously
    # a multi-line section matched nothing); the lazy first group splits at the
    # first colon so a description may itself contain ':'.
    return_pattern = re.compile(r"^[ \t]*(.*?)[ \t]*:[ \t]*(.*)$", re.MULTILINE)

    # Attach the returned variable names to the parsed (type, description) pairs.
    return_vars = return_vars if isinstance(return_vars, list) else [return_vars]
    returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns_text), return_vars)]

    # Assemble the YAML-ready dictionary.
    yaml_data = {
        "description": description.strip(".").strip(),
        "parameters": {
            "properties": {name: {"type": type_, "description": desc} for name, type_, desc in params},
            "required": [name for name, _, _ in params],
        },
        "returns": {ret[0]: {"type": ret[1], "description": ret[2]} for ret in returns},
    }
    return yaml_data
|
||||
|
||||
|
||||
def extract_function_schema_yaml_in_folder(folder_path: str):
    """Build a YAML document describing every documented function in a folder.

    Args:
        folder_path: Directory passed to ``get_function_signatures_in_folder``.

    Returns:
        A block-style YAML string mapping each function name to the schema
        produced by ``docstring_to_yaml`` plus ``type: function``. Functions
        whose docstring is empty or missing are left out.
    """
    signatures, returns = get_function_signatures_in_folder(folder_path)
    schemas = {}
    for signature, returned in zip(signatures, returns):
        doc = signature["udf_doc"]
        if not doc:
            # Nothing to describe without a docstring.
            continue
        schema = docstring_to_yaml(doc, returned["udf_returns"])
        schema["type"] = "function"
        schemas[returned["udf_name"]] = schema
    return yaml.dump(schemas, default_flow_style=False)
|
||||
|
||||
|
||||
# Build the UDF registry at import time from the modules that sit next to this file.
folder_path = str(Path(__file__).parent.absolute())
function_signatures, function_returns = get_function_signatures_in_folder(folder_path)

# Independent list copy of the collected signature dicts.
UDFS = list(function_signatures)

UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path)
# The text was produced by yaml.dump from plain dicts, strings and None, so the
# restricted safe loader is sufficient and avoids FullLoader's object construction.
UDFS_YAML: dict = yaml.safe_load(UDFS_YAML_STR)
|
||||
|
|
@ -1,306 +0,0 @@
|
|||
FillMissingValue:
|
||||
type: class
|
||||
description: "Completing missing values with simple strategies"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
strategy:
|
||||
type: str
|
||||
description: "the imputation strategy, notice mean/median can only be used for numeric features"
|
||||
default: mean
|
||||
enum:
|
||||
- mean
|
||||
- median
|
||||
- most_frequent
|
||||
- constant
|
||||
fill_value:
|
||||
type: int
|
||||
description: "fill_value is used to replace all occurrences of missing_values"
|
||||
default: null
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the FillMissingValue model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
MinMaxScale:
|
||||
type: class
|
||||
description: "Transform features by scaling each feature to a range, which is (0, 1)"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the MinMaxScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
StandardScale:
|
||||
type: class
|
||||
description: "Standardize features by removing the mean and scaling to unit variance"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the StandardScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
MaxAbsScale:
|
||||
type: class
|
||||
description: "Scale each feature by its maximum absolute value"
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "columns to be processed"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the MaxAbsScale model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
LabelEncode:
|
||||
type: class
|
||||
description: "Apply label encoding to specified categorical columns in-place."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "Categorical columns to be label encoded"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the LabelEncode model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
OneHotEncode:
|
||||
type: class
|
||||
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: "Categorical columns to be one-hot encoded and dropped"
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: "Fit the OneHotEncode model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
|
@ -1,548 +0,0 @@
|
|||
PolynomialExpansion:
|
||||
type: class
|
||||
description: "Add polynomial and interaction features from selected numeric columns to input DataFrame."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns for polynomial expansion."
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
degree:
|
||||
type: int
|
||||
description: "The degree of the polynomial features."
|
||||
default: 2
|
||||
required:
|
||||
- cols
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the PolynomialExpansion model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame without duplicated columns."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame without duplicated columns."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
CatCount:
|
||||
type: class
|
||||
description: "Add value counts of a categorical column as new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column for value counts."
|
||||
required:
|
||||
- col
|
||||
fit:
|
||||
description: "Fit the CatCount model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
TargetMeanEncoder:
|
||||
type: class
|
||||
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the TargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
KFoldTargetMeanEncoder:
|
||||
type: class
|
||||
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
col:
|
||||
type: str
|
||||
description: "Column to be k-fold mean encoded."
|
||||
label:
|
||||
type: str
|
||||
description: "Predicted label column."
|
||||
n_splits:
|
||||
type: int
|
||||
description: "Number of splits for K-fold."
|
||||
default: 5
|
||||
random_state:
|
||||
type: int
|
||||
description: "Random seed."
|
||||
default: 2021
|
||||
required:
|
||||
- col
|
||||
- label
|
||||
fit:
|
||||
description: "Fit the KFoldTargetMeanEncoder model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
CatCross:
|
||||
type: class
|
||||
description: "Add pairwise crossed features and convert them to numerical features."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be pairwise crossed, at least 2 columns."
|
||||
max_cat_num:
|
||||
type: int
|
||||
description: "Maximum unique categories per crossed feature."
|
||||
default: 100
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the CatCross model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
GroupStat:
|
||||
type: class
|
||||
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
group_col:
|
||||
type: str
|
||||
description: "Column used for grouping."
|
||||
agg_col:
|
||||
type: str
|
||||
description: "Column on which aggregation is performed."
|
||||
agg_funcs:
|
||||
type: list
|
||||
description: >-
|
||||
List of aggregation functions to apply, such as ['mean', 'std'].
|
||||
Each function must be supported by pandas.
|
||||
required:
|
||||
- group_col
|
||||
- agg_col
|
||||
- agg_funcs
|
||||
fit:
|
||||
description: "Fit the GroupStat model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
SplitBins:
|
||||
type: class
|
||||
description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
cols:
|
||||
type: list
|
||||
description: "Columns to be binned inplace."
|
||||
strategy:
|
||||
type: str
|
||||
description: "Strategy used to define the widths of the bins."
|
||||
default: quantile
|
||||
enum:
|
||||
- quantile
|
||||
- uniform
|
||||
- kmeans
|
||||
required:
|
||||
- cols
|
||||
fit:
|
||||
description: "Fit the SplitBins model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
GeneralSelection:
|
||||
type: class
|
||||
description: "Drop all nan feats and feats with only one unique value."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the GeneralSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame."
|
||||
|
||||
|
||||
TreeBasedSelection:
|
||||
type: class
|
||||
description: "Select features based on tree-based model and remove features with low importance."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
task_type:
|
||||
type: str
|
||||
description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression."
|
||||
enum:
|
||||
- cls
|
||||
- mcls
|
||||
- reg
|
||||
required:
|
||||
- label_col
|
||||
- task_type
|
||||
fit:
|
||||
description: "Fit the TreeBasedSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
|
||||
VarianceBasedSelection:
|
||||
type: class
|
||||
description: "Select features based on variance and remove features with low variance."
|
||||
methods:
|
||||
__init__:
|
||||
description: "Initialize self."
|
||||
parameters:
|
||||
properties:
|
||||
label_col:
|
||||
type: str
|
||||
description: "Label column name."
|
||||
threshold:
|
||||
type: float
|
||||
description: "Threshold for variance."
|
||||
default: 0.0
|
||||
required:
|
||||
- label_col
|
||||
fit:
|
||||
description: "Fit the VarianceBasedSelection model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
transform:
|
||||
description: "Transform the input DataFrame with the fitted model."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
fit_transform:
|
||||
description: "Fit and transform the input DataFrame."
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The input DataFrame."
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
df:
|
||||
type: DataFrame
|
||||
description: "The transformed DataFrame containing label_col."
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
# @Author : lidanyang
|
||||
# @File : __init__.py
|
||||
# @Desc :
|
||||
from metagpt.tools.functions.libs import (
|
||||
from metagpt.tools.libs import (
|
||||
data_preprocess,
|
||||
feature_engineering,
|
||||
)
|
||||
|
|
@ -13,13 +13,24 @@ from sklearn.preprocessing import (
|
|||
StandardScaler,
|
||||
)
|
||||
|
||||
from metagpt.tools.functions.libs.base import MLProcess
|
||||
from metagpt.tools.tool_data_type import ToolTypeEnum
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
|
||||
TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value
|
||||
|
||||
|
||||
class MLProcess:
    """Base interface for fit/transform style processing tools.

    Subclasses are expected to override ``fit`` and ``transform``;
    ``fit_transform`` simply chains the two.
    """

    def fit(self, df):
        """Learn any state needed from ``df``.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    def transform(self, df):
        """Apply the processing step to ``df`` and return the result.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError

    def fit_transform(self, df):
        """Call ``fit(df)`` and then return ``transform(df)``."""
        self.fit(df)
        return self.transform(df)
|
||||
|
||||
|
||||
@register_tool(tool_type_name=TOOL_TYPE)
|
||||
class FillMissingValue(MLProcess):
|
||||
def __init__(
|
||||
|
|
@ -15,7 +15,7 @@ from sklearn.feature_selection import VarianceThreshold
|
|||
from sklearn.model_selection import KFold
|
||||
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
|
||||
|
||||
from metagpt.tools.functions.libs.base import MLProcess
|
||||
from metagpt.tools.libs.data_preprocess import MLProcess
|
||||
from metagpt.tools.tool_data_type import ToolTypeEnum
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ payload = {
|
|||
default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"
|
||||
|
||||
|
||||
@register_tool(tool_type_name=ToolTypeEnum.STABLE_DIFFUSION)
|
||||
@register_tool(tool_type_name=ToolTypeEnum.STABLE_DIFFUSION.value)
|
||||
class SDEngine:
|
||||
def __init__(self, sd_url=""):
|
||||
# Initialize the SDEngine with configuration
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2023/11/17 10:24
|
||||
# @Author : lidanyang
|
||||
# @File : __init__.py
|
||||
# @Desc :
|
||||
|
|
@ -5,7 +5,7 @@ import numpy.testing as npt
|
|||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from metagpt.tools.functions.libs.data_preprocess import (
|
||||
from metagpt.tools.libs.data_preprocess import (
|
||||
FillMissingValue,
|
||||
LabelEncode,
|
||||
MaxAbsScale,
|
||||
|
|
@ -3,7 +3,7 @@ import pandas as pd
|
|||
import pytest
|
||||
from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris
|
||||
|
||||
from metagpt.tools.functions.libs.feature_engineering import (
|
||||
from metagpt.tools.libs.feature_engineering import (
|
||||
CatCount,
|
||||
CatCross,
|
||||
ExtractTimeComps,
|
||||
|
|
@ -147,6 +147,7 @@ def test_general_selection(mock_dataset):
|
|||
assert "cat2" not in transformed.columns
|
||||
|
||||
|
||||
@pytest.mark.skip # skip because TreeBasedSelection needs lgb as dependency
|
||||
def test_tree_based_selection(mock_dataset):
|
||||
# regression
|
||||
data = load_sklearn_data("housing")
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
# @Desc :
|
||||
import pytest
|
||||
|
||||
from metagpt.tools.sd_engine import SDEngine
|
||||
from metagpt.tools.libs.sd_engine import SDEngine
|
||||
|
||||
|
||||
def test_sd_tools():
|
||||
|
|
@ -3,7 +3,7 @@ import json
|
|||
import yaml
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.tools.functions.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml
|
||||
from metagpt.tools.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml
|
||||
|
||||
|
||||
def test_udfs():
|
||||
Loading…
Add table
Add a link
Reference in a new issue