moving files

This commit is contained in:
yzlin 2024-01-15 11:07:29 +08:00
parent d7ab4d315d
commit c8da839afe
38 changed files with 27 additions and 1022 deletions

View file

@ -70,8 +70,8 @@ TMP = METAGPT_ROOT / "tmp"
SOURCE_ROOT = METAGPT_ROOT / "metagpt"
PROMPT_PATH = SOURCE_ROOT / "prompts"
SKILL_DIRECTORY = SOURCE_ROOT / "skills"
TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/functions/schemas"
TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/functions/libs"
TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/schemas"
TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/libs"
# REAL CONSTS

View file

@ -15,7 +15,7 @@ Keep dataset column information updated before model train.
# Task
Update and print the dataset's column information only if the train or test data has changed. Use the following code:
```python
from metagpt.tools.functions.libs.data_preprocess import get_column_info
from metagpt.tools.libs.data_preprocess import get_column_info
column_info = get_column_info(df)
print("column_info")
@ -248,7 +248,7 @@ when current task is "do data preprocess, like fill missing value, handle outlie
```python
# Step 1: fill missing value
# Tools used: ['FillMissingValue']
from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
from metagpt.tools.libs.data_preprocess import FillMissingValue
train_processed = train.copy()
test_processed = test.copy()

View file

@ -8,7 +8,7 @@
from enum import Enum
from metagpt.tools import tool_types # this registers all tool types
from metagpt.tools.functions import libs # this registers all tools
from metagpt.tools import libs # this registers all tools
from metagpt.tools.tool_registry import TOOL_REGISTRY
_ = tool_types # Avoid pre-commit error

View file

@ -1,6 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:32
# @Author : lidanyang
# @File : __init__.py
# @Desc :

View file

@ -1,16 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/12/10 20:12
# @Author : lidanyang
# @File : base
# @Desc :
class MLProcess(object):
def fit(self, df):
raise NotImplementedError
def transform(self, df):
raise NotImplementedError
def fit_transform(self, df):
self.fit(df)
return self.transform(df)

View file

@ -1,126 +0,0 @@
import ast
import os
import re
import yaml
import inspect
import importlib
from pathlib import Path
from typing import List
from metagpt.logs import logger
def extract_function_signatures(file_path):
with open(file_path, "r", encoding="utf-8") as file:
source_code = file.read()
tree = ast.parse(source_code)
function_signatures = []
function_returns = []
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
# 只提取用户自定义函数,排除内置函数
if not (node.name.startswith("__") and node.name.endswith("__")):
# 获取函数名
function_name = node.name
# 获取参数列表
args = [arg.arg for arg in node.args.args]
# 获取函数签名
function_signature = f"{function_name}({', '.join(args)})"
# 导入函数
module_name = Path(file_path).parts[-1][: -len(Path(file_path).suffix)]
module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}")
# 将函数导入到当前命名空间
globals().update({function_name: getattr(module, function_name)})
# 获取函数注释和函数路径
function_schema = {
"udf_name": function_signature,
"udf_path": f"from metagpt.tools.functions.libs.udf.{module_name} import {function_name}",
"udf_doc": inspect.getdoc(getattr(module, function_name)),
}
function_signatures.append(function_schema)
# 获取函数返回变量名
source_lines, _ = inspect.getsourcelines(getattr(module, function_name))
for line in source_lines:
if line.strip().startswith("return "):
function_returns.append(
{
"udf_name": function_name,
"udf_returns": [var.strip() for var in line.strip()[len("return ") :].split(",")],
}
)
break
# 没有返回值的函数
if not function_returns or function_returns[-1]["udf_name"] != function_name:
function_returns.append({"udf_name": function_name, "udf_returns": [None]})
return function_signatures, function_returns
def get_function_signatures_in_folder(folder_path):
python_files = [f for f in os.listdir(folder_path) if f.endswith(".py") and f != "__init__.py"]
all_function_signatures = []
all_function_returns = []
for file_name in python_files:
file_path = os.path.join(folder_path, file_name)
function_signatures, function_returns = extract_function_signatures(file_path)
all_function_signatures.extend(function_signatures)
all_function_returns.extend(function_returns)
return all_function_signatures, all_function_returns
# Create Tools Yaml Style Schema
def docstring_to_yaml(docstring: str, return_vars: List[str] = None):
logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n")
if docstring is None:
return {}
# 匹配简介部分
description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", docstring, re.DOTALL)
description = description_match.group(1).strip() if description_match else ""
# 匹配Args部分
args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", docstring, re.DOTALL)
_args = args_match.group(1).strip() if args_match else ""
variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)")
params = variable_pattern.findall(_args)
if not params:
params = ((None, None, None),)
# 匹配Returns部分
returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", docstring, re.DOTALL)
returns = returns_match.group(1).strip() if returns_match else ""
return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$")
# 添加返回值变量名
return_vars = return_vars if isinstance(return_vars, list) else [return_vars]
returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns), return_vars)]
# 构建YAML字典
yaml_data = {
"description": description.strip(".").strip(),
"parameters": {
"properties": {
param[0]: {"type": param[1], "description": param[2]} for param in params if param[0] is not None
},
"required": [param[0] for param in params if param[0] is not None],
},
"returns": {ret[0]: {"type": ret[1], "description": ret[2]} for ret in returns},
}
return yaml_data
def extract_function_schema_yaml_in_folder(folder_path: str):
function_signatures, function_returns = get_function_signatures_in_folder(folder_path)
function_schema_yaml_data = {}
for func_docstring, func_returns in zip(function_signatures, function_returns):
if func_docstring["udf_doc"]:
fun_yaml_data = docstring_to_yaml(func_docstring["udf_doc"], func_returns["udf_returns"])
fun_yaml_data.update({"type": "function"})
function_schema_yaml_data.update({func_returns["udf_name"]: fun_yaml_data})
return yaml.dump(function_schema_yaml_data, default_flow_style=False)
folder_path = str(Path(__file__).parent.absolute())
function_signatures, function_returns = get_function_signatures_in_folder(folder_path)
UDFS = [func for func in function_signatures]
UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path)
UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, Loader=yaml.FullLoader)

View file

@ -1,306 +0,0 @@
FillMissingValue:
type: class
description: "Completing missing values with simple strategies"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
strategy:
type: str
description: "the imputation strategy, notice mean/median can only be used for numeric features"
default: mean
enum:
- mean
- median
- most_frequent
- constant
fill_value:
type: int
description: "fill_value is used to replace all occurrences of missing_values"
default: null
required:
- features
fit:
description: "Fit the FillMissingValue model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
MinMaxScale:
type: class
description: "Transform features by scaling each feature to a range, witch is (0, 1)"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MinMaxScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
StandardScale:
type: class
description: "Standardize features by removing the mean and scaling to unit variance"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the StandardScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
MaxAbsScale:
type: class
description: "cale each feature by its maximum absolute value"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MaxAbsScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
LabelEncode:
type: class
description: "Apply label encoding to specified categorical columns in-place."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be label encoded"
required:
- features
fit:
description: "Fit the LabelEncode model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
OneHotEncode:
type: class
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be one-hot encoded and dropped"
required:
- features
fit:
description: "Fit the OneHotEncoding model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -1,548 +0,0 @@
PolynomialExpansion:
type: class
description: "Add polynomial and interaction features from selected numeric columns to input DataFrame."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns for polynomial expansion."
label_col:
type: str
description: "Label column name."
degree:
type: int
description: "The degree of the polynomial features."
default: 2
required:
- cols
- label_col
fit:
description: "Fit the PolynomialExpansion model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame without duplicated columns."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame without duplicated columns."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
CatCount:
type: class
description: "Add value counts of a categorical column as new feature."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column for value counts."
required:
- col
fit:
description: "Fit the CatCount model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
TargetMeanEncoder:
type: class
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column to be mean encoded."
label:
type: str
description: "Predicted label column."
required:
- col
- label
fit:
description: "Fit the TargetMeanEncoder model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
KFoldTargetMeanEncoder:
type: class
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column to be k-fold mean encoded."
label:
type: str
description: "Predicted label column."
n_splits:
type: int
description: "Number of splits for K-fold."
default: 5
random_state:
type: int
description: "Random seed."
default: 2021
required:
- col
- label
fit:
description: "Fit the KFoldTargetMeanEncoder model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
CatCross:
type: class
description: "Add pairwise crossed features and convert them to numerical features."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns to be pairwise crossed, at least 2 columns."
max_cat_num:
type: int
description: "Maximum unique categories per crossed feature."
default: 100
required:
- cols
fit:
description: "Fit the CatCross model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
GroupStat:
type: class
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
group_col:
type: str
description: "Column used for grouping."
agg_col:
type: str
description: "Column on which aggregation is performed."
agg_funcs:
type: list
description: >-
List of aggregation functions to apply, such as ['mean', 'std'].
Each function must be supported by pandas.
required:
- group_col
- agg_col
- agg_funcs
fit:
description: "Fit the GroupStat model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
SplitBins:
type: class
description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns to be binned inplace."
strategy:
type: str
description: "Strategy used to define the widths of the bins."
default: quantile
enum:
- quantile
- uniform
- kmeans
required:
- cols
fit:
description: "Fit the SplitBins model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
GeneralSelection:
type: class
description: "Drop all nan feats and feats with only one unique value."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
required:
- label_col
fit:
description: "Fit the GeneralSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
TreeBasedSelection:
type: class
description: "Select features based on tree-based model and remove features with low importance."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
task_type:
type: str
description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression."
enum:
- cls
- mcls
- reg
required:
- label_col
- task_type
fit:
description: "Fit the TreeBasedSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame contain label_col."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame contain label_col."
VarianceBasedSelection:
type: class
description: "Select features based on variance and remove features with low variance."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
threshold:
type: float
description: "Threshold for variance."
default: 0.0
required:
- label_col
fit:
description: "Fit the VarianceBasedSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame contain label_col."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame contain label_col."

View file

@ -4,7 +4,7 @@
# @Author : lidanyang
# @File : __init__.py
# @Desc :
from metagpt.tools.functions.libs import (
from metagpt.tools.libs import (
data_preprocess,
feature_engineering,
)

View file

@ -13,13 +13,24 @@ from sklearn.preprocessing import (
StandardScaler,
)
from metagpt.tools.functions.libs.base import MLProcess
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool
TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value
class MLProcess(object):
def fit(self, df):
raise NotImplementedError
def transform(self, df):
raise NotImplementedError
def fit_transform(self, df):
self.fit(df)
return self.transform(df)
@register_tool(tool_type_name=TOOL_TYPE)
class FillMissingValue(MLProcess):
def __init__(

View file

@ -15,7 +15,7 @@ from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
from metagpt.tools.functions.libs.base import MLProcess
from metagpt.tools.libs.data_preprocess import MLProcess
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool

View file

@ -53,7 +53,7 @@ payload = {
default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"
@register_tool(tool_type_name=ToolTypeEnum.STABLE_DIFFUSION)
@register_tool(tool_type_name=ToolTypeEnum.STABLE_DIFFUSION.value)
class SDEngine:
def __init__(self, sd_url=""):
# Initialize the SDEngine with configuration