mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-24 14:15:17 +02:00
Merge branch 'tool_manage_new' into 'code_intepreter'
convert local class or function to tool, tool clarification at role initialization See merge request agents/data_agents_opt!55
This commit is contained in:
commit
7f5f95d41b
18 changed files with 807 additions and 147 deletions
|
|
@ -22,7 +22,8 @@ from metagpt.prompts.ml_engineer import (
|
|||
TOOL_USAGE_PROMPT,
|
||||
)
|
||||
from metagpt.schema import Message, Plan
|
||||
from metagpt.tools.tool_registry import TOOL_REGISTRY
|
||||
from metagpt.tools import TOOL_REGISTRY
|
||||
from metagpt.tools.tool_registry import validate_tool_names
|
||||
from metagpt.utils.common import create_func_config, remove_comments
|
||||
|
||||
|
||||
|
|
@ -90,30 +91,29 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
|
|||
class WriteCodeWithTools(BaseWriteAnalysisCode):
|
||||
"""Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
|
||||
|
||||
available_tools: dict = {}
|
||||
# selected tools to choose from, listed by their names. An empty list means selection from all tools.
|
||||
selected_tools: list[str] = []
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def _parse_recommend_tools(self, recommend_tools: list) -> dict:
|
||||
def _get_tools_by_type(self, tool_type: str) -> dict:
|
||||
"""
|
||||
Parses and validates a list of recommended tools, and retrieves their schema from registry.
|
||||
Retrieve tools by tool type from registry, but filtered by pre-selected tool list
|
||||
|
||||
Args:
|
||||
recommend_tools (list): A list of recommended tools.
|
||||
tool_type (str): Tool type to retrieve from the registry
|
||||
|
||||
Returns:
|
||||
dict: A dict of valid tool schemas.
|
||||
dict: A dict of tool name to Tool object, representing available tools under the type
|
||||
"""
|
||||
valid_tools = []
|
||||
for tool_name in recommend_tools:
|
||||
if TOOL_REGISTRY.has_tool(tool_name):
|
||||
valid_tools.append(TOOL_REGISTRY.get_tool(tool_name))
|
||||
candidate_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
|
||||
if self.selected_tools:
|
||||
candidate_tools = {
|
||||
tool_name: candidate_tools[tool_name]
|
||||
for tool_name in self.selected_tools
|
||||
if tool_name in candidate_tools
|
||||
}
|
||||
return candidate_tools
|
||||
|
||||
tool_catalog = {tool.name: tool.schemas for tool in valid_tools}
|
||||
return tool_catalog
|
||||
|
||||
async def _tool_recommendation(
|
||||
async def _recommend_tool(
|
||||
self,
|
||||
task: str,
|
||||
code_steps: str,
|
||||
|
|
@ -128,7 +128,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
|
|||
available_tools (dict): the available tools description
|
||||
|
||||
Returns:
|
||||
list: recommended tools for the specified task
|
||||
dict: schemas of recommended tools for the specified task
|
||||
"""
|
||||
prompt = TOOL_RECOMMENDATION_PROMPT.format(
|
||||
current_task=task,
|
||||
|
|
@ -138,42 +138,62 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
|
|||
tool_config = create_func_config(SELECT_FUNCTION_TOOLS)
|
||||
rsp = await self.llm.aask_code(prompt, **tool_config)
|
||||
recommend_tools = rsp["recommend_tools"]
|
||||
return recommend_tools
|
||||
logger.info(f"Recommended tools: \n{recommend_tools}")
|
||||
|
||||
# Parses and validates the recommended tools, for LLM might hallucinate and recommend non-existing tools
|
||||
valid_tools = validate_tool_names(recommend_tools, return_tool_object=True)
|
||||
|
||||
tool_schemas = {tool.name: tool.schemas for tool in valid_tools}
|
||||
|
||||
return tool_schemas
|
||||
|
||||
async def _prepare_tools(self, plan: Plan) -> Tuple[dict, str]:
|
||||
"""Prepare tool schemas and usage instructions according to current task
|
||||
|
||||
Args:
|
||||
plan (Plan): The overall plan containing task information.
|
||||
|
||||
Returns:
|
||||
Tuple[dict, str]: Tool schemas ({tool_name: tool_schema_dict}) and a usage prompt for the type of tools selected
|
||||
"""
|
||||
# find tool type from task type through exact match, can extend to retrieval in the future
|
||||
tool_type = plan.current_task.task_type
|
||||
|
||||
# prepare tool-type-specific instruction
|
||||
tool_type_usage_prompt = (
|
||||
TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
|
||||
)
|
||||
|
||||
# prepare schemas of available tools
|
||||
tool_schemas = {}
|
||||
available_tools = self._get_tools_by_type(tool_type)
|
||||
if available_tools:
|
||||
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
|
||||
code_steps = plan.current_task.code_steps
|
||||
tool_schemas = await self._recommend_tool(plan.current_task.instruction, code_steps, available_tools)
|
||||
|
||||
return tool_schemas, tool_type_usage_prompt
|
||||
|
||||
async def run(
|
||||
self,
|
||||
context: List[Message],
|
||||
plan: Plan = None,
|
||||
plan: Plan,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
tool_type = (
|
||||
plan.current_task.task_type
|
||||
) # find tool type from task type through exact match, can extend to retrieval in the future
|
||||
available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
|
||||
special_prompt = (
|
||||
TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
|
||||
# prepare tool schemas and tool-type-specific instruction
|
||||
tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
|
||||
|
||||
# form a complete tool usage instruction and include it as a message in context
|
||||
tools_instruction = TOOL_USAGE_PROMPT.format(
|
||||
tool_schemas=tool_schemas, tool_type_usage_prompt=tool_type_usage_prompt
|
||||
)
|
||||
code_steps = plan.current_task.code_steps
|
||||
|
||||
tool_catalog = {}
|
||||
|
||||
if available_tools:
|
||||
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
|
||||
|
||||
recommend_tools = await self._tool_recommendation(
|
||||
plan.current_task.instruction, code_steps, available_tools
|
||||
)
|
||||
tool_catalog = self._parse_recommend_tools(recommend_tools)
|
||||
logger.info(f"Recommended tools: \n{recommend_tools}")
|
||||
|
||||
tools_instruction = TOOL_USAGE_PROMPT.format(special_prompt=special_prompt, tool_catalog=tool_catalog)
|
||||
|
||||
context.append(Message(content=tools_instruction, role="user"))
|
||||
|
||||
# prepare prompt & LLM call
|
||||
prompt = self.process_msg(context)
|
||||
|
||||
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
|
||||
rsp = await self.llm.aask_code(prompt, **tool_config)
|
||||
|
||||
return rsp
|
||||
|
||||
|
||||
|
|
@ -185,36 +205,25 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
|
|||
column_info: str = "",
|
||||
**kwargs,
|
||||
) -> Tuple[List[Message], str]:
|
||||
tool_type = (
|
||||
plan.current_task.task_type
|
||||
) # find tool type from task type through exact match, can extend to retrieval in the future
|
||||
available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
|
||||
special_prompt = (
|
||||
TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
|
||||
)
|
||||
code_steps = plan.current_task.code_steps
|
||||
# prepare tool schemas and tool-type-specific instruction
|
||||
tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
|
||||
|
||||
# ML-specific variables to be used in prompt
|
||||
code_steps = plan.current_task.code_steps
|
||||
finished_tasks = plan.get_finished_tasks()
|
||||
code_context = [remove_comments(task.code) for task in finished_tasks]
|
||||
code_context = "\n\n".join(code_context)
|
||||
|
||||
if available_tools:
|
||||
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
|
||||
|
||||
recommend_tools = await self._tool_recommendation(
|
||||
plan.current_task.instruction, code_steps, available_tools
|
||||
)
|
||||
tool_catalog = self._parse_recommend_tools(recommend_tools)
|
||||
logger.info(f"Recommended tools: \n{recommend_tools}")
|
||||
|
||||
# prepare prompt depending on tool availability & LLM call
|
||||
if tool_schemas:
|
||||
prompt = ML_TOOL_USAGE_PROMPT.format(
|
||||
user_requirement=plan.goal,
|
||||
history_code=code_context,
|
||||
current_task=plan.current_task.instruction,
|
||||
column_info=column_info,
|
||||
special_prompt=special_prompt,
|
||||
tool_type_usage_prompt=tool_type_usage_prompt,
|
||||
code_steps=code_steps,
|
||||
tool_catalog=tool_catalog,
|
||||
tool_schemas=tool_schemas,
|
||||
)
|
||||
|
||||
else:
|
||||
|
|
@ -223,13 +232,15 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
|
|||
history_code=code_context,
|
||||
current_task=plan.current_task.instruction,
|
||||
column_info=column_info,
|
||||
special_prompt=special_prompt,
|
||||
tool_type_usage_prompt=tool_type_usage_prompt,
|
||||
code_steps=code_steps,
|
||||
)
|
||||
|
||||
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
|
||||
rsp = await self.llm.aask_code(prompt, **tool_config)
|
||||
|
||||
# Extra output to be used for potential debugging
|
||||
context = [Message(content=prompt, role="user")]
|
||||
|
||||
return context, rsp
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ Latest data info after previous tasks:
|
|||
|
||||
# Task
|
||||
Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
|
||||
Specifically, {special_prompt}
|
||||
Specifically, {tool_type_usage_prompt}
|
||||
|
||||
# Code Steps:
|
||||
Strictly follow steps below when you writing code if it's convenient.
|
||||
|
|
@ -192,7 +192,7 @@ model.fit(train, y_train)
|
|||
TOOL_USAGE_PROMPT = """
|
||||
# Instruction
|
||||
Write complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.
|
||||
Specifically, {special_prompt}
|
||||
Specifically, {tool_type_usage_prompt}
|
||||
|
||||
# Capabilities
|
||||
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
|
||||
|
|
@ -200,7 +200,7 @@ Specifically, {special_prompt}
|
|||
|
||||
# Available Tools (can be empty):
|
||||
Each Class tool is described in JSON format. When you call a tool, import the tool first.
|
||||
{tool_catalog}
|
||||
{tool_schemas}
|
||||
|
||||
# Constraints:
|
||||
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
|
||||
|
|
@ -225,7 +225,7 @@ Latest data info after previous tasks:
|
|||
|
||||
# Task
|
||||
Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
|
||||
Specifically, {special_prompt}
|
||||
Specifically, {tool_type_usage_prompt}
|
||||
|
||||
# Code Steps:
|
||||
Strictly follow steps below when you writing code if it's convenient.
|
||||
|
|
@ -237,7 +237,7 @@ Strictly follow steps below when you writing code if it's convenient.
|
|||
|
||||
# Available Tools:
|
||||
Each Class tool is described in JSON format. When you call a tool, import the tool from its path first.
|
||||
{tool_catalog}
|
||||
{tool_schemas}
|
||||
|
||||
# Output Example:
|
||||
when current task is "do data preprocess, like fill missing value, handle outliers, etc.", and their are two steps in 'Code Steps', the code be like:
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ class CodeInterpreter(Role):
|
|||
make_udfs: bool = False # whether to save user-defined functions
|
||||
use_code_steps: bool = False
|
||||
execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True)
|
||||
tools: list[str] = []
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
|
@ -27,13 +28,20 @@ class CodeInterpreter(Role):
|
|||
goal="",
|
||||
auto_run=True,
|
||||
use_tools=False,
|
||||
make_udfs=False,
|
||||
tools=[],
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(
|
||||
name=name, profile=profile, goal=goal, auto_run=auto_run, use_tools=use_tools, make_udfs=make_udfs, **kwargs
|
||||
name=name, profile=profile, goal=goal, auto_run=auto_run, use_tools=use_tools, tools=tools, **kwargs
|
||||
)
|
||||
self._set_react_mode(react_mode="plan_and_act", auto_run=auto_run, use_tools=use_tools)
|
||||
if use_tools and tools:
|
||||
from metagpt.tools.tool_registry import (
|
||||
validate_tool_names, # import upon use
|
||||
)
|
||||
|
||||
self.tools = validate_tool_names(tools)
|
||||
logger.info(f"will only use {self.tools} as tools")
|
||||
|
||||
@property
|
||||
def working_memory(self):
|
||||
|
|
@ -92,7 +100,7 @@ class CodeInterpreter(Role):
|
|||
return code["code"], result, success
|
||||
|
||||
async def _write_code(self):
|
||||
todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools()
|
||||
todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
|
||||
logger.info(f"ready to {todo.name}")
|
||||
|
||||
context = self.planner.get_useful_memories()
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class MLEngineer(CodeInterpreter):
|
|||
column_info = await self._update_data_columns()
|
||||
|
||||
logger.info("Write code with tools")
|
||||
tool_context, code = await WriteCodeWithToolsML().run(
|
||||
tool_context, code = await WriteCodeWithToolsML(selected_tools=self.tools).run(
|
||||
context=[], # context assembled inside the Action
|
||||
plan=self.planner.plan,
|
||||
column_info=column_info,
|
||||
|
|
|
|||
|
|
@ -477,7 +477,7 @@ class Role(SerializationMixin, is_polymorphic_base=True):
|
|||
|
||||
else:
|
||||
# update plan according to user's feedback and to take on changed tasks
|
||||
await self.planner.update_plan(review)
|
||||
await self.planner.update_plan()
|
||||
|
||||
completed_plan_memory = self.planner.get_useful_memories() # completed plan as a outcome
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from metagpt.tools.libs import (
|
|||
feature_engineering,
|
||||
sd_engine,
|
||||
gpt_v_generator,
|
||||
web_scrapping,
|
||||
web_scraping,
|
||||
)
|
||||
|
||||
_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scrapping # Avoid pre-commit error
|
||||
_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scraping # Avoid pre-commit error
|
||||
|
|
|
|||
|
|
@ -26,31 +26,64 @@ class MLProcess(object):
|
|||
def transform(self, df):
|
||||
raise NotImplementedError
|
||||
|
||||
def fit_transform(self, df):
|
||||
def fit_transform(self, df) -> pd.DataFrame:
|
||||
"""
|
||||
Fit and transform the input DataFrame.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
self.fit(df)
|
||||
return self.transform(df)
|
||||
|
||||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class FillMissingValue(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
strategy: str = "mean",
|
||||
fill_value=None,
|
||||
):
|
||||
"""
|
||||
Completing missing values with simple strategies.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list, strategy: str = "mean", fill_value=None):
|
||||
"""
|
||||
Initialize self.
|
||||
|
||||
Args:
|
||||
features (list): Columns to be processed.
|
||||
strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only
|
||||
be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
|
||||
fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
|
||||
Defaults to None.
|
||||
"""
|
||||
self.features = features
|
||||
self.strategy = strategy
|
||||
self.fill_value = fill_value
|
||||
self.si = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Fit the FillMissingValue model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
"""
|
||||
if len(self.features) == 0:
|
||||
return
|
||||
self.si = SimpleImputer(strategy=self.strategy, fill_value=self.fill_value)
|
||||
self.si.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Transform the input DataFrame with the fitted model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
if len(self.features) == 0:
|
||||
return df
|
||||
new_df = df.copy()
|
||||
|
|
@ -60,18 +93,40 @@ class FillMissingValue(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class MinMaxScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Transform features by scaling each feature to a range, which is (0, 1).
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize self.
|
||||
|
||||
Args:
|
||||
features (list): Columns to be processed.
|
||||
"""
|
||||
self.features = features
|
||||
self.mms = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Fit the MinMaxScale model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
"""
|
||||
self.mms = MinMaxScaler()
|
||||
self.mms.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Transform the input DataFrame with the fitted model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
new_df = df.copy()
|
||||
new_df[self.features] = self.mms.transform(new_df[self.features])
|
||||
return new_df
|
||||
|
|
@ -79,18 +134,40 @@ class MinMaxScale(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class StandardScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Standardize features by removing the mean and scaling to unit variance.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize self.
|
||||
|
||||
Args:
|
||||
features (list): Columns to be processed.
|
||||
"""
|
||||
self.features = features
|
||||
self.ss = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Fit the StandardScale model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
"""
|
||||
self.ss = StandardScaler()
|
||||
self.ss.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Transform the input DataFrame with the fitted model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
new_df = df.copy()
|
||||
new_df[self.features] = self.ss.transform(new_df[self.features])
|
||||
return new_df
|
||||
|
|
@ -98,18 +175,40 @@ class StandardScale(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class MaxAbsScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Scale each feature by its maximum absolute value.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize self.
|
||||
|
||||
Args:
|
||||
features (list): Columns to be processed.
|
||||
"""
|
||||
self.features = features
|
||||
self.mas = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Fit the MaxAbsScale model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
"""
|
||||
self.mas = MaxAbsScaler()
|
||||
self.mas.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Transform the input DataFrame with the fitted model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
new_df = df.copy()
|
||||
new_df[self.features] = self.mas.transform(new_df[self.features])
|
||||
return new_df
|
||||
|
|
@ -117,18 +216,40 @@ class MaxAbsScale(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class RobustScale(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Apply the RobustScaler to scale features using statistics that are robust to outliers.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize the RobustScale instance with feature names.
|
||||
|
||||
Args:
|
||||
features (list): List of feature names to be scaled.
|
||||
"""
|
||||
self.features = features
|
||||
self.rs = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Compute the median and IQR for scaling.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): Dataframe containing the features.
|
||||
"""
|
||||
self.rs = RobustScaler()
|
||||
self.rs.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
"""
|
||||
Scale features using the previously computed median and IQR.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): Dataframe containing the features to be scaled.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: A new dataframe with scaled features.
|
||||
"""
|
||||
new_df = df.copy()
|
||||
new_df[self.features] = self.rs.transform(new_df[self.features])
|
||||
return new_df
|
||||
|
|
@ -136,18 +257,40 @@ class RobustScale(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class OrdinalEncode(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Encode categorical features as ordinal integers.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize the OrdinalEncode instance with feature names.
|
||||
|
||||
Args:
|
||||
features (list): List of categorical feature names to be encoded.
|
||||
"""
|
||||
self.features = features
|
||||
self.oe = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Learn the ordinal encodings for the features.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): Dataframe containing the categorical features.
|
||||
"""
|
||||
self.oe = OrdinalEncoder()
|
||||
self.oe.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
"""
|
||||
Convert the categorical features to ordinal integers.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): Dataframe containing the categorical features to be encoded.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: A new dataframe with the encoded features.
|
||||
"""
|
||||
new_df = df.copy()
|
||||
new_df[self.features] = self.oe.transform(new_df[self.features])
|
||||
return new_df
|
||||
|
|
@ -155,18 +298,40 @@ class OrdinalEncode(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class OneHotEncode(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Apply one-hot encoding to specified categorical columns, the original columns will be dropped.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize self.
|
||||
|
||||
Args:
|
||||
features (list): Categorical columns to be one-hot encoded and dropped.
|
||||
"""
|
||||
self.features = features
|
||||
self.ohe = None
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Fit the OneHotEncoding model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
"""
|
||||
self.ohe = OneHotEncoder(handle_unknown="ignore", sparse=False)
|
||||
self.ohe.fit(df[self.features])
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Transform the input DataFrame with the fitted model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
ts_data = self.ohe.transform(df[self.features])
|
||||
new_columns = self.ohe.get_feature_names_out(self.features)
|
||||
ts_data = pd.DataFrame(ts_data, columns=new_columns, index=df.index)
|
||||
|
|
@ -177,21 +342,43 @@ class OneHotEncode(MLProcess):
|
|||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
class LabelEncode(MLProcess):
|
||||
def __init__(
|
||||
self,
|
||||
features: list,
|
||||
):
|
||||
"""
|
||||
Apply label encoding to specified categorical columns in-place.
|
||||
"""
|
||||
|
||||
def __init__(self, features: list):
|
||||
"""
|
||||
Initialize self.
|
||||
|
||||
Args:
|
||||
features (list): Categorical columns to be label encoded.
|
||||
"""
|
||||
self.features = features
|
||||
self.le_encoders = []
|
||||
|
||||
def fit(self, df: pd.DataFrame):
|
||||
"""
|
||||
Fit the LabelEncode model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
"""
|
||||
if len(self.features) == 0:
|
||||
return
|
||||
for col in self.features:
|
||||
le = LabelEncoder().fit(df[col].astype(str).unique().tolist() + ["unknown"])
|
||||
self.le_encoders.append(le)
|
||||
|
||||
def transform(self, df: pd.DataFrame):
|
||||
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Transform the input DataFrame with the fitted model.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The input DataFrame.
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: The transformed DataFrame.
|
||||
"""
|
||||
if len(self.features) == 0:
|
||||
return df
|
||||
new_df = df.copy()
|
||||
|
|
@ -204,8 +391,17 @@ class LabelEncode(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
def get_column_info(df: pd.DataFrame) -> dict:
|
||||
"""
|
||||
Analyzes a DataFrame and categorizes its columns based on data types.
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): The DataFrame to be analyzed.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others').
|
||||
Each key corresponds to a list of column names belonging to that category.
|
||||
"""
|
||||
column_info = {
|
||||
"Category": [],
|
||||
"Numeric": [],
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ class SplitBins(MLProcess):
|
|||
return new_df
|
||||
|
||||
|
||||
@register_tool(tool_type=TOOL_TYPE)
|
||||
# @register_tool(tool_type=TOOL_TYPE)
|
||||
class ExtractTimeComps(MLProcess):
|
||||
def __init__(self, time_col: str, time_comps: list):
|
||||
self.time_col = time_col
|
||||
|
|
@ -242,6 +242,7 @@ class GeneralSelection(MLProcess):
|
|||
|
||||
|
||||
# skip for now because lgb is needed
|
||||
# @register_tool(tool_type=TOOL_TYPE)
|
||||
class TreeBasedSelection(MLProcess):
|
||||
def __init__(self, label_col: str, task_type: str):
|
||||
self.label_col = label_col
|
||||
|
|
|
|||
46
metagpt/tools/schemas/data_preprocess/OrdinalEncode.yml
Normal file
46
metagpt/tools/schemas/data_preprocess/OrdinalEncode.yml
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
OrdinalEncode:
|
||||
type: class
|
||||
description: Encode categorical features as ordinal integers.
|
||||
methods:
|
||||
__init__:
|
||||
description: 'Initialize the OrdinalEncode instance with feature names. '
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: List of categorical feature names to be encoded.
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: 'Learn the ordinal encodings for the features. '
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: pd.DataFrame
|
||||
description: Dataframe containing the categorical features.
|
||||
required:
|
||||
- df
|
||||
fit_transform:
|
||||
description: 'Fit and transform the input DataFrame. '
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: pd.DataFrame
|
||||
description: The input DataFrame.
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
- type: pd.DataFrame
|
||||
description: The transformed DataFrame.
|
||||
transform:
|
||||
description: 'Convert the categorical features to ordinal integers. '
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: pd.DataFrame
|
||||
description: Dataframe containing the categorical features to be encoded.
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
- type: pd.DataFrame
|
||||
description: A new dataframe with the encoded features.
|
||||
47
metagpt/tools/schemas/data_preprocess/RobustScale.yml
Normal file
47
metagpt/tools/schemas/data_preprocess/RobustScale.yml
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
RobustScale:
|
||||
type: class
|
||||
description: Apply the RobustScaler to scale features using statistics that are
|
||||
robust to outliers.
|
||||
methods:
|
||||
__init__:
|
||||
description: 'Initialize the RobustScale instance with feature names. '
|
||||
parameters:
|
||||
properties:
|
||||
features:
|
||||
type: list
|
||||
description: List of feature names to be scaled.
|
||||
required:
|
||||
- features
|
||||
fit:
|
||||
description: 'Compute the median and IQR for scaling. '
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: pd.DataFrame
|
||||
description: Dataframe containing the features.
|
||||
required:
|
||||
- df
|
||||
fit_transform:
|
||||
description: 'Fit and transform the input DataFrame. '
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: pd.DataFrame
|
||||
description: The input DataFrame.
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
- type: pd.DataFrame
|
||||
description: The transformed DataFrame.
|
||||
transform:
|
||||
description: 'Scale features using the previously computed median and IQR. '
|
||||
parameters:
|
||||
properties:
|
||||
df:
|
||||
type: pd.DataFrame
|
||||
description: Dataframe containing the features to be scaled.
|
||||
required:
|
||||
- df
|
||||
returns:
|
||||
- type: pd.DataFrame
|
||||
description: A new dataframe with scaled features.
|
||||
72
metagpt/tools/tool_convert.py
Normal file
72
metagpt/tools/tool_convert.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
import inspect
|
||||
|
||||
from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces
|
||||
|
||||
|
||||
def convert_code_to_tool_schema(obj, include: list[str] = []):
    """Build a tool schema dict from the docstrings of a class or function.

    For a class, the class docstring becomes the description and every
    function member that has a docstring (optionally filtered by ``include``)
    is converted with ``docstring_to_schema``. For a function, its own
    docstring is converted directly.

    Args:
        obj: The class or plain Python function to describe.
        include (list[str]): Method names to keep when ``obj`` is a class.
            An empty list keeps every documented method. The default list is
            only read, never mutated.

    Returns:
        dict: A single-key dict ``{obj.__name__: schema}`` where ``schema``
        has ``"type"`` set to ``"class"`` or ``"function"``.

    Raises:
        AssertionError: If ``obj`` has no docstring.
        TypeError: If ``obj`` is neither a class nor a plain function.
    """
    docstring = inspect.getdoc(obj)
    assert docstring, "no docstring found for the objects, skip registering"

    if inspect.isclass(obj):
        schema = {"type": "class", "description": remove_spaces(docstring), "methods": {}}
        for name, method in inspect.getmembers(obj, inspect.isfunction):
            if include and name not in include:
                continue
            method_doc = inspect.getdoc(method)
            # Undocumented methods are left out of the schema.
            if method_doc:
                schema["methods"][name] = docstring_to_schema(method_doc)

    elif inspect.isfunction(obj):
        schema = {
            "type": "function",
            **docstring_to_schema(docstring),
        }

    else:
        # Previously this fell through with `schema` unbound and surfaced as an
        # UnboundLocalError; fail with a message that names the real problem.
        raise TypeError(
            f"cannot convert object of type {type(obj).__name__!r} to a tool schema; "
            "expected a class or a function"
        )

    return {obj.__name__: schema}
|
||||
|
||||
|
||||
def docstring_to_schema(docstring: str):
    """Convert a docstring into a schema dict using ``GoogleDocstringParser``.

    Args:
        docstring (str): The docstring text to parse. ``None`` yields ``{}``.

    Returns:
        dict: ``{}`` when ``docstring`` is None. Otherwise a dict with
        ``"description"`` and ``"parameters"`` (holding ``"properties"`` and
        ``"required"``), plus ``"returns"`` when the parser reports any
        return entries.
    """
    if docstring is None:
        return {}

    parser = GoogleDocstringParser(docstring=docstring)

    # Summary / description section.
    summary = parser.parse_desc()

    # Args section: one property entry per documented parameter.
    properties = {}
    required = []
    for param_name, raw_type, param_desc in parser.parse_params():
        # Parameters not marked optional are listed as required.
        is_optional, param_type = parser.check_and_parse_optional(raw_type)
        if not is_optional:
            required.append(param_name)

        entry = {"type": param_type, "description": remove_spaces(param_desc)}

        # Default value, if the description declares one.
        has_default_val, default_val = parser.check_and_parse_default_value(param_desc)
        if has_default_val:
            entry["default"] = default_val

        # Enum values, if the description declares them.
        has_enum, enum_vals = parser.check_and_parse_enum(param_desc)
        if has_enum:
            entry["enum"] = enum_vals

        properties[param_name] = entry

    # Returns section.
    returns = parser.parse_returns()

    # Assemble the dict that is later dumped as the tool schema.
    schema = {
        "description": summary,
        "parameters": {"properties": properties, "required": required},
    }
    if returns:
        schema["returns"] = [{"type": ret[0], "description": remove_spaces(ret[1])} for ret in returns]

    return schema
|
||||
|
|
@ -11,17 +11,18 @@ import re
|
|||
from collections import defaultdict
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.const import TOOL_SCHEMA_PATH
|
||||
from metagpt.logs import logger
|
||||
from metagpt.tools.tool_convert import convert_code_to_tool_schema
|
||||
from metagpt.tools.tool_data_type import Tool, ToolSchema, ToolType
|
||||
|
||||
|
||||
class ToolRegistry:
|
||||
def __init__(self):
|
||||
self.tools = {}
|
||||
self.tool_types = {}
|
||||
self.tools_by_types = defaultdict(dict) # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
|
||||
class ToolRegistry(BaseModel):
|
||||
tools: dict = {}
|
||||
tool_types: dict = {}
|
||||
tools_by_types: dict = defaultdict(dict) # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
|
||||
|
||||
def register_tool_type(self, tool_type: ToolType):
|
||||
self.tool_types[tool_type.name] = tool_type
|
||||
|
|
@ -34,7 +35,9 @@ class ToolRegistry:
|
|||
schema_path=None,
|
||||
tool_code="",
|
||||
tool_type="other",
|
||||
make_schema_if_not_exists=False,
|
||||
tool_source_object=None,
|
||||
include_functions=[],
|
||||
make_schema_if_not_exists=True,
|
||||
):
|
||||
if self.has_tool(tool_name):
|
||||
return
|
||||
|
|
@ -44,14 +47,16 @@ class ToolRegistry:
|
|||
if not os.path.exists(schema_path):
|
||||
if make_schema_if_not_exists:
|
||||
logger.warning(f"no schema found, will make schema at {schema_path}")
|
||||
make_schema(tool_code, schema_path)
|
||||
schema_dict = make_schema(tool_source_object, include_functions, schema_path)
|
||||
else:
|
||||
logger.warning(f"no schema found at assumed schema_path {schema_path}, skip registering {tool_name}")
|
||||
return
|
||||
|
||||
with open(schema_path, "r", encoding="utf-8") as f:
|
||||
schema_dict = yaml.safe_load(f)
|
||||
schemas = schema_dict.get(tool_name) or list(schema_dict.values())[0]
|
||||
else:
|
||||
with open(schema_path, "r", encoding="utf-8") as f:
|
||||
schema_dict = yaml.safe_load(f)
|
||||
if not schema_dict:
|
||||
return
|
||||
schemas = schema_dict.get(tool_name) or list(schema_dict.values())[0]
|
||||
schemas["tool_path"] = tool_path # corresponding code file path of the tool
|
||||
try:
|
||||
ToolSchema(**schemas) # validation
|
||||
|
|
@ -65,22 +70,22 @@ class ToolRegistry:
|
|||
self.tools_by_types[tool_type][tool_name] = tool
|
||||
logger.info(f"{tool_name} registered")
|
||||
|
||||
def has_tool(self, key):
|
||||
def has_tool(self, key: str) -> Tool:
|
||||
return key in self.tools
|
||||
|
||||
def get_tool(self, key):
|
||||
def get_tool(self, key) -> Tool:
|
||||
return self.tools.get(key)
|
||||
|
||||
def get_tools_by_type(self, key):
|
||||
return self.tools_by_types.get(key)
|
||||
def get_tools_by_type(self, key) -> dict[str, Tool]:
|
||||
return self.tools_by_types.get(key, {})
|
||||
|
||||
def has_tool_type(self, key):
|
||||
def has_tool_type(self, key) -> bool:
|
||||
return key in self.tool_types
|
||||
|
||||
def get_tool_type(self, key):
|
||||
def get_tool_type(self, key) -> ToolType:
|
||||
return self.tool_types.get(key)
|
||||
|
||||
def get_tool_types(self):
|
||||
def get_tool_types(self) -> dict[str, ToolType]:
|
||||
return self.tool_types
|
||||
|
||||
|
||||
|
|
@ -94,7 +99,7 @@ def register_tool_type(cls):
|
|||
return cls
|
||||
|
||||
|
||||
def register_tool(tool_name="", tool_type="other", schema_path=None):
|
||||
def register_tool(tool_name="", tool_type="other", schema_path=None, **kwargs):
|
||||
"""register a tool to registry"""
|
||||
|
||||
def decorator(cls, tool_name=tool_name):
|
||||
|
|
@ -112,15 +117,39 @@ def register_tool(tool_name="", tool_type="other", schema_path=None):
|
|||
schema_path=schema_path,
|
||||
tool_code=source_code,
|
||||
tool_type=tool_type,
|
||||
tool_source_object=cls,
|
||||
**kwargs,
|
||||
)
|
||||
return cls
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
def make_schema(tool_code, path):
|
||||
def make_schema(tool_source_object, include, path):
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True) # Create the necessary directories
|
||||
schema = {} # an empty schema for now
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
yaml.dump(schema, f)
|
||||
return path
|
||||
try:
|
||||
schema = convert_code_to_tool_schema(tool_source_object, include=include)
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
yaml.dump(schema, f, sort_keys=False)
|
||||
# import json
|
||||
# with open(str(path).replace("yml", "json"), "w", encoding="utf-8") as f:
|
||||
# json.dump(schema, f, ensure_ascii=False, indent=4)
|
||||
logger.info(f"schema made at {path}")
|
||||
except Exception as e:
|
||||
schema = {}
|
||||
logger.error(f"Fail to make schema: {e}")
|
||||
|
||||
return schema
|
||||
|
||||
|
||||
def validate_tool_names(tools: list[str], return_tool_object=False) -> list[str]:
    """Keep only the tool names that are present in ``TOOL_REGISTRY``.

    Args:
        tools (list[str]): Tool names to check against the registry.
        return_tool_object (bool): When true, the result holds the registered tool
            objects (as returned by ``TOOL_REGISTRY.get_tool``) instead of the names.

    Returns:
        list: The names (or tool objects) of the tools found, in input order. A
            warning is logged for every name that is not registered.
    """
    found = []
    for tool_name in tools:
        if TOOL_REGISTRY.has_tool(tool_name):
            found.append(TOOL_REGISTRY.get_tool(tool_name) if return_tool_object else tool_name)
            continue
        logger.warning(
            f"Specified tool {tool_name} not found and was skipped. Check if you have registered it properly"
        )
    return found
|
||||
|
|
|
|||
87
metagpt/utils/parse_docstring.py
Normal file
87
metagpt/utils/parse_docstring.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import re
|
||||
from typing import Tuple
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
def remove_spaces(text):
    """Collapse every run of whitespace in ``text`` into a single space."""
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub(" ", text)
|
||||
|
||||
|
||||
# Base interface for docstring parsers. Every method here has a docstring-only body
# (so it returns None when called directly); subclasses supply the actual parsing.
class DocstringParser(BaseModel):
    docstring: str  # the raw docstring text to be parsed

    def parse_desc(self) -> str:
        """Parse and return the description from the docstring."""

    def parse_params(self) -> list[Tuple[str, str, str]]:
        """Parse and return the parameters from the docstring.

        Returns:
            list[Tuple[str, str, str]]: A list of input parameter info. Each info is a triple of (param name, param type, param description)
        """

    def parse_returns(self) -> list[Tuple[str, str]]:
        """Parse and return the output information from the docstring.

        Returns:
            list[Tuple[str, str]]: A list of output info. Each info is a tuple of (return type, return description)
        """

    @staticmethod
    def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
        """Check if a parameter is optional and return the param_type with the optionality info removed if so"""

    @staticmethod
    def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
        """Check if a parameter has a default value and return the default value if so"""

    @staticmethod
    def check_and_parse_enum(param_desc: str) -> Tuple[bool, list]:
        """Check if a parameter description includes an enum and return the enum values if so"""
|
||||
|
||||
|
||||
# Placeholder: nothing is overridden here, so every method is the docstring-only
# version inherited from DocstringParser.
class reSTDocstringParser(DocstringParser):
    """A parser for reStructuredText (reST) docstring"""
|
||||
|
||||
|
||||
class GoogleDocstringParser(DocstringParser):
    """A parser for Google-style docstrings."""

    docstring: str

    def parse_desc(self) -> str:
        """Return the text preceding the first Args:/Returns:/Raises: header.

        Returns:
            str: The description with every whitespace run collapsed to one space
                (leading/trailing whitespace is collapsed, not stripped).
        """
        description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", self.docstring, re.DOTALL)
        description = remove_spaces(description_match.group(1)) if description_match else ""
        return description

    def parse_params(self) -> list[Tuple[str, str, str]]:
        """Return the entries of the Args: section.

        Returns:
            list[Tuple[str, str, str]]: One (name, type, description) triple per
                ``name (type): description`` entry; empty when there is no Args: section.
        """
        args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", self.docstring, re.DOTALL)
        _args = args_match.group(1).strip() if args_match else ""
        # The trailing lookahead stops a description either where the next parameter
        # begins (newline, a word, then an opening parenthesis) or at the end of the
        # section, which lets a description span several lines.
        variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*?)(?=\n\s*\w+\s*\(|\Z)", re.DOTALL)
        params = variable_pattern.findall(_args)
        return params

    def parse_returns(self) -> list[Tuple[str, str]]:
        """Return the entries of the Returns: section.

        Returns:
            list[Tuple[str, str]]: (type, description) tuples; empty when nothing matches.
        """
        returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", self.docstring, re.DOTALL)
        returns = returns_match.group(1).strip() if returns_match else ""
        # NOTE: the pattern is anchored to the whole section and compiled without
        # MULTILINE/DOTALL, so a Returns: section whose text continues on a second
        # line generally produces no match at all.
        return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$")
        returns = return_pattern.findall(returns)
        return returns

    @staticmethod
    def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
        """Report whether the type text mentions "optional" and return it without ", optional"."""
        return "optional" in param_type, param_type.replace(", optional", "")

    @staticmethod
    def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
        """Extract the value from a ``Defaults to <value>.`` sentence.

        Returns:
            Tuple[bool, str]: (True, value text) when such a sentence is found,
                otherwise (False, "").
        """
        # The closing period must be followed by whitespace or the end of the text, so
        # a period inside the value itself ("Defaults to 0.5.") does not cut it short.
        default_val = re.search(r"Defaults to (.+?)\.(?=\s|$)", param_desc)
        return (True, default_val.group(1)) if default_val else (False, "")

    @staticmethod
    def check_and_parse_enum(param_desc: str) -> Tuple[bool, list]:
        """Extract the values from an ``Enum: [a, b, ...]`` fragment.

        Returns:
            Tuple[bool, list]: (True, list of comma-separated items, each stripped of
                surrounding whitespace) when found, otherwise (False, []).
        """
        enum_val = re.search(r"Enum: \[(.+?)\]", param_desc)
        return (True, [e.strip() for e in enum_val.group(1).split(",")]) if enum_val else (False, [])
|
||||
|
|
@ -10,7 +10,7 @@ from metagpt.utils.recovery_util import load_history, save_history
|
|||
|
||||
|
||||
async def run_code_interpreter(
|
||||
role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir
|
||||
role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir, tools
|
||||
):
|
||||
"""
|
||||
The main function to run the MLEngineer with optional history loading.
|
||||
|
|
@ -25,7 +25,9 @@ async def run_code_interpreter(
|
|||
"""
|
||||
|
||||
if role_class == "ci":
|
||||
role = CodeInterpreter(goal=requirement, auto_run=auto_run, use_tools=use_tools, make_udfs=make_udfs)
|
||||
role = CodeInterpreter(
|
||||
goal=requirement, auto_run=auto_run, use_tools=use_tools, make_udfs=make_udfs, tools=tools
|
||||
)
|
||||
else:
|
||||
role = MLEngineer(
|
||||
goal=requirement,
|
||||
|
|
@ -33,7 +35,7 @@ async def run_code_interpreter(
|
|||
use_tools=use_tools,
|
||||
use_code_steps=use_code_steps,
|
||||
make_udfs=make_udfs,
|
||||
use_udfs=use_udfs,
|
||||
tools=tools,
|
||||
)
|
||||
|
||||
if save_dir:
|
||||
|
|
@ -73,6 +75,8 @@ if __name__ == "__main__":
|
|||
use_tools = True
|
||||
make_udfs = False
|
||||
use_udfs = False
|
||||
tools = []
|
||||
# tools = ["FillMissingValue", "CatCross", "non_existing_test"]
|
||||
|
||||
async def main(
|
||||
role_class: str = role_class,
|
||||
|
|
@ -83,9 +87,10 @@ if __name__ == "__main__":
|
|||
make_udfs: bool = make_udfs,
|
||||
use_udfs: bool = use_udfs,
|
||||
save_dir: str = save_dir,
|
||||
tools=tools,
|
||||
):
|
||||
await run_code_interpreter(
|
||||
role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir
|
||||
role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir, tools
|
||||
)
|
||||
|
||||
fire.Fire(main)
|
||||
|
|
|
|||
158
tests/metagpt/tools/test_tool_convert.py
Normal file
158
tests/metagpt/tools/test_tool_convert.py
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
import pandas as pd
|
||||
|
||||
from metagpt.tools.tool_convert import convert_code_to_tool_schema, docstring_to_schema
|
||||
|
||||
|
||||
def test_docstring_to_schema():
    """docstring_to_schema extracts description, parameters (type, default, enum) and returns."""
    docstring = """
    Some test desc.

    Args:
        features (list): Columns to be processed.
        strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only be
            used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
        fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
            Defaults to None.
    Returns:
        pd.DataFrame: The transformed DataFrame.
    """

    # Expected per-parameter entries, listed in declaration order
    features_entry = {"type": "list", "description": "Columns to be processed."}
    strategy_entry = {
        "type": "str",
        "description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.",
        "default": "'mean'",
        "enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"],
    }
    fill_value_entry = {
        "type": "int",
        "description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
        "default": "None",
    }

    expected = {
        "description": " Some test desc. ",
        "parameters": {
            "properties": {
                "features": features_entry,
                "strategy": strategy_entry,
                "fill_value": fill_value_entry,
            },
            "required": ["features"],
        },
        "returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}],
    }

    assert docstring_to_schema(docstring) == expected
|
||||
|
||||
|
||||
# Test fixture: the docstrings below are the input data that
# test_convert_code_to_tool_schema_class compares against its expected schema,
# so their wording must stay in sync with that test.
class DummyClass:
    """
    Completing missing values with simple strategies.
    """

    def __init__(self, features: list, strategy: str = "mean", fill_value=None):
        """
        Initialize self.

        Args:
            features (list): Columns to be processed.
            strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only
                be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
            fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
                Defaults to None.
        """
        pass

    def fit(self, df: pd.DataFrame):
        """
        Fit the FillMissingValue model.

        Args:
            df (pd.DataFrame): The input DataFrame.
        """
        pass

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Transform the input DataFrame with the fitted model.

        Args:
            df (pd.DataFrame): The input DataFrame.

        Returns:
            pd.DataFrame: The transformed DataFrame.
        """
        pass
|
||||
|
||||
|
||||
# Test fixture: the docstring below is the input data that
# test_convert_code_to_tool_schema_function compares against its expected schema.
def dummy_fn(df: pd.DataFrame) -> dict:
    """
    Analyzes a DataFrame and categorizes its columns based on data types.

    Args:
        df (pd.DataFrame): The DataFrame to be analyzed.

    Returns:
        dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others').
              Each key corresponds to a list of column names belonging to that category.
    """
    pass
|
||||
|
||||
|
||||
def test_convert_code_to_tool_schema_class():
    """convert_code_to_tool_schema on a class yields a schema with one entry per documented method."""
    # fit() and transform() document the same single `df` argument
    df_parameters = {
        "properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}},
        "required": ["df"],
    }

    init_schema = {
        "description": "Initialize self. ",
        "parameters": {
            "properties": {
                "features": {"type": "list", "description": "Columns to be processed."},
                "strategy": {
                    "type": "str",
                    "description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.",
                    "default": "'mean'",
                    "enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"],
                },
                "fill_value": {
                    "type": "int",
                    "description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
                    "default": "None",
                },
            },
            "required": ["features"],
        },
    }
    fit_schema = {
        "description": "Fit the FillMissingValue model. ",
        "parameters": df_parameters,
    }
    transform_schema = {
        "description": "Transform the input DataFrame with the fitted model. ",
        "parameters": df_parameters,
        "returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}],
    }

    expected = {
        "DummyClass": {
            "type": "class",
            "description": "Completing missing values with simple strategies.",
            "methods": {
                "__init__": init_schema,
                "fit": fit_schema,
                "transform": transform_schema,
            },
        }
    }

    assert convert_code_to_tool_schema(DummyClass) == expected
|
||||
|
||||
|
||||
def test_convert_code_to_tool_schema_function():
    """convert_code_to_tool_schema on a function yields a schema keyed by the function name."""
    df_property = {"type": "pd.DataFrame", "description": "The DataFrame to be analyzed."}
    function_schema = {
        "type": "function",
        "description": "Analyzes a DataFrame and categorizes its columns based on data types. ",
        "parameters": {
            "properties": {"df": df_property},
            "required": ["df"],
        },
    }

    assert convert_code_to_tool_schema(dummy_fn) == {"dummy_fn": function_schema}
|
||||
|
|
@ -98,4 +98,4 @@ def test_get_tools_by_type(tool_registry, schema_yaml):
|
|||
# Test case for when the tool type does not exist
|
||||
def test_get_tools_by_nonexistent_type(tool_registry):
|
||||
tools_by_type = tool_registry.get_tools_by_type("NonexistentType")
|
||||
assert tools_by_type is None
|
||||
assert not tools_by_type
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue