Merge branch 'tool_manage_new' into 'code_intepreter'

Add tool registry, minimize MLEngineer

See merge request agents/data_agents_opt!50
This commit is contained in:
林义章 2024-01-18 16:11:41 +00:00
commit c7d46d0b0f
57 changed files with 1335 additions and 787 deletions

1
.gitignore vendored
View file

@ -173,6 +173,7 @@ tests/metagpt/utils/file_repo_git
*.png
htmlcov
htmlcov.*
cov.xml
*.dot
*.pkl
*-structure.csv

View file

@ -130,7 +130,7 @@
1. HTML Layout Outputs the HTML code for the page.
1. CSS Styles (styles.css) Outputs the CSS code for the page.
1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/sd_engine.
1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/libs/sd_engine.py.
1. Configuration instructions for SD Skills: The SD interface is currently deployed based on *https://github.com/AUTOMATIC1111/stable-diffusion-webui* **For environmental configurations and model downloads, please refer to the aforementioned GitHub repository. To initiate the SD service that supports API calls, run the command specified in cmd with the parameter nowebui, i.e.,

View file

@ -119,5 +119,4 @@ class DebugCode(BaseWriteAnalysisCode):
runtime_result=runtime_result,
)
# 根据reflection结果重写代码
improv_code = reflection["improved_impl"]
return improv_code
return {"code": reflection["improved_impl"]}

View file

@ -8,11 +8,9 @@ import re
from pathlib import Path
from typing import Dict, List, Tuple, Union
import yaml
from tenacity import retry, stop_after_attempt, wait_fixed
from metagpt.actions import Action
from metagpt.const import TOOL_SCHEMA_PATH
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.prompts.ml_engineer import (
@ -24,12 +22,9 @@ from metagpt.prompts.ml_engineer import (
TOOL_USAGE_PROMPT,
)
from metagpt.schema import Message, Plan
from metagpt.tools import TOOL_TYPE_MAPPINGS
from metagpt.tools.tool_registry import TOOL_REGISTRY
from metagpt.utils.common import create_func_config, remove_comments
TOOL_TYPE_MODULE = {k: v.module for k, v in TOOL_TYPE_MAPPINGS.items()}
TOOL_TYPE_USAGE_PROMPT = {k: v.usage_prompt for k, v in TOOL_TYPE_MAPPINGS.items()}
class BaseWriteAnalysisCode(Action):
DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
@ -64,7 +59,7 @@ class BaseWriteAnalysisCode(Action):
}
return messages
async def run(self, context: List[Message], plan: Plan = None) -> str:
async def run(self, context: List[Message], plan: Plan = None) -> dict:
"""Run of a code writing action, used in data analysis or modeling
Args:
@ -72,7 +67,7 @@ class BaseWriteAnalysisCode(Action):
plan (Plan, optional): Overall plan. Defaults to None.
Returns:
str: The code string.
dict: code result in the format of {"code": "print('hello world')", "language": "python"}
"""
@ -95,49 +90,27 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
class WriteCodeWithTools(BaseWriteAnalysisCode):
"""Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
schema_path: Union[Path, str] = TOOL_SCHEMA_PATH
available_tools: dict = {}
def __init__(self, **kwargs):
super().__init__(**kwargs)
self._load_tools(self.schema_path)
def _load_tools(self, schema_path, schema_module=None):
"""Load tools from yaml file"""
if isinstance(schema_path, dict):
schema_module = schema_module or "udf"
self.available_tools.update({schema_module: schema_path})
else:
if isinstance(schema_path, list):
yml_files = schema_path
elif isinstance(schema_path, Path) and schema_path.is_file():
yml_files = [schema_path]
else:
yml_files = schema_path.glob("*.yml")
for yml_file in yml_files:
module = yml_file.stem
with open(yml_file, "r", encoding="utf-8") as f:
self.available_tools[module] = yaml.safe_load(f)
def _parse_recommend_tools(self, module: str, recommend_tools: list) -> dict:
def _parse_recommend_tools(self, recommend_tools: list) -> dict:
"""
Parses and validates a list of recommended tools, and retrieves their schema from registry.
Args:
module (str): The module name for querying tools in the registry.
recommend_tools (list): A list of recommended tools.
Returns:
dict: A dict of valid tool schemas.
"""
valid_tools = []
available_tools = self.available_tools[module].keys()
for tool in recommend_tools:
if tool in available_tools:
valid_tools.append(tool)
for tool_name in recommend_tools:
if TOOL_REGISTRY.has_tool(tool_name):
valid_tools.append(TOOL_REGISTRY.get_tool(tool_name))
tool_catalog = {tool: self.available_tools[module][tool] for tool in valid_tools}
tool_catalog = {tool.name: tool.schemas for tool in valid_tools}
return tool_catalog
async def _tool_recommendation(
@ -176,31 +149,24 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
tool_type = (
plan.current_task.task_type
) # find tool type from task type through exact match, can extend to retrieval in the future
available_tools = self.available_tools.get(tool_type, {})
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
special_prompt = (
TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
)
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
code_context = [remove_comments(task.code) for task in finished_tasks]
code_context = "\n\n".join(code_context)
tool_catalog = {}
module_name = ""
if len(available_tools) > 0:
available_tools = {k: v["description"] for k, v in available_tools.items()}
if available_tools:
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
recommend_tools = await self._tool_recommendation(
plan.current_task.instruction, code_steps, available_tools
)
tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
tool_catalog = self._parse_recommend_tools(recommend_tools)
logger.info(f"Recommended tools: \n{recommend_tools}")
module_name = TOOL_TYPE_MODULE[tool_type]
tools_instruction = TOOL_USAGE_PROMPT.format(
special_prompt=special_prompt, module_name=module_name, tool_catalog=tool_catalog
)
tools_instruction = TOOL_USAGE_PROMPT.format(special_prompt=special_prompt, tool_catalog=tool_catalog)
context.append(Message(content=tools_instruction, role="user"))
@ -208,7 +174,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
rsp = await self.llm.aask_code(prompt, **tool_config)
return rsp["code"]
return rsp
class WriteCodeWithToolsML(WriteCodeWithTools):
@ -219,26 +185,28 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
column_info: str = "",
**kwargs,
) -> Tuple[List[Message], str]:
tool_type = plan.current_task.task_type
available_tools = self.available_tools.get(tool_type, {})
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
tool_type = (
plan.current_task.task_type
) # find tool type from task type through exact match, can extend to retrieval in the future
available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
special_prompt = (
TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
)
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
code_context = [remove_comments(task.code) for task in finished_tasks]
code_context = "\n\n".join(code_context)
if len(available_tools) > 0:
available_tools = {k: v["description"] for k, v in available_tools.items()}
if available_tools:
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
recommend_tools = await self._tool_recommendation(
plan.current_task.instruction, code_steps, available_tools
)
tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
tool_catalog = self._parse_recommend_tools(recommend_tools)
logger.info(f"Recommended tools: \n{recommend_tools}")
module_name = TOOL_TYPE_MODULE[tool_type]
prompt = ML_TOOL_USAGE_PROMPT.format(
user_requirement=plan.goal,
history_code=code_context,
@ -246,7 +214,6 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
column_info=column_info,
special_prompt=special_prompt,
code_steps=code_steps,
module_name=module_name,
tool_catalog=tool_catalog,
)
@ -263,7 +230,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
rsp = await self.llm.aask_code(prompt, **tool_config)
context = [Message(content=prompt, role="user")]
return context, rsp["code"]
return context, rsp
class MakeTools(WriteCodeByGenerate):

View file

@ -12,7 +12,7 @@ from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_CONFIG, ASSIGN_TASK_TYPE_PROMPT
from metagpt.schema import Message, Plan, Task
from metagpt.tools import TOOL_TYPE_MAPPINGS
from metagpt.tools import TOOL_REGISTRY
from metagpt.utils.common import CodeParser, create_func_config
@ -47,13 +47,16 @@ class WritePlan(Action):
List[Dict]: tasks with task type assigned
"""
task_list = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks])
task_type_desc = "\n".join([f"- **{item.name}**: {item.desc}" for item in TOOL_TYPE_MAPPINGS.values()])
task_type_desc = "\n".join(
[f"- **{tool_type.name}**: {tool_type.desc}" for tool_type in TOOL_REGISTRY.get_tool_types().values()]
) # task type are binded with tool type now, should be improved in the future
prompt = ASSIGN_TASK_TYPE_PROMPT.format(
task_list=task_list, task_type_desc=task_type_desc
) # task types are set to be the same as tool types, for now
tool_config = create_func_config(ASSIGN_TASK_TYPE_CONFIG)
rsp = await self.llm.aask_code(prompt, **tool_config)
task_type_list = rsp["task_type"]
print(f"assigned task types: {task_type_list}")
for task, task_type in zip(tasks, task_type_list):
task["task_type"] = task_type
return json.dumps(tasks)

View file

@ -70,8 +70,8 @@ TMP = METAGPT_ROOT / "tmp"
SOURCE_ROOT = METAGPT_ROOT / "metagpt"
PROMPT_PATH = SOURCE_ROOT / "prompts"
SKILL_DIRECTORY = SOURCE_ROOT / "skills"
TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/functions/schemas"
TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/functions/libs"
TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/schemas"
TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/libs"
# REAL CONSTS

View file

@ -15,7 +15,7 @@ Keep dataset column information updated before model train.
# Task
Update and print the dataset's column information only if the train or test data has changed. Use the following code:
```python
from metagpt.tools.functions.libs.data_preprocess import get_column_info
from metagpt.tools.libs.data_preprocess import get_column_info
column_info = get_column_info(df)
print("column_info")
@ -134,16 +134,12 @@ PRINT_DATA_COLUMNS = {
"parameters": {
"type": "object",
"properties": {
"is_update": {
"type": "boolean",
"description": "Whether need to update the column info.",
},
"code": {
"type": "string",
"description": "The code to be added to a new cell in jupyter.",
},
},
"required": ["is_update", "code"],
"required": ["code"],
},
}
@ -203,7 +199,7 @@ Specifically, {special_prompt}
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
# Available Tools (can be empty):
Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first.
Each Class tool is described in JSON format. When you call a tool, import the tool first.
{tool_catalog}
# Constraints:
@ -240,7 +236,7 @@ Strictly follow steps below when you writing code if it's convenient.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
# Available Tools:
Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first.
Each Class tool is described in JSON format. When you call a tool, import the tool from its path first.
{tool_catalog}
# Output Example:
@ -248,7 +244,7 @@ when current task is "do data preprocess, like fill missing value, handle outlie
```python
# Step 1: fill missing value
# Tools used: ['FillMissingValue']
from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
from metagpt.tools.libs.data_preprocess import FillMissingValue
train_processed = train.copy()
test_processed = test.copy()

View file

@ -39,7 +39,7 @@ The current task is about evaluating a model, please note the following:
"""
# Prompt for using tools of "vision" type
VISION_PROMPT = """
IMAGE2WEBPAGE_PROMPT = """
The current task is about converting image into webpage code. please note the following:
- Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.
"""
"""

View file

@ -5,6 +5,7 @@ from pydantic import Field
from metagpt.actions.ask_review import ReviewConst
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
from metagpt.actions.write_code_steps import WriteCodeSteps
from metagpt.logs import logger
from metagpt.roles import Role
from metagpt.roles.tool_maker import ToolMaker
@ -16,6 +17,7 @@ class CodeInterpreter(Role):
auto_run: bool = True
use_tools: bool = False
make_udfs: bool = False # whether to save user-defined functions
use_code_steps: bool = False
execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True)
def __init__(
@ -52,10 +54,14 @@ class CodeInterpreter(Role):
async def _act_on_task(self, current_task: Task) -> TaskResult:
code, result, is_success = await self._write_and_exec_code()
task_result = TaskResult(code=code['code'], result=result, is_success=is_success)
task_result = TaskResult(code=code, result=result, is_success=is_success)
return task_result
async def _write_and_exec_code(self, max_retry: int = 3):
self.planner.current_task.code_steps = (
await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else ""
)
counter = 0
success = False
@ -63,7 +69,7 @@ class CodeInterpreter(Role):
### write code ###
code, cause_by = await self._write_code()
self.working_memory.add(Message(content=code['code'], role="assistant", cause_by=cause_by))
self.working_memory.add(Message(content=code["code"], role="assistant", cause_by=cause_by))
### execute code ###
result, success = await self.execute_code.run(**code)
@ -72,7 +78,7 @@ class CodeInterpreter(Role):
self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode))
### process execution result ###
if "!pip" in code:
if "!pip" in code["code"]:
success = False
counter += 1
@ -83,17 +89,15 @@ class CodeInterpreter(Role):
if ReviewConst.CHANGE_WORD[0] in review:
counter = 0 # redo the task again with help of human suggestions
return code, result, success
return code["code"], result, success
async def _write_code(self):
todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools()
logger.info(f"ready to {todo.name}")
context = self.planner.get_useful_memories()
# print(*context, sep="\n***\n")
code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0)
# 暂时在这里转换 WriteCodeWithTools 的输出
if isinstance(code, str):
code = {'code': code, 'language': 'python'}
return code, todo

View file

@ -1,64 +1,43 @@
from metagpt.actions.ask_review import ReviewConst
from metagpt.actions.debug_code import DebugCode
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.ml_da_action import Reflect, SummarizeAnalysis, UpdateDataColumns
from metagpt.actions.ml_da_action import UpdateDataColumns
from metagpt.actions.write_analysis_code import WriteCodeWithToolsML
from metagpt.actions.write_code_steps import WriteCodeSteps
from metagpt.logs import logger
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
from metagpt.schema import Message
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.utils.common import any_to_str
class MLEngineer(CodeInterpreter):
use_code_steps: bool = False
use_udfs: bool = False
data_desc: dict = {}
debug_context: list = []
latest_code: str = ""
def __init__(self, name="Mark", profile="MLEngineer", **kwargs):
super().__init__(name=name, profile=profile, **kwargs)
# self._watch([DownloadData, SubmitResult]) # in multi-agent settings
async def _plan_and_act(self):
### a new attempt on the data, relevant in a multi-agent multi-turn setting ###
await self._prepare_data_context()
### general plan process ###
await super()._plan_and_act()
### summarize analysis ###
summary = await SummarizeAnalysis().run(self.planner.plan)
rsp = Message(content=summary, cause_by=SummarizeAnalysis)
self.rc.memory.add(rsp)
return rsp
async def _write_and_exec_code(self, max_retry: int = 3):
self.planner.current_task.code_steps = (
await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else ""
)
code, result, success = await super()._write_and_exec_code(max_retry=max_retry)
if success:
if self.use_tools and self.planner.current_task.task_type in ["data_preprocess", "feature_engineering"]:
update_success, new_code = await self._update_data_columns()
if update_success:
code = code + "\n\n" + new_code
return code, result, success
async def _write_code(self):
if not self.use_tools:
return await super()._write_code()
code_execution_count = sum([msg.cause_by == any_to_str(ExecutePyCode) for msg in self.working_memory.get()])
# In a trial and errors settings, check whether this is our first attempt to tackle the task. If there is no code execution before, then it is.
is_first_trial = any_to_str(ExecutePyCode) not in [msg.cause_by for msg in self.working_memory.get()]
if code_execution_count > 0:
logger.warning("We got a bug code, now start to debug...")
if is_first_trial:
# For the first trial, write task code from scratch
column_info = await self._update_data_columns()
logger.info("Write code with tools")
tool_context, code = await WriteCodeWithToolsML().run(
context=[], # context assembled inside the Action
plan=self.planner.plan,
column_info=column_info,
)
self.debug_context = tool_context
cause_by = WriteCodeWithToolsML
else:
# Previous trials resulted in error, debug and rewrite the code
logger.warning("We got a bug, now start to debug...")
code = await DebugCode().run(
code=self.latest_code,
runtime_result=self.working_memory.get(),
@ -67,49 +46,21 @@ class MLEngineer(CodeInterpreter):
logger.info(f"new code \n{code}")
cause_by = DebugCode
else:
logger.info("Write code with tools")
tool_context, code = await WriteCodeWithToolsML().run(
context=[], # context assembled inside the Action
plan=self.planner.plan,
column_info=self.data_desc.get("column_info", ""),
)
self.debug_context = tool_context
cause_by = WriteCodeWithToolsML
self.latest_code = code
self.latest_code = code["code"]
return code, cause_by
async def _update_data_columns(self):
current_task = self.planner.plan.current_task
if current_task.task_type not in [
ToolTypeEnum.DATA_PREPROCESS.value,
ToolTypeEnum.FEATURE_ENGINEERING.value,
ToolTypeEnum.MODEL_TRAIN.value,
]:
return ""
logger.info("Check columns in updated data")
rsp = await UpdateDataColumns().run(self.planner.plan)
is_update, code = rsp["is_update"], rsp["code"]
code = await UpdateDataColumns().run(self.planner.plan)
success = False
if is_update:
result, success = await self.execute_code.run(code)
if success:
print(result)
self.data_desc["column_info"] = result
return success, code
async def _prepare_data_context(self):
memories = self.get_memories()
if memories:
latest_event = memories[-1].cause_by
if latest_event == DownloadData:
self.planner.plan.context = memories[-1].content
elif latest_event == SubmitResult:
# self reflect on previous plan outcomes and think about how to improve the plan, add to working memory
await self._reflect()
# get feedback for improvement from human, add to working memory
await self.planner.ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
async def _reflect(self):
context = self.get_memories()
context = "\n".join([str(msg) for msg in context])
reflection = await Reflect().run(context=context)
self.working_memory.add(Message(content=reflection, role="assistant"))
self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user"))
result, success = await self.execute_code.run(**code)
print(result)
return result if success else ""

View file

@ -7,17 +7,11 @@
"""
from enum import Enum
from metagpt.tools import tool_types # this registers all tool types
from metagpt.tools import libs # this registers all tools
from metagpt.tools.tool_registry import TOOL_REGISTRY
from pydantic import BaseModel
from metagpt.const import TOOL_LIBS_PATH
from metagpt.prompts.tool_type import (
DATA_PREPROCESS_PROMPT,
FEATURE_ENGINEERING_PROMPT,
MODEL_TRAIN_PROMPT,
MODEL_EVALUATE_PROMPT,
VISION_PROMPT,
)
_ = tool_types, libs, TOOL_REGISTRY # Avoid pre-commit error
class SearchEngineType(Enum):
@ -37,62 +31,3 @@ class WebBrowserEngineType(Enum):
def __missing__(cls, key):
"""Default type conversion"""
return cls.CUSTOM
class ToolType(BaseModel):
name: str
module: str = ""
desc: str
usage_prompt: str = ""
TOOL_TYPE_MAPPINGS = {
"data_preprocess": ToolType(
name="data_preprocess",
module=str(TOOL_LIBS_PATH / "data_preprocess"),
desc="Only for changing value inplace.",
usage_prompt=DATA_PREPROCESS_PROMPT,
),
"feature_engineering": ToolType(
name="feature_engineering",
module=str(TOOL_LIBS_PATH / "feature_engineering"),
desc="Only for creating new columns for input data.",
usage_prompt=FEATURE_ENGINEERING_PROMPT,
),
"model_train": ToolType(
name="model_train",
module="",
desc="Only for training model.",
usage_prompt=MODEL_TRAIN_PROMPT,
),
"model_evaluate": ToolType(
name="model_evaluate",
module="",
desc="Only for evaluating model.",
usage_prompt=MODEL_EVALUATE_PROMPT,
),
"stable_diffusion": ToolType(
name="stable_diffusion",
module="metagpt.tools.sd_engine",
desc="Related to text2image, image2image using stable diffusion model.",
usage_prompt="",
),
"scrape_web": ToolType(
name="scrape_web",
module="metagpt.tools.functions.libs.scrape_web.scrape_web",
desc="Scrape data from web page.",
usage_prompt="",
),
"vision": ToolType(
name="vision",
module=str(TOOL_LIBS_PATH / "vision"),
desc="Only for converting image into webpage code.",
usage_prompt=VISION_PROMPT,
),
"other": ToolType(
name="other",
module="",
desc="Any tasks that do not fit into the previous categories",
usage_prompt="",
),
}

View file

@ -1,6 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:32
# @Author : lidanyang
# @File : __init__.py
# @Desc :

View file

@ -1,6 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:32
# @Author : lidanyang
# @File : __init__.py
# @Desc :

View file

@ -1,16 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/12/10 20:12
# @Author : lidanyang
# @File : base
# @Desc :
class MLProcess(object):
    """Base class declaring the fit / transform / fit_transform protocol."""

    def fit(self, df):
        """Fitting hook; the base class provides no implementation."""
        raise NotImplementedError

    def transform(self, df):
        """Transformation hook; the base class provides no implementation."""
        raise NotImplementedError

    def fit_transform(self, df):
        """Run ``fit`` on ``df`` and return what ``transform`` yields for the same ``df``."""
        self.fit(df)
        transformed = self.transform(df)
        return transformed

View file

@ -1 +0,0 @@
from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web

View file

@ -1,126 +0,0 @@
import ast
import os
import re
import yaml
import inspect
import importlib
from pathlib import Path
from typing import List
from metagpt.logs import logger
def extract_function_signatures(file_path):
    """Collect signature and return-variable metadata for functions in a UDF source file.

    The file is parsed with ``ast`` to discover function definitions, and the
    module ``metagpt.tools.functions.libs.udf.<file stem>`` is imported so that
    docstrings and source lines can be read from the live function objects.
    Every discovered function is also placed into this module's globals.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        A ``(function_signatures, function_returns)`` tuple of two lists with
        exactly one entry each per discovered function, in the same order:
        signature entries carry ``udf_name`` (``name(arg, ...)``), ``udf_path``
        (an import statement) and ``udf_doc`` (the docstring, or None);
        return entries carry ``udf_name`` and ``udf_returns`` (the names found
        on the first ``return`` line, or ``[None]`` when there is none).
    """
    with open(file_path, "r", encoding="utf-8") as file:
        source_code = file.read()

    tree = ast.parse(source_code)
    # ``stem`` removes the extension safely; slicing with ``-len(suffix)``
    # produces an empty name when the path has no suffix.
    module_name = Path(file_path).stem
    module_path = f"metagpt.tools.functions.libs.udf.{module_name}"

    function_signatures = []
    function_returns = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.FunctionDef):
            continue
        # Only user-defined functions are extracted; dunder names are skipped.
        if node.name.startswith("__") and node.name.endswith("__"):
            continue

        function_name = node.name
        args = [arg.arg for arg in node.args.args]
        function_signature = f"{function_name}({', '.join(args)})"

        # Imported lazily so a file without functions is never imported;
        # repeated calls are served from the import cache.
        module = importlib.import_module(module_path)
        func = getattr(module, function_name)
        # Expose the function in the current namespace.
        globals().update({function_name: func})

        # Record the docstring and the import path of the function.
        function_signatures.append(
            {
                "udf_name": function_signature,
                "udf_path": f"from metagpt.tools.functions.libs.udf.{module_name} import {function_name}",
                "udf_doc": inspect.getdoc(func),
            }
        )

        # Record the variable names on the first ``return`` line.
        source_lines, _ = inspect.getsourcelines(func)
        for line in source_lines:
            stripped = line.strip()
            if stripped.startswith("return "):
                function_returns.append(
                    {
                        "udf_name": function_name,
                        "udf_returns": [var.strip() for var in stripped[len("return ") :].split(",")],
                    }
                )
                break
        else:
            # No ``return <value>`` line: always add a placeholder so both
            # lists stay aligned, even when consecutive functions share a name.
            function_returns.append({"udf_name": function_name, "udf_returns": [None]})

    return function_signatures, function_returns
def get_function_signatures_in_folder(folder_path):
    """Gather function signatures and return metadata from the Python files of a folder.

    Entries that do not end in ``.py`` and the file ``__init__.py`` are ignored.
    Every remaining file is handed to ``extract_function_signatures`` and the
    per-file results are concatenated.

    Args:
        folder_path: Directory whose direct entries are scanned.

    Returns:
        A ``(all_function_signatures, all_function_returns)`` tuple of lists.
    """
    all_function_signatures = []
    all_function_returns = []
    for entry in os.listdir(folder_path):
        if not entry.endswith(".py") or entry == "__init__.py":
            continue
        signatures, returns = extract_function_signatures(os.path.join(folder_path, entry))
        all_function_signatures.extend(signatures)
        all_function_returns.extend(returns)
    return all_function_signatures, all_function_returns
# Create Tools Yaml Style Schema
def docstring_to_yaml(docstring: str, return_vars: List[str] = None):
    """Turn a docstring with ``Args:`` / ``Returns:`` / ``Raises:`` sections into a schema dict.

    Args:
        docstring: The docstring to parse; ``None`` produces an empty dict.
        return_vars: Names of the returned variables, paired positionally with
            the entries parsed from the ``Returns:`` section. A non-list value
            is wrapped into a single-element list.

    Returns:
        A dict with the keys ``description``, ``parameters`` (holding
        ``properties`` and ``required``) and ``returns``.
    """
    logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n")
    if docstring is None:
        return {}

    def _section(pattern):
        # First capture group of ``pattern`` in the docstring, stripped; "" if absent.
        found = re.search(pattern, docstring, re.DOTALL)
        return found.group(1).strip() if found else ""

    # Summary text that precedes the first section header.
    summary = _section(r"^(.*?)(?:Args:|Returns:|Raises:|$)")

    # ``name (type): description`` entries of the Args section.
    args_text = _section(r"Args:\s*(.*?)(?:Returns:|Raises:|$)")
    arg_entries = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)").findall(args_text)

    # ``type: description`` entries of the Returns section.
    returns_text = _section(r"Returns:\s*(.*?)(?:Raises:|$)")
    return_entries = re.compile(r"^(.*)\s*:\s*(.*)$").findall(returns_text)

    # Attach the returned variable names to the parsed return entries.
    var_names = return_vars if isinstance(return_vars, list) else [return_vars]

    # Assemble the YAML-ready dictionary.
    return {
        "description": summary.strip(".").strip(),
        "parameters": {
            "properties": {
                arg_name: {"type": arg_type, "description": arg_desc}
                for arg_name, arg_type, arg_desc in arg_entries
            },
            "required": [arg_name for arg_name, _, _ in arg_entries],
        },
        "returns": {
            var_name: {"type": ret_type, "description": ret_desc}
            for (ret_type, ret_desc), var_name in zip(return_entries, var_names)
        },
    }
def extract_function_schema_yaml_in_folder(folder_path: str):
    """Build a YAML string describing every documented function found in a folder.

    Functions without a docstring are left out. Each remaining function gets
    the schema produced by ``docstring_to_yaml`` plus a ``type`` of
    ``"function"``, keyed by the function name.

    Args:
        folder_path: Directory passed on to ``get_function_signatures_in_folder``.

    Returns:
        The schemas dumped with ``yaml.dump`` in block (non-flow) style.
    """
    signatures, returns = get_function_signatures_in_folder(folder_path)
    schemas = {}
    for signature, returned in zip(signatures, returns):
        doc = signature["udf_doc"]
        if not doc:
            continue
        schema = docstring_to_yaml(doc, returned["udf_returns"])
        schema["type"] = "function"
        schemas[returned["udf_name"]] = schema
    return yaml.dump(schemas, default_flow_style=False)
# Import-time registry: scan the folder that contains this file and expose the
# discovered functions both as a list of signature dicts and as a YAML schema.
folder_path = str(Path(__file__).parent.absolute())
function_signatures, function_returns = get_function_signatures_in_folder(folder_path)

UDFS = list(function_signatures)
UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path)
# The YAML string is produced by this module itself, not read from external input.
UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, Loader=yaml.FullLoader)

View file

@ -1,306 +0,0 @@
FillMissingValue:
type: class
description: "Completing missing values with simple strategies"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
strategy:
type: str
description: "the imputation strategy, notice mean/median can only be used for numeric features"
default: mean
enum:
- mean
- median
- most_frequent
- constant
fill_value:
type: int
description: "fill_value is used to replace all occurrences of missing_values"
default: null
required:
- features
fit:
description: "Fit the FillMissingValue model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
MinMaxScale:
type: class
description: "Transform features by scaling each feature to a range, which is (0, 1)"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MinMaxScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
StandardScale:
type: class
description: "Standardize features by removing the mean and scaling to unit variance"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the StandardScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
MaxAbsScale:
type: class
description: "Scale each feature by its maximum absolute value"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MaxAbsScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
LabelEncode:
type: class
description: "Apply label encoding to specified categorical columns in-place."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be label encoded"
required:
- features
fit:
description: "Fit the LabelEncode model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
OneHotEncode:
type: class
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be one-hot encoded and dropped"
required:
- features
fit:
description: "Fit the OneHotEncode model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,15 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:32
# @Author : lidanyang
# @File : __init__.py
# @Desc :
from metagpt.tools.libs import (
data_preprocess,
feature_engineering,
sd_engine,
gpt_v_generator,
web_scrapping,
)
_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scrapping # Avoid pre-commit error

View file

@ -13,9 +13,25 @@ from sklearn.preprocessing import (
StandardScaler,
)
from metagpt.tools.functions.libs.base import MLProcess
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool
TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value
class MLProcess:
    """Base interface for fit/transform style processors.

    Subclasses are expected to override ``fit`` and ``transform``;
    ``fit_transform`` simply chains the two on the same input.
    """

    def fit(self, df):
        """Learn any state needed from ``df``. Must be overridden."""
        raise NotImplementedError

    def transform(self, df):
        """Apply the processor to ``df``. Must be overridden."""
        raise NotImplementedError

    def fit_transform(self, df):
        """Fit on ``df`` and return the result of transforming that same ``df``."""
        self.fit(df)
        transformed = self.transform(df)
        return transformed
@register_tool(tool_type=TOOL_TYPE)
class FillMissingValue(MLProcess):
def __init__(
self,
@ -42,6 +58,7 @@ class FillMissingValue(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class MinMaxScale(MLProcess):
def __init__(
self,
@ -60,6 +77,7 @@ class MinMaxScale(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class StandardScale(MLProcess):
def __init__(
self,
@ -78,6 +96,7 @@ class StandardScale(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class MaxAbsScale(MLProcess):
def __init__(
self,
@ -96,6 +115,7 @@ class MaxAbsScale(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class RobustScale(MLProcess):
def __init__(
self,
@ -114,6 +134,7 @@ class RobustScale(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class OrdinalEncode(MLProcess):
def __init__(
self,
@ -132,6 +153,7 @@ class OrdinalEncode(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class OneHotEncode(MLProcess):
def __init__(
self,
@ -153,6 +175,7 @@ class OneHotEncode(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class LabelEncode(MLProcess):
def __init__(
self,
@ -181,6 +204,7 @@ class LabelEncode(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
def get_column_info(df: pd.DataFrame) -> dict:
column_info = {
"Category": [],

View file

@ -6,7 +6,7 @@
# @Desc : Feature Engineering Tools
import itertools
import lightgbm as lgb
# import lightgbm as lgb
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
@ -15,9 +15,14 @@ from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
from metagpt.tools.functions.libs.base import MLProcess
from metagpt.tools.libs.data_preprocess import MLProcess
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool
TOOL_TYPE = ToolTypeEnum.FEATURE_ENGINEERING.value
@register_tool(tool_type=TOOL_TYPE)
class PolynomialExpansion(MLProcess):
def __init__(self, cols: list, degree: int = 2, label_col: str = None):
self.cols = cols
@ -48,6 +53,7 @@ class PolynomialExpansion(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class CatCount(MLProcess):
def __init__(self, col: str):
self.col = col
@ -62,6 +68,7 @@ class CatCount(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class TargetMeanEncoder(MLProcess):
def __init__(self, col: str, label: str):
self.col = col
@ -77,6 +84,7 @@ class TargetMeanEncoder(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class KFoldTargetMeanEncoder(MLProcess):
def __init__(self, col: str, label: str, n_splits: int = 5, random_state: int = 2021):
self.col = col
@ -103,6 +111,7 @@ class KFoldTargetMeanEncoder(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class CatCross(MLProcess):
def __init__(self, cols: list, max_cat_num: int = 100):
self.cols = cols
@ -138,6 +147,7 @@ class CatCross(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class GroupStat(MLProcess):
def __init__(self, group_col: str, agg_col: str, agg_funcs: list):
self.group_col = group_col
@ -157,6 +167,7 @@ class GroupStat(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class SplitBins(MLProcess):
def __init__(self, cols: list, strategy: str = "quantile"):
self.cols = cols
@ -173,6 +184,7 @@ class SplitBins(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class ExtractTimeComps(MLProcess):
def __init__(self, time_col: str, time_comps: list):
self.time_col = time_col
@ -201,6 +213,7 @@ class ExtractTimeComps(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class GeneralSelection(MLProcess):
def __init__(self, label_col: str):
self.label_col = label_col
@ -228,6 +241,7 @@ class GeneralSelection(MLProcess):
return new_df
# skip for now because lgb is needed
class TreeBasedSelection(MLProcess):
def __init__(self, label_col: str, task_type: str):
self.label_col = label_col
@ -270,6 +284,7 @@ class TreeBasedSelection(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
class VarianceBasedSelection(MLProcess):
def __init__(self, label_col: str, threshold: float = 0):
self.label_col = label_col

View file

@ -5,18 +5,13 @@
@Author : mannaandpoem
@File : vision.py
"""
import base64
from pathlib import Path
import requests
import base64
from metagpt.config import CONFIG
OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
API_KEY = CONFIG.OPENAI_API_KEY
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool
ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
@ -33,8 +28,15 @@ As the design pays tribute to large companies, sometimes it is normal for some c
Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
class Vision:
@register_tool(tool_type=ToolTypeEnum.IMAGE2WEBPAGE.value)
class GPTvGenerator:
def __init__(self):
from metagpt.config import CONFIG
OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL
API_KEY = CONFIG.OPENAI_API_KEY
MODEL = CONFIG.OPENAI_VISION_MODEL
MAX_TOKENS = CONFIG.VISION_MAX_TOKENS
self.api_key = API_KEY
self.api_base = OPENAI_API_BASE
self.model = MODEL
@ -51,10 +53,7 @@ class Vision:
def get_result(self, image_path, prompt):
base64_image = self.encode_image(image_path)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {self.api_key}"
}
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
payload = {
"model": self.model,
"messages": [
@ -62,11 +61,8 @@ class Vision:
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
}
]
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
],
}
],
"max_tokens": self.max_tokens,
@ -81,7 +77,7 @@ class Vision:
@staticmethod
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
return base64.b64encode(image_file.read()).decode("utf-8")
@staticmethod
def save_webpages(image_path, webpages) -> Path:

View file

@ -13,9 +13,10 @@ import requests
from aiohttp import ClientSession
from PIL import Image, PngImagePlugin
from metagpt.config import CONFIG
from metagpt.const import SD_OUTPUT_FILE_REPO
from metagpt.logs import logger
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool
payload = {
"prompt": "",
@ -51,8 +52,11 @@ payload = {
default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"
@register_tool(tool_type=ToolTypeEnum.STABLE_DIFFUSION.value)
class SDEngine:
def __init__(self, sd_url=""):
from metagpt.config import CONFIG
# Initialize the SDEngine with configuration
self.sd_url = sd_url if sd_url else CONFIG.get("SD_URL")
self.sd_t2i_url = f"{self.sd_url}{CONFIG.get('SD_T2I_API')}"

View file

@ -1,9 +1,10 @@
import asyncio
from metagpt.tools.tool_data_type import ToolTypeEnum
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper
async def scrape_web(url, *urls):
@register_tool(tool_type=ToolTypeEnum.WEBSCRAPING.value)
async def scrape_web_playwright(url, *urls):
"""
Scrape and save the HTML structure and inner text content of a web page using Playwright.
@ -19,5 +20,3 @@ async def scrape_web(url, *urls):
# Return the inner text content of the web page
return {"inner_text": web.inner_text, "html": web.html}
# Three places need to be changed: the yaml, the __init__ under the corresponding path, and ML_MODULE_MAP in MetaGPT/metagpt/prompts/ml_engineer.py

View file

@ -0,0 +1,61 @@
FillMissingValue:
type: class
description: "Completing missing values with simple strategies"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
strategy:
type: str
description: "the imputation strategy, notice mean/median can only be used for numeric features"
default: mean
enum:
- mean
- median
- most_frequent
- constant
fill_value:
type: int
description: "fill_value is used to replace all occurrences of missing_values"
default: null
required:
- features
fit:
description: "Fit the FillMissingValue model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
LabelEncode:
type: class
description: "Apply label encoding to specified categorical columns in-place."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be label encoded"
required:
- features
fit:
description: "Fit the LabelEncode model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
MaxAbsScale:
type: class
description: "Scale each feature by its maximum absolute value"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MaxAbsScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
MinMaxScale:
type: class
description: "Transform features by scaling each feature to a range, which is (0, 1)"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the MinMaxScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
OneHotEncode:
type: class
description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "Categorical columns to be one-hot encoded and dropped"
required:
- features
fit:
description: "Fit the OneHotEncode model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
StandardScale:
type: class
description: "Standardize features by removing the mean and scaling to unit variance"
methods:
__init__:
description: "Initialize self."
parameters:
properties:
features:
type: list
description: "columns to be processed"
required:
- features
fit:
description: "Fit the StandardScale model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
CatCount:
type: class
description: "Add value counts of a categorical column as new feature."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column for value counts."
required:
- col
fit:
description: "Fit the CatCount model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,52 @@
CatCross:
type: class
description: "Add pairwise crossed features and convert them to numerical features."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns to be pairwise crossed, at least 2 columns."
max_cat_num:
type: int
description: "Maximum unique categories per crossed feature."
default: 100
required:
- cols
fit:
description: "Fit the CatCross model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,48 @@
GeneralSelection:
type: class
description: "Drop all nan feats and feats with only one unique value."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
required:
- label_col
fit:
description: "Fit the GeneralSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,58 @@
GroupStat:
type: class
description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
group_col:
type: str
description: "Column used for grouping."
agg_col:
type: str
description: "Column on which aggregation is performed."
agg_funcs:
type: list
description: >-
List of aggregation functions to apply, such as ['mean', 'std'].
Each function must be supported by pandas.
required:
- group_col
- agg_col
- agg_funcs
fit:
description: "Fit the GroupStat model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,60 @@
KFoldTargetMeanEncoder:
type: class
description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column to be k-fold mean encoded."
label:
type: str
description: "Predicted label column."
n_splits:
type: int
description: "Number of splits for K-fold."
default: 5
random_state:
type: int
description: "Random seed."
default: 2021
required:
- col
- label
fit:
description: "Fit the KFoldTargetMeanEncoder model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,56 @@
SplitBins:
type: class
description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns to be binned inplace."
strategy:
type: str
description: "Strategy used to define the widths of the bins."
default: quantile
enum:
- quantile
- uniform
- kmeans
required:
- cols
fit:
description: "Fit the SplitBins model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,52 @@
TargetMeanEncoder:
type: class
description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
col:
type: str
description: "Column to be mean encoded."
label:
type: str
description: "Predicted label column."
required:
- col
- label
fit:
description: "Fit the TargetMeanEncoder model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame."

View file

@ -0,0 +1,56 @@
TreeBasedSelection:
type: class
description: "Select features based on tree-based model and remove features with low importance."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
task_type:
type: str
description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression."
enum:
- cls
- mcls
- reg
required:
- label_col
- task_type
fit:
description: "Fit the TreeBasedSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame, containing label_col."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame, containing label_col."

View file

@ -0,0 +1,52 @@
VarianceBasedSelection:
type: class
description: "Select features based on variance and remove features with low variance."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
label_col:
type: str
description: "Label column name."
threshold:
type: float
description: "Threshold for variance."
default: 0.0
required:
- label_col
fit:
description: "Fit the VarianceBasedSelection model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
transform:
description: "Transform the input DataFrame with the fitted model."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame, containing label_col."
fit_transform:
description: "Fit and transform the input DataFrame."
parameters:
properties:
df:
type: DataFrame
description: "The input DataFrame."
required:
- df
returns:
df:
type: DataFrame
description: "The transformed DataFrame, containing label_col."

View file

@ -1,4 +1,4 @@
Vision:
GPTvGenerator:
type: class
description: "Class for generating web pages at once."
methods:

View file

@ -1,4 +1,4 @@
scrape_web:
scrape_web_playwright:
type: async function
description: "Scrape and save the HTML structure and inner text content of a web page using Playwright."
parameters:

View file

@ -0,0 +1,35 @@
from enum import Enum
from pydantic import BaseModel
class ToolTypeEnum(Enum):
    """Closed set of tool categories; each member's value is the category's string id.

    Looking up an unknown value (``ToolTypeEnum("something_else")``) yields
    ``ToolTypeEnum.OTHER`` instead of raising ``ValueError``.
    """

    EDA = "eda"
    DATA_PREPROCESS = "data_preprocess"
    FEATURE_ENGINEERING = "feature_engineering"
    MODEL_TRAIN = "model_train"
    MODEL_EVALUATE = "model_evaluate"
    STABLE_DIFFUSION = "stable_diffusion"
    IMAGE2WEBPAGE = "image2webpage"
    WEBSCRAPING = "web_scraping"
    OTHER = "other"

    @classmethod
    def _missing_(cls, value):
        """Enum lookup hook: fall back to ``OTHER`` for any unrecognised value."""
        return cls.OTHER

    def __missing__(self, key):
        # Retained only for backward compatibility. ``__missing__`` is the dict
        # hook and is never invoked by Enum lookups; the fallback is actually
        # implemented by ``_missing_`` above.
        return self.OTHER
class ToolType(BaseModel):
    """Description of a tool category."""

    name: str  # identifier of the category
    desc: str  # short human-readable description of the category
    usage_prompt: str = ""  # optional prompt text attached to the category; empty by default
class ToolSchema(BaseModel):
    """Minimal model of a tool schema; currently it only declares a required ``name``."""

    name: str
class Tool(BaseModel):
    """Record describing a single tool."""

    name: str  # tool name
    path: str  # path of the code file that defines the tool
    schemas: dict = {}  # schema content loaded for the tool; empty by default
    code: str = ""  # source code of the tool; empty by default

View file

@ -0,0 +1,126 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/01/12 17:07
@Author : garylin2099
@File : tool_registry.py
"""
import inspect
import os
import re
from collections import defaultdict
import yaml
from metagpt.const import TOOL_SCHEMA_PATH
from metagpt.logs import logger
from metagpt.tools.tool_data_type import Tool, ToolSchema, ToolType
class ToolRegistry:
    """In-memory registry of tool types and tools, indexed by name and by tool type."""

    def __init__(self):
        self.tools = {}
        self.tool_types = {}
        self.tools_by_types = defaultdict(dict)  # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}

    def register_tool_type(self, tool_type: ToolType):
        """Store ``tool_type`` under its ``name``, replacing any previous entry."""
        self.tool_types[tool_type.name] = tool_type
        logger.info(f"tool type {tool_type.name} registered")

    def register_tool(
        self,
        tool_name,
        tool_path,
        schema_path=None,
        tool_code="",
        tool_type="other",
        make_schema_if_not_exists=False,
    ):
        """Register a tool together with the schema loaded from its YAML file.

        Args:
            tool_name: Name under which the tool is stored. An already registered
                name is left untouched.
            tool_path: Path of the code file that defines the tool.
            schema_path: YAML schema file. Defaults to
                ``TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"``.
            tool_code: Source code of the tool.
            tool_type: Category the tool is filed under in ``tools_by_types``.
            make_schema_if_not_exists: When the schema file is missing, create a
                placeholder via ``make_schema`` instead of skipping registration.
        """
        if self.has_tool(tool_name):
            return

        schema_path = schema_path or TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"

        if not os.path.exists(schema_path):
            if make_schema_if_not_exists:
                logger.warning(f"no schema found, will make schema at {schema_path}")
                make_schema(tool_code, schema_path)
            else:
                logger.warning(f"no schema found at assumed schema_path {schema_path}, skip registering {tool_name}")
                return

        with open(schema_path, "r", encoding="utf-8") as f:
            schema_dict = yaml.safe_load(f)
        # An empty file loads as None and a freshly made placeholder loads as {};
        # neither has a "first value", so normalise before looking anything up.
        if not isinstance(schema_dict, dict):
            schema_dict = {}
        # Prefer the entry keyed by the tool name, else fall back to the first entry.
        schemas = schema_dict.get(tool_name) or next(iter(schema_dict.values()), None)
        if not isinstance(schemas, dict):
            logger.warning(
                f"schema at {schema_path} is empty or malformed, registering {tool_name} with an empty schema"
            )
            schemas = {}
        schemas["tool_path"] = tool_path  # corresponding code file path of the tool

        try:
            ToolSchema(**schemas)  # validation
        except Exception as e:
            # Deliberately best-effort: a non-conforming schema is still used, so
            # the mismatch is only recorded at debug level.
            logger.debug(
                f"{tool_name} schema not conforms to required format, but will be used anyway. Mismatch: {e}"
            )

        tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code)
        self.tools[tool_name] = tool
        self.tools_by_types[tool_type][tool_name] = tool
        logger.info(f"{tool_name} registered")

    def has_tool(self, key):
        """Return True if a tool named ``key`` is registered."""
        return key in self.tools

    def get_tool(self, key):
        """Return the tool named ``key``, or None if it is not registered."""
        return self.tools.get(key)

    def get_tools_by_type(self, key):
        """Return the ``{tool_name: tool}`` mapping for tool type ``key``, or None if there is none."""
        return self.tools_by_types.get(key)

    def has_tool_type(self, key):
        """Return True if a tool type named ``key`` is registered."""
        return key in self.tool_types

    def get_tool_type(self, key):
        """Return the tool type named ``key``, or None if it is not registered."""
        return self.tool_types.get(key)

    def get_tool_types(self):
        """Return the mapping of all registered tool types, keyed by name."""
        return self.tool_types
# Registry instance: a single module-level ToolRegistry created at import time
TOOL_REGISTRY = ToolRegistry()
def register_tool_type(cls):
    """Class decorator: instantiate ``cls`` with no arguments and register the instance as a tool type.

    The decorated class itself is returned unchanged.
    """
    instance = cls()
    TOOL_REGISTRY.register_tool_type(tool_type=instance)
    return cls
def register_tool(tool_name="", tool_type="other", schema_path=None):
    """Decorator factory that registers the decorated function / class in ``TOOL_REGISTRY``.

    Args:
        tool_name: Registration name; falls back to the decorated object's ``__name__`` when empty.
        tool_type: Category the tool is registered under.
        schema_path: Optional explicit schema file path, forwarded to the registry.
    """

    def decorator(cls, tool_name=tool_name):
        # Locate the defining file; when the path mentions "metagpt", keep only
        # the part starting at its first occurrence.
        defined_in = inspect.getfile(cls)
        if "metagpt" in defined_in:
            defined_in = re.search("metagpt.+", defined_in).group(0)

        TOOL_REGISTRY.register_tool(
            tool_name=tool_name or cls.__name__,
            tool_path=defined_in,
            schema_path=schema_path,
            tool_code=inspect.getsource(cls),
            tool_type=tool_type,
        )
        return cls

    return decorator
def make_schema(tool_code, path):
    """Write a placeholder (empty) YAML schema file at ``path`` and return ``path``.

    ``tool_code`` is accepted but not used yet: the generated schema is empty for now.
    Missing parent directories are created first.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:  # a bare file name has no directory part, and os.makedirs("") would raise
        os.makedirs(parent_dir, exist_ok=True)  # Create the necessary directories
    schema = {}  # an empty schema for now
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(schema, f)
    return path

View file

@ -0,0 +1,68 @@
from metagpt.prompts.tool_types import (
DATA_PREPROCESS_PROMPT,
FEATURE_ENGINEERING_PROMPT,
IMAGE2WEBPAGE_PROMPT,
MODEL_EVALUATE_PROMPT,
MODEL_TRAIN_PROMPT,
)
from metagpt.tools.tool_data_type import ToolType, ToolTypeEnum
from metagpt.tools.tool_registry import register_tool_type
@register_tool_type
class EDA(ToolType):
    """Tool type for exploratory data analysis; no usage prompt is set here."""

    name: str = ToolTypeEnum.EDA.value
    desc: str = "For performing exploratory data analysis"
@register_tool_type
class DataPreprocess(ToolType):
    """Tool type for data preprocessing, with DATA_PREPROCESS_PROMPT as its usage prompt."""

    name: str = ToolTypeEnum.DATA_PREPROCESS.value
    desc: str = "Only for changing value inplace."
    usage_prompt: str = DATA_PREPROCESS_PROMPT
@register_tool_type
class FeatureEngineer(ToolType):
    """Tool type for feature engineering, with FEATURE_ENGINEERING_PROMPT as its usage prompt."""

    name: str = ToolTypeEnum.FEATURE_ENGINEERING.value
    desc: str = "Only for creating new columns for input data."
    usage_prompt: str = FEATURE_ENGINEERING_PROMPT
@register_tool_type
class ModelTrain(ToolType):
    """Tool type for model training, with MODEL_TRAIN_PROMPT as its usage prompt."""

    name: str = ToolTypeEnum.MODEL_TRAIN.value
    desc: str = "Only for training model."
    usage_prompt: str = MODEL_TRAIN_PROMPT
@register_tool_type
class ModelEvaluate(ToolType):
    """Tool type for model evaluation, with MODEL_EVALUATE_PROMPT as its usage prompt."""

    name: str = ToolTypeEnum.MODEL_EVALUATE.value
    desc: str = "Only for evaluating model."
    usage_prompt: str = MODEL_EVALUATE_PROMPT
@register_tool_type
class StableDiffusion(ToolType):
    """Tool type for stable-diffusion image generation; no usage prompt is set here."""

    name: str = ToolTypeEnum.STABLE_DIFFUSION.value
    desc: str = "Related to text2image, image2image using stable diffusion model."
@register_tool_type
class Image2Webpage(ToolType):
    """Tool type for image-to-webpage conversion, with IMAGE2WEBPAGE_PROMPT as its usage prompt."""

    name: str = ToolTypeEnum.IMAGE2WEBPAGE.value
    desc: str = "For converting image into webpage code."
    usage_prompt: str = IMAGE2WEBPAGE_PROMPT
@register_tool_type
class WebScraping(ToolType):
    """Tool type for web scraping; no usage prompt is set here."""

    name: str = ToolTypeEnum.WEBSCRAPING.value
    desc: str = "For scraping data from web pages."
@register_tool_type
class Other(ToolType):
    """Catch-all tool type for tools outside the defined categories; no usage prompt is set here."""

    name: str = ToolTypeEnum.OTHER.value
    desc: str = "Any tools not in the defined categories"

View file

@ -12,7 +12,6 @@ from typing import Literal
from playwright.async_api import async_playwright
from metagpt.config import CONFIG
from metagpt.logs import logger
from metagpt.utils.parse_html import WebPage
@ -32,6 +31,8 @@ class PlaywrightWrapper:
launch_kwargs: dict | None = None,
**kwargs,
) -> None:
from metagpt.config import CONFIG
if browser_type is None:
browser_type = CONFIG.playwright_browser_type
self.browser_type = browser_type

View file

@ -1,6 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/17 10:24
# @Author : lidanyang
# @File : __init__.py
# @Desc :

View file

@ -1,48 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/15
@Author : mannaandpoem
@File : test_vision.py
"""
import pytest
from metagpt import logs
from metagpt.tools.functions.libs.vision import Vision
@pytest.fixture
def mock_webpages():
return """```html\n<html>\n<script src="scripts.js"></script>
<link rel="stylesheet" href="styles.css(">\n</html>\n```\n
```css\n.class { ... }\n```\n
```javascript\nfunction() { ... }\n```\n"""
def test_vision_generate_webpages(mocker, mock_webpages):
mocker.patch(
"metagpt.tools.functions.libs.vision.Vision.generate_web_pages",
return_value=mock_webpages
)
image_path = "image.png"
vision = Vision()
rsp = vision.generate_web_pages(image_path=image_path)
logs.logger.info(rsp)
assert "html" in rsp
assert "css" in rsp
assert "javascript" in rsp
def test_save_webpages(mocker, mock_webpages):
mocker.patch(
"metagpt.tools.functions.libs.vision.Vision.generate_web_pages",
return_value=mock_webpages
)
image_path = "image.png"
vision = Vision()
webpages = vision.generate_web_pages(image_path)
webpages_dir = vision.save_webpages(image_path=image_path, webpages=webpages)
logs.logger.info(webpages_dir)
assert webpages_dir.exists()

View file

@ -5,7 +5,7 @@ import numpy.testing as npt
import pandas as pd
import pytest
from metagpt.tools.functions.libs.data_preprocess import (
from metagpt.tools.libs.data_preprocess import (
FillMissingValue,
LabelEncode,
MaxAbsScale,

View file

@ -3,7 +3,7 @@ import pandas as pd
import pytest
from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris
from metagpt.tools.functions.libs.feature_engineering import (
from metagpt.tools.libs.feature_engineering import (
CatCount,
CatCross,
ExtractTimeComps,
@ -147,6 +147,7 @@ def test_general_selection(mock_dataset):
assert "cat2" not in transformed.columns
@pytest.mark.skip # skip because TreeBasedSelection needs lgb as dependency
def test_tree_based_selection(mock_dataset):
# regression
data = load_sklearn_data("housing")

View file

@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/15
@Author : mannaandpoem
@File : test_gpt_v_generator.py
"""
import pytest
from metagpt import logs
from metagpt.tools.libs.gpt_v_generator import GPTvGenerator
# Fixture: patches GPTvGenerator.generate_web_pages so it returns a canned text
# blob with three fenced sections (html, css, javascript), then hands back the
# `mocker` object itself (not the canned text).
# NOTE(review): the literal spans several physical lines; any leading whitespace
# inside it cannot be confirmed from this view -- verify before reformatting.
# NOTE(review): `styles.css(` in the href looks like a stray parenthesis -- confirm
# whether it is intentional test data.
@pytest.fixture
def mock_webpages(mocker):
mock_data = """```html\n<html>\n<script src="scripts.js"></script>
<link rel="stylesheet" href="styles.css(">\n</html>\n```\n
```css\n.class { ... }\n```\n
```javascript\nfunction() { ... }\n```\n"""
mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_web_pages", return_value=mock_data)
return mocker
def test_vision_generate_webpages(mock_webpages):
    """``GPTvGenerator.generate_web_pages`` output mentions html, css and javascript."""
    generated = GPTvGenerator().generate_web_pages(image_path="image.png")
    logs.logger.info(generated)

    # Each section marker must appear somewhere in the generated text.
    for marker in ("html", "css", "javascript"):
        assert marker in generated
def test_save_webpages(mock_webpages):
    """``GPTvGenerator.save_webpages`` returns a path that exists after saving."""
    source_image = "image.png"
    tool = GPTvGenerator()

    generated = tool.generate_web_pages(source_image)
    output_dir = tool.save_webpages(image_path=source_image, webpages=generated)
    logs.logger.info(output_dir)

    assert output_dir.exists()

View file

@ -4,7 +4,7 @@
# @Desc :
import pytest
from metagpt.tools.sd_engine import SDEngine
from metagpt.tools.libs.sd_engine import SDEngine
def test_sd_tools():

View file

@ -3,7 +3,7 @@ import json
import yaml
from metagpt.logs import logger
from metagpt.tools.functions.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml
from metagpt.tools.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml
def test_udfs():

View file

@ -0,0 +1,101 @@
import pytest
from metagpt.tools.tool_registry import ToolRegistry
from metagpt.tools.tool_types import ToolType
@pytest.fixture
def tool_registry():
    """Build a new ``ToolRegistry`` instance for the requesting test."""
    registry = ToolRegistry()
    return registry
# Fixture: fakes a tool schema file on disk.
#   * os.path.exists is patched to always return True.
#   * builtins.open is patched with mock_open, so reads yield the YAML text below.
# Returns the `mocker` object.
# NOTE(review): YAML nesting depends on indentation inside the literal, which
# cannot be confirmed from this view -- verify against the real file.
@pytest.fixture
def schema_yaml(mocker):
mock_yaml_content = """
tool_name:
key1: value1
key2: value2
"""
mocker.patch("os.path.exists", return_value=True)
mocker.patch("builtins.open", mocker.mock_open(read_data=mock_yaml_content))
return mocker
# Test Initialization
def test_initialization(tool_registry):
    """A new registry is a ``ToolRegistry`` whose three lookup tables equal ``{}``."""
    assert isinstance(tool_registry, ToolRegistry)
    # Attributes are read one at a time, in the original order.
    for table_name in ("tools", "tool_types", "tools_by_types"):
        assert getattr(tool_registry, table_name) == {}
# Test Tool Type Registration
def test_register_tool_type(tool_registry):
    """Registering a ``ToolType`` makes its name a key of ``tool_types``."""
    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
    known_types = tool_registry.tool_types
    assert "TestType" in known_types
# Test Tool Registration
def test_register_tool(tool_registry, schema_yaml):
    """With the schema file faked, a registered tool shows up in ``tools``."""
    name, path = "TestTool", "/path/to/tool"
    tool_registry.register_tool(name, path)
    assert name in tool_registry.tools
# Test Tool Registration with Non-existing Schema
def test_register_tool_no_schema(tool_registry, mocker):
    """When ``os.path.exists`` reports False, the tool is not added to ``tools``."""
    mocker.patch("os.path.exists", return_value=False)
    name, path = "TestTool", "/path/to/tool"
    tool_registry.register_tool(name, path)
    assert name not in tool_registry.tools
# Test Tool Existence Checks
def test_has_tool(tool_registry, schema_yaml):
    """``has_tool`` is truthy for a registered name and falsy for an unknown one."""
    registered, unknown = "TestTool", "NonexistentTool"
    tool_registry.register_tool(registered, "/path/to/tool")
    assert tool_registry.has_tool(registered)
    assert not tool_registry.has_tool(unknown)
# Test Tool Retrieval
def test_get_tool(tool_registry, schema_yaml):
    """``get_tool`` returns an object carrying the registered name and path."""
    expected_name, expected_path = "TestTool", "/path/to/tool"
    tool_registry.register_tool(expected_name, expected_path)

    found = tool_registry.get_tool(expected_name)

    assert found is not None
    assert found.name == expected_name
    assert found.path == expected_path
# Similar tests for has_tool_type, get_tool_type, get_tools_by_type
def test_has_tool_type(tool_registry):
    """``has_tool_type`` is truthy for a registered type name, falsy otherwise."""
    registered, unknown = "TestType", "NonexistentType"
    tool_registry.register_tool_type(ToolType(name=registered, desc="test"))
    assert tool_registry.has_tool_type(registered)
    assert not tool_registry.has_tool_type(unknown)
def test_get_tool_type(tool_registry):
    """``get_tool_type`` returns the registered type, identified by its name."""
    type_name = "TestType"
    tool_registry.register_tool_type(ToolType(name=type_name, desc="test"))

    fetched = tool_registry.get_tool_type(type_name)

    assert fetched is not None
    assert fetched.name == type_name
def test_get_tools_by_type(tool_registry, schema_yaml):
    """A tool registered under a type is listed by ``get_tools_by_type`` for that type."""
    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
    tool_registry.register_tool("TestTool", "/path/to/tool", tool_type="TestType")

    grouped = tool_registry.get_tools_by_type("TestType")

    assert grouped is not None
    assert "TestTool" in grouped
# Test case for when the tool type does not exist
def test_get_tools_by_nonexistent_type(tool_registry):
    """Looking up tools for an unregistered type name yields ``None``."""
    missing_type = "NonexistentType"
    result = tool_registry.get_tools_by_type(missing_type)
    assert result is None

View file

@ -69,7 +69,6 @@ class MockLLM(OriginalLLM):
A copy of metagpt.provider.openai_api.OpenAILLM.aask_code, we can't use super().aask because it will be mocked.
Since openai_api.OpenAILLM.aask_code is different from base_llm.BaseLLM.aask_code, we use the former.
"""
messages = self._process_message(messages)
rsp = await self._achat_completion_function(messages, **kwargs)
return self.get_choice_function_arguments(rsp)