diff --git a/.gitignore b/.gitignore index 87c7b3120..a69b3b1c2 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,7 @@ tests/metagpt/utils/file_repo_git *.png htmlcov htmlcov.* +cov.xml *.dot *.pkl *-structure.csv diff --git a/docs/FAQ-EN.md b/docs/FAQ-EN.md index d4a9f6097..145d27be9 100644 --- a/docs/FAQ-EN.md +++ b/docs/FAQ-EN.md @@ -130,7 +130,7 @@ 1. HTML Layout: Outputs the HTML code for the page. 1. CSS Styles (styles.css): Outputs the CSS code for the page. - 1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/sd_engine. + 1. Currently, the SD skill is a tool invoked by UIDesign. It instantiates the SDEngine, with specific code found in metagpt/tools/libs/sd_engine.py. 1. Configuration instructions for SD Skills: The SD interface is currently deployed based on *https://github.com/AUTOMATIC1111/stable-diffusion-webui* **For environmental configurations and model downloads, please refer to the aforementioned GitHub repository. 
To initiate the SD service that supports API calls, run the command specified in cmd with the parameter nowebui, i.e., diff --git a/metagpt/actions/debug_code.py b/metagpt/actions/debug_code.py index e5e0ac5d4..121c126c4 100644 --- a/metagpt/actions/debug_code.py +++ b/metagpt/actions/debug_code.py @@ -119,5 +119,4 @@ class DebugCode(BaseWriteAnalysisCode): runtime_result=runtime_result, ) # 根据reflection结果重写代码 - improv_code = reflection["improved_impl"] - return improv_code + return {"code": reflection["improved_impl"]} diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 9104fdf82..cf806a986 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -8,11 +8,9 @@ import re from pathlib import Path from typing import Dict, List, Tuple, Union -import yaml from tenacity import retry, stop_after_attempt, wait_fixed from metagpt.actions import Action -from metagpt.const import TOOL_SCHEMA_PATH from metagpt.llm import LLM from metagpt.logs import logger from metagpt.prompts.ml_engineer import ( @@ -24,12 +22,9 @@ from metagpt.prompts.ml_engineer import ( TOOL_USAGE_PROMPT, ) from metagpt.schema import Message, Plan -from metagpt.tools import TOOL_TYPE_MAPPINGS +from metagpt.tools.tool_registry import TOOL_REGISTRY from metagpt.utils.common import create_func_config, remove_comments -TOOL_TYPE_MODULE = {k: v.module for k, v in TOOL_TYPE_MAPPINGS.items()} -TOOL_TYPE_USAGE_PROMPT = {k: v.usage_prompt for k, v in TOOL_TYPE_MAPPINGS.items()} - class BaseWriteAnalysisCode(Action): DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. 
Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt @@ -64,7 +59,7 @@ class BaseWriteAnalysisCode(Action): } return messages - async def run(self, context: List[Message], plan: Plan = None) -> str: + async def run(self, context: List[Message], plan: Plan = None) -> dict: """Run of a code writing action, used in data analysis or modeling Args: @@ -72,7 +67,7 @@ class BaseWriteAnalysisCode(Action): plan (Plan, optional): Overall plan. Defaults to None. Returns: - str: The code string. + dict: code result in the format of {"code": "print('hello world')", "language": "python"} """ @@ -95,49 +90,27 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode): class WriteCodeWithTools(BaseWriteAnalysisCode): """Write code with help of local available tools. 
Choose tools first, then generate code to use the tools""" - schema_path: Union[Path, str] = TOOL_SCHEMA_PATH available_tools: dict = {} def __init__(self, **kwargs): super().__init__(**kwargs) - self._load_tools(self.schema_path) - def _load_tools(self, schema_path, schema_module=None): - """Load tools from yaml file""" - if isinstance(schema_path, dict): - schema_module = schema_module or "udf" - self.available_tools.update({schema_module: schema_path}) - else: - if isinstance(schema_path, list): - yml_files = schema_path - elif isinstance(schema_path, Path) and schema_path.is_file(): - yml_files = [schema_path] - else: - yml_files = schema_path.glob("*.yml") - - for yml_file in yml_files: - module = yml_file.stem - with open(yml_file, "r", encoding="utf-8") as f: - self.available_tools[module] = yaml.safe_load(f) - - def _parse_recommend_tools(self, module: str, recommend_tools: list) -> dict: + def _parse_recommend_tools(self, recommend_tools: list) -> dict: """ Parses and validates a list of recommended tools, and retrieves their schema from registry. Args: - module (str): The module name for querying tools in the registry. recommend_tools (list): A list of recommended tools. Returns: dict: A dict of valid tool schemas. 
""" valid_tools = [] - available_tools = self.available_tools[module].keys() - for tool in recommend_tools: - if tool in available_tools: - valid_tools.append(tool) + for tool_name in recommend_tools: + if TOOL_REGISTRY.has_tool(tool_name): + valid_tools.append(TOOL_REGISTRY.get_tool(tool_name)) - tool_catalog = {tool: self.available_tools[module][tool] for tool in valid_tools} + tool_catalog = {tool.name: tool.schemas for tool in valid_tools} return tool_catalog async def _tool_recommendation( @@ -176,31 +149,24 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): tool_type = ( plan.current_task.task_type ) # find tool type from task type through exact match, can extend to retrieval in the future - available_tools = self.available_tools.get(tool_type, {}) - special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "") + available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type) + special_prompt = ( + TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else "" + ) code_steps = plan.current_task.code_steps - finished_tasks = plan.get_finished_tasks() - code_context = [remove_comments(task.code) for task in finished_tasks] - code_context = "\n\n".join(code_context) - tool_catalog = {} - module_name = "" - if len(available_tools) > 0: - available_tools = {k: v["description"] for k, v in available_tools.items()} + if available_tools: + available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()} recommend_tools = await self._tool_recommendation( plan.current_task.instruction, code_steps, available_tools ) - tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools) + tool_catalog = self._parse_recommend_tools(recommend_tools) logger.info(f"Recommended tools: \n{recommend_tools}") - module_name = TOOL_TYPE_MODULE[tool_type] - - tools_instruction = TOOL_USAGE_PROMPT.format( - special_prompt=special_prompt, module_name=module_name, tool_catalog=tool_catalog - ) + 
tools_instruction = TOOL_USAGE_PROMPT.format(special_prompt=special_prompt, tool_catalog=tool_catalog) context.append(Message(content=tools_instruction, role="user")) @@ -208,7 +174,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) - return rsp["code"] + return rsp class WriteCodeWithToolsML(WriteCodeWithTools): @@ -219,26 +185,28 @@ class WriteCodeWithToolsML(WriteCodeWithTools): column_info: str = "", **kwargs, ) -> Tuple[List[Message], str]: - tool_type = plan.current_task.task_type - available_tools = self.available_tools.get(tool_type, {}) - special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "") + tool_type = ( + plan.current_task.task_type + ) # find tool type from task type through exact match, can extend to retrieval in the future + available_tools = TOOL_REGISTRY.get_tools_by_type(tool_type) + special_prompt = ( + TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else "" + ) code_steps = plan.current_task.code_steps finished_tasks = plan.get_finished_tasks() code_context = [remove_comments(task.code) for task in finished_tasks] code_context = "\n\n".join(code_context) - if len(available_tools) > 0: - available_tools = {k: v["description"] for k, v in available_tools.items()} + if available_tools: + available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()} recommend_tools = await self._tool_recommendation( plan.current_task.instruction, code_steps, available_tools ) - tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools) + tool_catalog = self._parse_recommend_tools(recommend_tools) logger.info(f"Recommended tools: \n{recommend_tools}") - module_name = TOOL_TYPE_MODULE[tool_type] - prompt = ML_TOOL_USAGE_PROMPT.format( user_requirement=plan.goal, history_code=code_context, @@ -246,7 +214,6 @@ class 
WriteCodeWithToolsML(WriteCodeWithTools): column_info=column_info, special_prompt=special_prompt, code_steps=code_steps, - module_name=module_name, tool_catalog=tool_catalog, ) @@ -263,7 +230,7 @@ class WriteCodeWithToolsML(WriteCodeWithTools): tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) context = [Message(content=prompt, role="user")] - return context, rsp["code"] + return context, rsp class MakeTools(WriteCodeByGenerate): diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py index c7ef541b9..60dcef43b 100644 --- a/metagpt/actions/write_plan.py +++ b/metagpt/actions/write_plan.py @@ -12,7 +12,7 @@ from metagpt.actions import Action from metagpt.logs import logger from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_CONFIG, ASSIGN_TASK_TYPE_PROMPT from metagpt.schema import Message, Plan, Task -from metagpt.tools import TOOL_TYPE_MAPPINGS +from metagpt.tools import TOOL_REGISTRY from metagpt.utils.common import CodeParser, create_func_config @@ -47,13 +47,16 @@ class WritePlan(Action): List[Dict]: tasks with task type assigned """ task_list = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks]) - task_type_desc = "\n".join([f"- **{item.name}**: {item.desc}" for item in TOOL_TYPE_MAPPINGS.values()]) + task_type_desc = "\n".join( + [f"- **{tool_type.name}**: {tool_type.desc}" for tool_type in TOOL_REGISTRY.get_tool_types().values()] + ) # task type are binded with tool type now, should be improved in the future prompt = ASSIGN_TASK_TYPE_PROMPT.format( task_list=task_list, task_type_desc=task_type_desc ) # task types are set to be the same as tool types, for now tool_config = create_func_config(ASSIGN_TASK_TYPE_CONFIG) rsp = await self.llm.aask_code(prompt, **tool_config) task_type_list = rsp["task_type"] + print(f"assigned task types: {task_type_list}") for task, task_type in zip(tasks, task_type_list): task["task_type"] = task_type return 
json.dumps(tasks) diff --git a/metagpt/const.py b/metagpt/const.py index a57464a19..7a19e81d0 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -70,8 +70,8 @@ TMP = METAGPT_ROOT / "tmp" SOURCE_ROOT = METAGPT_ROOT / "metagpt" PROMPT_PATH = SOURCE_ROOT / "prompts" SKILL_DIRECTORY = SOURCE_ROOT / "skills" -TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/functions/schemas" -TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/functions/libs" +TOOL_SCHEMA_PATH = METAGPT_ROOT / "metagpt/tools/schemas" +TOOL_LIBS_PATH = METAGPT_ROOT / "metagpt/tools/libs" # REAL CONSTS diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py index 3baf79843..3fd895e6e 100644 --- a/metagpt/prompts/ml_engineer.py +++ b/metagpt/prompts/ml_engineer.py @@ -15,7 +15,7 @@ Keep dataset column information updated before model train. # Task Update and print the dataset's column information only if the train or test data has changed. Use the following code: ```python -from metagpt.tools.functions.libs.data_preprocess import get_column_info +from metagpt.tools.libs.data_preprocess import get_column_info column_info = get_column_info(df) print("column_info") @@ -134,16 +134,12 @@ PRINT_DATA_COLUMNS = { "parameters": { "type": "object", "properties": { - "is_update": { - "type": "boolean", - "description": "Whether need to update the column info.", - }, "code": { "type": "string", "description": "The code to be added to a new cell in jupyter.", }, }, - "required": ["is_update", "code"], + "required": ["code"], }, } @@ -203,7 +199,7 @@ Specifically, {special_prompt} - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc.. # Available Tools (can be empty): -Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first. +Each Class tool is described in JSON format. When you call a tool, import the tool first. 
{tool_catalog} # Constraints: @@ -240,7 +236,7 @@ Strictly follow steps below when you writing code if it's convenient. - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc.. # Available Tools: -Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first. +Each Class tool is described in JSON format. When you call a tool, import the tool from its path first. {tool_catalog} # Output Example: @@ -248,7 +244,7 @@ when current task is "do data preprocess, like fill missing value, handle outlie ```python # Step 1: fill missing value # Tools used: ['FillMissingValue'] -from metagpt.tools.functions.libs.data_preprocess import FillMissingValue +from metagpt.tools.libs.data_preprocess import FillMissingValue train_processed = train.copy() test_processed = test.copy() diff --git a/metagpt/prompts/tool_type.py b/metagpt/prompts/tool_types.py similarity index 98% rename from metagpt/prompts/tool_type.py rename to metagpt/prompts/tool_types.py index 43ead78a6..c01a80310 100644 --- a/metagpt/prompts/tool_type.py +++ b/metagpt/prompts/tool_types.py @@ -39,7 +39,7 @@ The current task is about evaluating a model, please note the following: """ # Prompt for using tools of "vision" type -VISION_PROMPT = """ +IMAGE2WEBPAGE_PROMPT = """ The current task is about converting image into webpage code. please note the following: - Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow. 
-""" \ No newline at end of file +""" diff --git a/metagpt/roles/code_interpreter.py b/metagpt/roles/code_interpreter.py index afd51a575..f972e72e2 100644 --- a/metagpt/roles/code_interpreter.py +++ b/metagpt/roles/code_interpreter.py @@ -5,6 +5,7 @@ from pydantic import Field from metagpt.actions.ask_review import ReviewConst from metagpt.actions.execute_code import ExecutePyCode from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools +from metagpt.actions.write_code_steps import WriteCodeSteps from metagpt.logs import logger from metagpt.roles import Role from metagpt.roles.tool_maker import ToolMaker @@ -16,6 +17,7 @@ class CodeInterpreter(Role): auto_run: bool = True use_tools: bool = False make_udfs: bool = False # whether to save user-defined functions + use_code_steps: bool = False execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True) def __init__( @@ -52,10 +54,14 @@ class CodeInterpreter(Role): async def _act_on_task(self, current_task: Task) -> TaskResult: code, result, is_success = await self._write_and_exec_code() - task_result = TaskResult(code=code['code'], result=result, is_success=is_success) + task_result = TaskResult(code=code, result=result, is_success=is_success) return task_result async def _write_and_exec_code(self, max_retry: int = 3): + self.planner.current_task.code_steps = ( + await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else "" + ) + counter = 0 success = False @@ -63,7 +69,7 @@ class CodeInterpreter(Role): ### write code ### code, cause_by = await self._write_code() - self.working_memory.add(Message(content=code['code'], role="assistant", cause_by=cause_by)) + self.working_memory.add(Message(content=code["code"], role="assistant", cause_by=cause_by)) ### execute code ### result, success = await self.execute_code.run(**code) @@ -72,7 +78,7 @@ class CodeInterpreter(Role): self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode)) 
### process execution result ### - if "!pip" in code: + if "!pip" in code["code"]: success = False counter += 1 @@ -83,17 +89,15 @@ class CodeInterpreter(Role): if ReviewConst.CHANGE_WORD[0] in review: counter = 0 # redo the task again with help of human suggestions - return code, result, success + return code["code"], result, success async def _write_code(self): todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools() logger.info(f"ready to {todo.name}") context = self.planner.get_useful_memories() + # print(*context, sep="\n***\n") code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0) - # 暂时在这里转换 WriteCodeWithTools 的输出 - if isinstance(code, str): - code = {'code': code, 'language': 'python'} return code, todo diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index a60642bff..6b671f9c2 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -1,64 +1,43 @@ -from metagpt.actions.ask_review import ReviewConst from metagpt.actions.debug_code import DebugCode from metagpt.actions.execute_code import ExecutePyCode -from metagpt.actions.ml_da_action import Reflect, SummarizeAnalysis, UpdateDataColumns +from metagpt.actions.ml_da_action import UpdateDataColumns from metagpt.actions.write_analysis_code import WriteCodeWithToolsML -from metagpt.actions.write_code_steps import WriteCodeSteps from metagpt.logs import logger from metagpt.roles.code_interpreter import CodeInterpreter -from metagpt.roles.kaggle_manager import DownloadData, SubmitResult -from metagpt.schema import Message +from metagpt.tools.tool_data_type import ToolTypeEnum from metagpt.utils.common import any_to_str class MLEngineer(CodeInterpreter): - use_code_steps: bool = False - use_udfs: bool = False - data_desc: dict = {} debug_context: list = [] latest_code: str = "" def __init__(self, name="Mark", profile="MLEngineer", **kwargs): super().__init__(name=name, profile=profile, **kwargs) - # 
self._watch([DownloadData, SubmitResult]) # in multi-agent settings - - async def _plan_and_act(self): - ### a new attempt on the data, relevant in a multi-agent multi-turn setting ### - await self._prepare_data_context() - - ### general plan process ### - await super()._plan_and_act() - - ### summarize analysis ### - summary = await SummarizeAnalysis().run(self.planner.plan) - rsp = Message(content=summary, cause_by=SummarizeAnalysis) - self.rc.memory.add(rsp) - - return rsp - - async def _write_and_exec_code(self, max_retry: int = 3): - self.planner.current_task.code_steps = ( - await WriteCodeSteps().run(self.planner.plan) if self.use_code_steps else "" - ) - - code, result, success = await super()._write_and_exec_code(max_retry=max_retry) - - if success: - if self.use_tools and self.planner.current_task.task_type in ["data_preprocess", "feature_engineering"]: - update_success, new_code = await self._update_data_columns() - if update_success: - code = code + "\n\n" + new_code - - return code, result, success async def _write_code(self): if not self.use_tools: return await super()._write_code() - code_execution_count = sum([msg.cause_by == any_to_str(ExecutePyCode) for msg in self.working_memory.get()]) + # In a trial and errors settings, check whether this is our first attempt to tackle the task. If there is no code execution before, then it is. 
+ is_first_trial = any_to_str(ExecutePyCode) not in [msg.cause_by for msg in self.working_memory.get()] - if code_execution_count > 0: - logger.warning("We got a bug code, now start to debug...") + if is_first_trial: + # For the first trial, write task code from scratch + column_info = await self._update_data_columns() + + logger.info("Write code with tools") + tool_context, code = await WriteCodeWithToolsML().run( + context=[], # context assembled inside the Action + plan=self.planner.plan, + column_info=column_info, + ) + self.debug_context = tool_context + cause_by = WriteCodeWithToolsML + + else: + # Previous trials resulted in error, debug and rewrite the code + logger.warning("We got a bug, now start to debug...") code = await DebugCode().run( code=self.latest_code, runtime_result=self.working_memory.get(), @@ -67,49 +46,21 @@ class MLEngineer(CodeInterpreter): logger.info(f"new code \n{code}") cause_by = DebugCode - else: - logger.info("Write code with tools") - tool_context, code = await WriteCodeWithToolsML().run( - context=[], # context assembled inside the Action - plan=self.planner.plan, - column_info=self.data_desc.get("column_info", ""), - ) - self.debug_context = tool_context - cause_by = WriteCodeWithToolsML - - self.latest_code = code + self.latest_code = code["code"] return code, cause_by async def _update_data_columns(self): + current_task = self.planner.plan.current_task + if current_task.task_type not in [ + ToolTypeEnum.DATA_PREPROCESS.value, + ToolTypeEnum.FEATURE_ENGINEERING.value, + ToolTypeEnum.MODEL_TRAIN.value, + ]: + return "" logger.info("Check columns in updated data") - rsp = await UpdateDataColumns().run(self.planner.plan) - is_update, code = rsp["is_update"], rsp["code"] + code = await UpdateDataColumns().run(self.planner.plan) success = False - if is_update: - result, success = await self.execute_code.run(code) - if success: - print(result) - self.data_desc["column_info"] = result - return success, code - - async def 
_prepare_data_context(self): - memories = self.get_memories() - if memories: - latest_event = memories[-1].cause_by - if latest_event == DownloadData: - self.planner.plan.context = memories[-1].content - elif latest_event == SubmitResult: - # self reflect on previous plan outcomes and think about how to improve the plan, add to working memory - await self._reflect() - - # get feedback for improvement from human, add to working memory - await self.planner.ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER) - - async def _reflect(self): - context = self.get_memories() - context = "\n".join([str(msg) for msg in context]) - - reflection = await Reflect().run(context=context) - self.working_memory.add(Message(content=reflection, role="assistant")) - self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user")) + result, success = await self.execute_code.run(**code) + print(result) + return result if success else "" diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py index 222edf312..bb87f1b62 100644 --- a/metagpt/tools/__init__.py +++ b/metagpt/tools/__init__.py @@ -7,17 +7,11 @@ """ from enum import Enum +from metagpt.tools import tool_types # this registers all tool types +from metagpt.tools import libs # this registers all tools +from metagpt.tools.tool_registry import TOOL_REGISTRY -from pydantic import BaseModel - -from metagpt.const import TOOL_LIBS_PATH -from metagpt.prompts.tool_type import ( - DATA_PREPROCESS_PROMPT, - FEATURE_ENGINEERING_PROMPT, - MODEL_TRAIN_PROMPT, - MODEL_EVALUATE_PROMPT, - VISION_PROMPT, -) +_ = tool_types, libs, TOOL_REGISTRY # Avoid pre-commit error class SearchEngineType(Enum): @@ -37,62 +31,3 @@ class WebBrowserEngineType(Enum): def __missing__(cls, key): """Default type conversion""" return cls.CUSTOM - - -class ToolType(BaseModel): - name: str - module: str = "" - desc: str - usage_prompt: str = "" - - -TOOL_TYPE_MAPPINGS = { - "data_preprocess": ToolType( - name="data_preprocess", - 
module=str(TOOL_LIBS_PATH / "data_preprocess"), - desc="Only for changing value inplace.", - usage_prompt=DATA_PREPROCESS_PROMPT, - ), - "feature_engineering": ToolType( - name="feature_engineering", - module=str(TOOL_LIBS_PATH / "feature_engineering"), - desc="Only for creating new columns for input data.", - usage_prompt=FEATURE_ENGINEERING_PROMPT, - ), - "model_train": ToolType( - name="model_train", - module="", - desc="Only for training model.", - usage_prompt=MODEL_TRAIN_PROMPT, - ), - "model_evaluate": ToolType( - name="model_evaluate", - module="", - desc="Only for evaluating model.", - usage_prompt=MODEL_EVALUATE_PROMPT, - ), - "stable_diffusion": ToolType( - name="stable_diffusion", - module="metagpt.tools.sd_engine", - desc="Related to text2image, image2image using stable diffusion model.", - usage_prompt="", - ), - "scrape_web": ToolType( - name="scrape_web", - module="metagpt.tools.functions.libs.scrape_web.scrape_web", - desc="Scrape data from web page.", - usage_prompt="", - ), - "vision": ToolType( - name="vision", - module=str(TOOL_LIBS_PATH / "vision"), - desc="Only for converting image into webpage code.", - usage_prompt=VISION_PROMPT, - ), - "other": ToolType( - name="other", - module="", - desc="Any tasks that do not fit into the previous categories", - usage_prompt="", - ), -} diff --git a/metagpt/tools/functions/__init__.py b/metagpt/tools/functions/__init__.py deleted file mode 100644 index a0a43f507..000000000 --- a/metagpt/tools/functions/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/16 16:32 -# @Author : lidanyang -# @File : __init__.py -# @Desc : diff --git a/metagpt/tools/functions/libs/__init__.py b/metagpt/tools/functions/libs/__init__.py deleted file mode 100644 index a0a43f507..000000000 --- a/metagpt/tools/functions/libs/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/11/16 16:32 -# @Author : lidanyang -# @File : 
__init__.py -# @Desc : diff --git a/metagpt/tools/functions/libs/base.py b/metagpt/tools/functions/libs/base.py deleted file mode 100644 index c39adc66b..000000000 --- a/metagpt/tools/functions/libs/base.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# @Time : 2023/12/10 20:12 -# @Author : lidanyang -# @File : base -# @Desc : -class MLProcess(object): - def fit(self, df): - raise NotImplementedError - - def transform(self, df): - raise NotImplementedError - - def fit_transform(self, df): - self.fit(df) - return self.transform(df) diff --git a/metagpt/tools/functions/libs/scrape_web/__init__.py b/metagpt/tools/functions/libs/scrape_web/__init__.py deleted file mode 100644 index d5cd1524b..000000000 --- a/metagpt/tools/functions/libs/scrape_web/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py deleted file mode 100644 index 6644565d7..000000000 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ /dev/null @@ -1,126 +0,0 @@ -import ast -import os -import re -import yaml -import inspect -import importlib -from pathlib import Path -from typing import List -from metagpt.logs import logger - - -def extract_function_signatures(file_path): - with open(file_path, "r", encoding="utf-8") as file: - source_code = file.read() - - tree = ast.parse(source_code) - function_signatures = [] - function_returns = [] - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef): - # 只提取用户自定义函数,排除内置函数 - if not (node.name.startswith("__") and node.name.endswith("__")): - # 获取函数名 - function_name = node.name - # 获取参数列表 - args = [arg.arg for arg in node.args.args] - # 获取函数签名 - function_signature = f"{function_name}({', '.join(args)})" - # 导入函数 - module_name = Path(file_path).parts[-1][: -len(Path(file_path).suffix)] - module = 
importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}") - # 将函数导入到当前命名空间 - globals().update({function_name: getattr(module, function_name)}) - # 获取函数注释和函数路径 - function_schema = { - "udf_name": function_signature, - "udf_path": f"from metagpt.tools.functions.libs.udf.{module_name} import {function_name}", - "udf_doc": inspect.getdoc(getattr(module, function_name)), - } - function_signatures.append(function_schema) - # 获取函数返回变量名 - source_lines, _ = inspect.getsourcelines(getattr(module, function_name)) - for line in source_lines: - if line.strip().startswith("return "): - function_returns.append( - { - "udf_name": function_name, - "udf_returns": [var.strip() for var in line.strip()[len("return ") :].split(",")], - } - ) - break - - # 没有返回值的函数 - if not function_returns or function_returns[-1]["udf_name"] != function_name: - function_returns.append({"udf_name": function_name, "udf_returns": [None]}) - return function_signatures, function_returns - - -def get_function_signatures_in_folder(folder_path): - python_files = [f for f in os.listdir(folder_path) if f.endswith(".py") and f != "__init__.py"] - all_function_signatures = [] - all_function_returns = [] - - for file_name in python_files: - file_path = os.path.join(folder_path, file_name) - function_signatures, function_returns = extract_function_signatures(file_path) - all_function_signatures.extend(function_signatures) - all_function_returns.extend(function_returns) - return all_function_signatures, all_function_returns - - -# Create Tools Yaml Style Schema -def docstring_to_yaml(docstring: str, return_vars: List[str] = None): - logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n") - if docstring is None: - return {} - # 匹配简介部分 - description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", docstring, re.DOTALL) - description = description_match.group(1).strip() if description_match else "" - - # 匹配Args部分 - args_match = 
re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", docstring, re.DOTALL) - _args = args_match.group(1).strip() if args_match else "" - variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)") - params = variable_pattern.findall(_args) - if not params: - params = ((None, None, None),) - # 匹配Returns部分 - returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", docstring, re.DOTALL) - returns = returns_match.group(1).strip() if returns_match else "" - return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$") - # 添加返回值变量名 - return_vars = return_vars if isinstance(return_vars, list) else [return_vars] - returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns), return_vars)] - # 构建YAML字典 - yaml_data = { - "description": description.strip(".").strip(), - "parameters": { - "properties": { - param[0]: {"type": param[1], "description": param[2]} for param in params if param[0] is not None - }, - "required": [param[0] for param in params if param[0] is not None], - }, - "returns": {ret[0]: {"type": ret[1], "description": ret[2]} for ret in returns}, - } - return yaml_data - - -def extract_function_schema_yaml_in_folder(folder_path: str): - function_signatures, function_returns = get_function_signatures_in_folder(folder_path) - function_schema_yaml_data = {} - for func_docstring, func_returns in zip(function_signatures, function_returns): - if func_docstring["udf_doc"]: - fun_yaml_data = docstring_to_yaml(func_docstring["udf_doc"], func_returns["udf_returns"]) - fun_yaml_data.update({"type": "function"}) - function_schema_yaml_data.update({func_returns["udf_name"]: fun_yaml_data}) - return yaml.dump(function_schema_yaml_data, default_flow_style=False) - - -folder_path = str(Path(__file__).parent.absolute()) -function_signatures, function_returns = get_function_signatures_in_folder(folder_path) - -UDFS = [func for func in function_signatures] - -UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path) -UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, 
Loader=yaml.FullLoader) diff --git a/metagpt/tools/functions/schemas/data_preprocess.yml b/metagpt/tools/functions/schemas/data_preprocess.yml deleted file mode 100644 index 4de697abd..000000000 --- a/metagpt/tools/functions/schemas/data_preprocess.yml +++ /dev/null @@ -1,306 +0,0 @@ -FillMissingValue: - type: class - description: "Completing missing values with simple strategies" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - strategy: - type: str - description: "the imputation strategy, notice mean/median can only be used for numeric features" - default: mean - enum: - - mean - - median - - most_frequent - - constant - fill_value: - type: int - description: "fill_value is used to replace all occurrences of missing_values" - default: null - required: - - features - fit: - description: "Fit the FillMissingValue model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -MinMaxScale: - type: class - description: "Transform features by scaling each feature to a range, witch is (0, 1)" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - required: - - features - fit: - description: "Fit the MinMaxScale model." 
- parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -StandardScale: - type: class - description: "Standardize features by removing the mean and scaling to unit variance" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - required: - - features - fit: - description: "Fit the StandardScale model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -MaxAbsScale: - type: class - description: "cale each feature by its maximum absolute value" - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "columns to be processed" - required: - - features - fit: - description: "Fit the MaxAbsScale model." 
- parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -LabelEncode: - type: class - description: "Apply label encoding to specified categorical columns in-place." - methods: - __init__: - description: "Initialize self." - parameters: - properties: - features: - type: list - description: "Categorical columns to be label encoded" - required: - - features - fit: - description: "Fit the LabelEncode model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - -OneHotEncode: - type: class - description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped." - methods: - __init__: - description: "Initialize self." 
- parameters: - properties: - features: - type: list - description: "Categorical columns to be one-hot encoded and dropped" - required: - - features - fit: - description: "Fit the OneHotEncoding model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - transform: - description: "Transform the input DataFrame with the fitted model." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." - fit_transform: - description: "Fit and transform the input DataFrame." - parameters: - properties: - df: - type: DataFrame - description: "The input DataFrame." - required: - - df - returns: - df: - type: DataFrame - description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/libs/__init__.py b/metagpt/tools/libs/__init__.py new file mode 100644 index 000000000..442f57149 --- /dev/null +++ b/metagpt/tools/libs/__init__.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2023/11/16 16:32 +# @Author : lidanyang +# @File : __init__.py +# @Desc : +from metagpt.tools.libs import ( + data_preprocess, + feature_engineering, + sd_engine, + gpt_v_generator, + web_scrapping, +) + +_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scrapping # Avoid pre-commit error diff --git a/metagpt/tools/functions/libs/data_preprocess.py b/metagpt/tools/libs/data_preprocess.py similarity index 88% rename from metagpt/tools/functions/libs/data_preprocess.py rename to metagpt/tools/libs/data_preprocess.py index f423f2020..3891f9df0 100644 --- a/metagpt/tools/functions/libs/data_preprocess.py +++ b/metagpt/tools/libs/data_preprocess.py @@ -13,9 +13,25 @@ from sklearn.preprocessing import ( StandardScaler, ) -from metagpt.tools.functions.libs.base import MLProcess +from metagpt.tools.tool_data_type import ToolTypeEnum +from 
metagpt.tools.tool_registry import register_tool + +TOOL_TYPE = ToolTypeEnum.DATA_PREPROCESS.value +class MLProcess(object): + def fit(self, df): + raise NotImplementedError + + def transform(self, df): + raise NotImplementedError + + def fit_transform(self, df): + self.fit(df) + return self.transform(df) + + +@register_tool(tool_type=TOOL_TYPE) class FillMissingValue(MLProcess): def __init__( self, @@ -42,6 +58,7 @@ class FillMissingValue(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class MinMaxScale(MLProcess): def __init__( self, @@ -60,6 +77,7 @@ class MinMaxScale(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class StandardScale(MLProcess): def __init__( self, @@ -78,6 +96,7 @@ class StandardScale(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class MaxAbsScale(MLProcess): def __init__( self, @@ -96,6 +115,7 @@ class MaxAbsScale(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class RobustScale(MLProcess): def __init__( self, @@ -114,6 +134,7 @@ class RobustScale(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class OrdinalEncode(MLProcess): def __init__( self, @@ -132,6 +153,7 @@ class OrdinalEncode(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class OneHotEncode(MLProcess): def __init__( self, @@ -153,6 +175,7 @@ class OneHotEncode(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class LabelEncode(MLProcess): def __init__( self, @@ -181,6 +204,7 @@ class LabelEncode(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) def get_column_info(df: pd.DataFrame) -> dict: column_info = { "Category": [], diff --git a/metagpt/tools/functions/libs/feature_engineering.py b/metagpt/tools/libs/feature_engineering.py similarity index 94% rename from metagpt/tools/functions/libs/feature_engineering.py rename to metagpt/tools/libs/feature_engineering.py index 0d9584b4a..308150f9b 100644 --- a/metagpt/tools/functions/libs/feature_engineering.py +++ 
b/metagpt/tools/libs/feature_engineering.py @@ -6,7 +6,7 @@ # @Desc : Feature Engineering Tools import itertools -import lightgbm as lgb +# import lightgbm as lgb import numpy as np import pandas as pd from joblib import Parallel, delayed @@ -15,9 +15,14 @@ from sklearn.feature_selection import VarianceThreshold from sklearn.model_selection import KFold from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures -from metagpt.tools.functions.libs.base import MLProcess +from metagpt.tools.libs.data_preprocess import MLProcess +from metagpt.tools.tool_data_type import ToolTypeEnum +from metagpt.tools.tool_registry import register_tool + +TOOL_TYPE = ToolTypeEnum.FEATURE_ENGINEERING.value +@register_tool(tool_type=TOOL_TYPE) class PolynomialExpansion(MLProcess): def __init__(self, cols: list, degree: int = 2, label_col: str = None): self.cols = cols @@ -48,6 +53,7 @@ class PolynomialExpansion(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class CatCount(MLProcess): def __init__(self, col: str): self.col = col @@ -62,6 +68,7 @@ class CatCount(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class TargetMeanEncoder(MLProcess): def __init__(self, col: str, label: str): self.col = col @@ -77,6 +84,7 @@ class TargetMeanEncoder(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class KFoldTargetMeanEncoder(MLProcess): def __init__(self, col: str, label: str, n_splits: int = 5, random_state: int = 2021): self.col = col @@ -103,6 +111,7 @@ class KFoldTargetMeanEncoder(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class CatCross(MLProcess): def __init__(self, cols: list, max_cat_num: int = 100): self.cols = cols @@ -138,6 +147,7 @@ class CatCross(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class GroupStat(MLProcess): def __init__(self, group_col: str, agg_col: str, agg_funcs: list): self.group_col = group_col @@ -157,6 +167,7 @@ class GroupStat(MLProcess): return new_df 
+@register_tool(tool_type=TOOL_TYPE) class SplitBins(MLProcess): def __init__(self, cols: list, strategy: str = "quantile"): self.cols = cols @@ -173,6 +184,7 @@ class SplitBins(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class ExtractTimeComps(MLProcess): def __init__(self, time_col: str, time_comps: list): self.time_col = time_col @@ -201,6 +213,7 @@ class ExtractTimeComps(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class GeneralSelection(MLProcess): def __init__(self, label_col: str): self.label_col = label_col @@ -228,6 +241,7 @@ class GeneralSelection(MLProcess): return new_df +# skip for now because lgb is needed class TreeBasedSelection(MLProcess): def __init__(self, label_col: str, task_type: str): self.label_col = label_col @@ -270,6 +284,7 @@ class TreeBasedSelection(MLProcess): return new_df +@register_tool(tool_type=TOOL_TYPE) class VarianceBasedSelection(MLProcess): def __init__(self, label_col: str, threshold: float = 0): self.label_col = label_col diff --git a/metagpt/tools/functions/libs/vision.py b/metagpt/tools/libs/gpt_v_generator.py similarity index 85% rename from metagpt/tools/functions/libs/vision.py rename to metagpt/tools/libs/gpt_v_generator.py index b10ad7608..58e547840 100644 --- a/metagpt/tools/functions/libs/vision.py +++ b/metagpt/tools/libs/gpt_v_generator.py @@ -5,18 +5,13 @@ @Author : mannaandpoem @File : vision.py """ +import base64 from pathlib import Path import requests -import base64 - -from metagpt.config import CONFIG - -OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL -API_KEY = CONFIG.OPENAI_API_KEY -MODEL = CONFIG.OPENAI_VISION_MODEL -MAX_TOKENS = CONFIG.VISION_MAX_TOKENS +from metagpt.tools.tool_data_type import ToolTypeEnum +from metagpt.tools.tool_registry import register_tool ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image: @@ -33,8 +28,15 @@ As the design pays tribute to large companies, sometimes it is normal for some c Now, please 
generate the corresponding webpage code including HTML, CSS and JavaScript:""" -class Vision: +@register_tool(tool_type=ToolTypeEnum.IMAGE2WEBPAGE.value) +class GPTvGenerator: def __init__(self): + from metagpt.config import CONFIG + + OPENAI_API_BASE = CONFIG.OPENAI_BASE_URL + API_KEY = CONFIG.OPENAI_API_KEY + MODEL = CONFIG.OPENAI_VISION_MODEL + MAX_TOKENS = CONFIG.VISION_MAX_TOKENS self.api_key = API_KEY self.api_base = OPENAI_API_BASE self.model = MODEL @@ -51,10 +53,7 @@ class Vision: def get_result(self, image_path, prompt): base64_image = self.encode_image(image_path) - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {self.api_key}" - } + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} payload = { "model": self.model, "messages": [ @@ -62,11 +61,8 @@ class Vision: "role": "user", "content": [ {"type": "text", "text": prompt}, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"} - } - ] + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}, + ], } ], "max_tokens": self.max_tokens, @@ -81,7 +77,7 @@ class Vision: @staticmethod def encode_image(image_path): with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode('utf-8') + return base64.b64encode(image_file.read()).decode("utf-8") @staticmethod def save_webpages(image_path, webpages) -> Path: diff --git a/metagpt/tools/sd_engine.py b/metagpt/tools/libs/sd_engine.py similarity index 95% rename from metagpt/tools/sd_engine.py rename to metagpt/tools/libs/sd_engine.py index ba61fd496..794758f77 100644 --- a/metagpt/tools/sd_engine.py +++ b/metagpt/tools/libs/sd_engine.py @@ -13,9 +13,10 @@ import requests from aiohttp import ClientSession from PIL import Image, PngImagePlugin -from metagpt.config import CONFIG from metagpt.const import SD_OUTPUT_FILE_REPO from metagpt.logs import logger +from metagpt.tools.tool_data_type import 
ToolTypeEnum +from metagpt.tools.tool_registry import register_tool payload = { "prompt": "", @@ -51,8 +52,11 @@ payload = { default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution" +@register_tool(tool_type=ToolTypeEnum.STABLE_DIFFUSION.value) class SDEngine: def __init__(self, sd_url=""): + from metagpt.config import CONFIG + # Initialize the SDEngine with configuration self.sd_url = sd_url if sd_url else CONFIG.get("SD_URL") self.sd_t2i_url = f"{self.sd_url}{CONFIG.get('SD_T2I_API')}" diff --git a/metagpt/tools/functions/libs/scrape_web/scrape_web.py b/metagpt/tools/libs/web_scrapping.py similarity index 76% rename from metagpt/tools/functions/libs/scrape_web/scrape_web.py rename to metagpt/tools/libs/web_scrapping.py index e68ce0e64..e8e73f123 100644 --- a/metagpt/tools/functions/libs/scrape_web/scrape_web.py +++ b/metagpt/tools/libs/web_scrapping.py @@ -1,9 +1,10 @@ -import asyncio - +from metagpt.tools.tool_data_type import ToolTypeEnum +from metagpt.tools.tool_registry import register_tool from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper -async def scrape_web(url, *urls): +@register_tool(tool_type=ToolTypeEnum.WEBSCRAPING.value) +async def scrape_web_playwright(url, *urls): """ Scrape and save the HTML structure and inner text content of a web page using Playwright. 
@@ -19,5 +20,3 @@ async def scrape_web(url, *urls): # Return the inner text content of the web page return {"inner_text": web.inner_text, "html": web.html} - -# 需要改三个地方: yaml, 对应路径下init, MetaGPT/metagpt/prompts/ml_engineer.py中ML_MODULE_MAP diff --git a/metagpt/tools/functions/schemas/__init__.py b/metagpt/tools/schemas/__init__.py similarity index 100% rename from metagpt/tools/functions/schemas/__init__.py rename to metagpt/tools/schemas/__init__.py diff --git a/metagpt/tools/schemas/data_preprocess/FillMissingValue.yml b/metagpt/tools/schemas/data_preprocess/FillMissingValue.yml new file mode 100644 index 000000000..44c830a1e --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/FillMissingValue.yml @@ -0,0 +1,61 @@ +FillMissingValue: + type: class + description: "Completing missing values with simple strategies" + methods: + __init__: + description: "Initialize self." + parameters: + properties: + features: + type: list + description: "columns to be processed" + strategy: + type: str + description: "the imputation strategy, notice mean/median can only be used for numeric features" + default: mean + enum: + - mean + - median + - most_frequent + - constant + fill_value: + type: int + description: "fill_value is used to replace all occurrences of missing_values" + default: null + required: + - features + fit: + description: "Fit the FillMissingValue model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." 
+ required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." diff --git a/metagpt/tools/schemas/data_preprocess/LabelEncode.yml b/metagpt/tools/schemas/data_preprocess/LabelEncode.yml new file mode 100644 index 000000000..419ef60a8 --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/LabelEncode.yml @@ -0,0 +1,48 @@ +LabelEncode: + type: class + description: "Apply label encoding to specified categorical columns in-place." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + features: + type: list + description: "Categorical columns to be label encoded" + required: + - features + fit: + description: "Fit the LabelEncode model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." diff --git a/metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml b/metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml new file mode 100644 index 000000000..3e17cfdd0 --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/MaxAbsScale.yml @@ -0,0 +1,48 @@ +MaxAbsScale: + type: class + description: "cale each feature by its maximum absolute value" + methods: + __init__: + description: "Initialize self." + parameters: + properties: + features: + type: list + description: "columns to be processed" + required: + - features + fit: + description: "Fit the MaxAbsScale model." 
+ parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/schemas/data_preprocess/MinMaxScale.yml b/metagpt/tools/schemas/data_preprocess/MinMaxScale.yml new file mode 100644 index 000000000..8f050d942 --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/MinMaxScale.yml @@ -0,0 +1,48 @@ +MinMaxScale: + type: class + description: "Transform features by scaling each feature to a range, witch is (0, 1)" + methods: + __init__: + description: "Initialize self." + parameters: + properties: + features: + type: list + description: "columns to be processed" + required: + - features + fit: + description: "Fit the MinMaxScale model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." 
diff --git a/metagpt/tools/schemas/data_preprocess/OneHotEncode.yml b/metagpt/tools/schemas/data_preprocess/OneHotEncode.yml new file mode 100644 index 000000000..f499b2cb8 --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/OneHotEncode.yml @@ -0,0 +1,48 @@ +OneHotEncode: + type: class + description: "Apply one-hot encoding to specified categorical columns, the original columns will be dropped." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + features: + type: list + description: "Categorical columns to be one-hot encoded and dropped" + required: + - features + fit: + description: "Fit the OneHotEncoding model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." diff --git a/metagpt/tools/schemas/data_preprocess/StandardScale.yml b/metagpt/tools/schemas/data_preprocess/StandardScale.yml new file mode 100644 index 000000000..cf6e7d57b --- /dev/null +++ b/metagpt/tools/schemas/data_preprocess/StandardScale.yml @@ -0,0 +1,48 @@ +StandardScale: + type: class + description: "Standardize features by removing the mean and scaling to unit variance" + methods: + __init__: + description: "Initialize self." + parameters: + properties: + features: + type: list + description: "columns to be processed" + required: + - features + fit: + description: "Fit the StandardScale model." 
+ parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." diff --git a/metagpt/tools/schemas/feature_engineering/CatCount.yml b/metagpt/tools/schemas/feature_engineering/CatCount.yml new file mode 100644 index 000000000..049fc7879 --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/CatCount.yml @@ -0,0 +1,48 @@ +CatCount: + type: class + description: "Add value counts of a categorical column as new feature." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + col: + type: str + description: "Column for value counts." + required: + - col + fit: + description: "Fit the CatCount model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." 
\ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/CatCross.yml b/metagpt/tools/schemas/feature_engineering/CatCross.yml new file mode 100644 index 000000000..5d6303439 --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/CatCross.yml @@ -0,0 +1,52 @@ +CatCross: + type: class + description: "Add pairwise crossed features and convert them to numerical features." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + cols: + type: list + description: "Columns to be pairwise crossed, at least 2 columns." + max_cat_num: + type: int + description: "Maximum unique categories per crossed feature." + default: 100 + required: + - cols + fit: + description: "Fit the CatCross model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/GeneralSelection.yml b/metagpt/tools/schemas/feature_engineering/GeneralSelection.yml new file mode 100644 index 000000000..2ebf5b397 --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/GeneralSelection.yml @@ -0,0 +1,48 @@ +GeneralSelection: + type: class + description: "Drop all nan feats and feats with only one unique value." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + label_col: + type: str + description: "Label column name." 
+ required: + - label_col + fit: + description: "Fit the GeneralSelection model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/GroupStat.yml b/metagpt/tools/schemas/feature_engineering/GroupStat.yml new file mode 100644 index 000000000..6e0ba2877 --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/GroupStat.yml @@ -0,0 +1,58 @@ +GroupStat: + type: class + description: "Aggregate specified column in a DataFrame grouped by another column, adding new features named '__by_'." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + group_col: + type: str + description: "Column used for grouping." + agg_col: + type: str + description: "Column on which aggregation is performed." + agg_funcs: + type: list + description: >- + List of aggregation functions to apply, such as ['mean', 'std']. + Each function must be supported by pandas. + required: + - group_col + - agg_col + - agg_funcs + fit: + description: "Fit the GroupStat model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." 
+ required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml b/metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml new file mode 100644 index 000000000..79a673f9f --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/KFoldTargetMeanEncoder.yml @@ -0,0 +1,60 @@ +KFoldTargetMeanEncoder: + type: class + description: "Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + col: + type: str + description: "Column to be k-fold mean encoded." + label: + type: str + description: "Predicted label column." + n_splits: + type: int + description: "Number of splits for K-fold." + default: 5 + random_state: + type: int + description: "Random seed." + default: 2021 + required: + - col + - label + fit: + description: "Fit the KFoldTargetMeanEncoder model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." 
\ No newline at end of file diff --git a/metagpt/tools/functions/schemas/feature_engineering.yml b/metagpt/tools/schemas/feature_engineering/PolynomialExpansion.yml similarity index 100% rename from metagpt/tools/functions/schemas/feature_engineering.yml rename to metagpt/tools/schemas/feature_engineering/PolynomialExpansion.yml diff --git a/metagpt/tools/schemas/feature_engineering/SplitBins.yml b/metagpt/tools/schemas/feature_engineering/SplitBins.yml new file mode 100644 index 000000000..4e0171406 --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/SplitBins.yml @@ -0,0 +1,56 @@ +SplitBins: + type: class + description: "Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + cols: + type: list + description: "Columns to be binned inplace." + strategy: + type: str + description: "Strategy used to define the widths of the bins." + default: quantile + enum: + - quantile + - uniform + - kmeans + required: + - cols + fit: + description: "Fit the SplitBins model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." 
\ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml b/metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml new file mode 100644 index 000000000..86416ccbb --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/TargetMeanEncoder.yml @@ -0,0 +1,52 @@ +TargetMeanEncoder: + type: class + description: "Encodes a categorical column by the mean of the label column, and adds the result as a new feature." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + col: + type: str + description: "Column to be mean encoded." + label: + type: str + description: "Predicted label column." + required: + - col + - label + fit: + description: "Fit the TargetMeanEncoder model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame." \ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml b/metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml new file mode 100644 index 000000000..c210effea --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/TreeBasedSelection.yml @@ -0,0 +1,56 @@ +TreeBasedSelection: + type: class + description: "Select features based on tree-based model and remove features with low importance." + methods: + __init__: + description: "Initialize self." 
+ parameters: + properties: + label_col: + type: str + description: "Label column name." + task_type: + type: str + description: "Task type, 'cls' for classification, 'mcls' for multi-class classification, 'reg' for regression." + enum: + - cls + - mcls + - reg + required: + - label_col + - task_type + fit: + description: "Fit the TreeBasedSelection model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame contain label_col." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame contain label_col." \ No newline at end of file diff --git a/metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml b/metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml new file mode 100644 index 000000000..6da4c3e7f --- /dev/null +++ b/metagpt/tools/schemas/feature_engineering/VarianceBasedSelection.yml @@ -0,0 +1,52 @@ +VarianceBasedSelection: + type: class + description: "Select features based on variance and remove features with low variance." + methods: + __init__: + description: "Initialize self." + parameters: + properties: + label_col: + type: str + description: "Label column name." + threshold: + type: float + description: "Threshold for variance." + default: 0.0 + required: + - label_col + fit: + description: "Fit the VarianceBasedSelection model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." 
+ required: + - df + transform: + description: "Transform the input DataFrame with the fitted model." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame contain label_col." + fit_transform: + description: "Fit and transform the input DataFrame." + parameters: + properties: + df: + type: DataFrame + description: "The input DataFrame." + required: + - df + returns: + df: + type: DataFrame + description: "The transformed DataFrame contain label_col." \ No newline at end of file diff --git a/metagpt/tools/functions/schemas/vision.yml b/metagpt/tools/schemas/image2webpage/GPTvGenerator.yml similarity index 93% rename from metagpt/tools/functions/schemas/vision.yml rename to metagpt/tools/schemas/image2webpage/GPTvGenerator.yml index 4cb247419..4087f7c12 100644 --- a/metagpt/tools/functions/schemas/vision.yml +++ b/metagpt/tools/schemas/image2webpage/GPTvGenerator.yml @@ -1,4 +1,4 @@ -Vision: +GPTvGenerator: type: class description: "Class for generating web pages at once." 
class ToolTypeEnum(Enum):
    """Closed set of tool categories known to the tool registry.

    Each member's value is the category's string name. Looking up a value
    that is not a member (e.g. ``ToolTypeEnum("something_else")``) yields
    ``ToolTypeEnum.OTHER`` instead of raising ``ValueError``.
    """

    EDA = "eda"
    DATA_PREPROCESS = "data_preprocess"
    FEATURE_ENGINEERING = "feature_engineering"
    MODEL_TRAIN = "model_train"
    MODEL_EVALUATE = "model_evaluate"
    STABLE_DIFFUSION = "stable_diffusion"
    IMAGE2WEBPAGE = "image2webpage"
    # NOTE(review): the schema directory added alongside this enum is spelled
    # "web_scrapping" while this value is "web_scraping" -- confirm the default
    # schema lookup (<schema root>/<tool_type>/<tool_name>.yml) still resolves.
    WEBSCRAPING = "web_scraping"
    OTHER = "other"

    @classmethod
    def _missing_(cls, value):
        """Map any unknown value to ``OTHER``.

        ``Enum`` consults the ``_missing_`` classmethod when a by-value lookup
        fails. The previous ``__missing__`` spelling is the ``dict`` hook,
        which ``Enum`` never calls, so the intended fallback was dead code.
        """
        return cls.OTHER

    def __missing__(self, key):
        """Return ``OTHER``; kept only so explicit callers of the old name still work."""
        return type(self).OTHER
class ToolRegistry:
    """In-memory index of tools and tool types, keyed by name.

    Attributes:
        tools: ``{tool_name: Tool}`` for every registered tool.
        tool_types: ``{type_name: ToolType}`` for every registered type.
        tools_by_types: ``{type_name: {tool_name: Tool}}``.
    """

    def __init__(self):
        self.tools = {}
        self.tool_types = {}
        self.tools_by_types = defaultdict(dict)  # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}

    def register_tool_type(self, tool_type: ToolType):
        """Store ``tool_type`` under its ``name``, replacing any previous entry."""
        self.tool_types[tool_type.name] = tool_type
        logger.info(f"tool type {tool_type.name} registered")

    def register_tool(
        self,
        tool_name,
        tool_path,
        schema_path=None,
        tool_code="",
        tool_type="other",
        make_schema_if_not_exists=False,
    ):
        """Register a tool together with the schema loaded from its YAML file.

        Args:
            tool_name: Registry key; also the preferred top-level key in the YAML.
            tool_path: Path of the code file implementing the tool.
            schema_path: YAML schema location. Defaults to
                ``TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"``.
            tool_code: Source code of the tool, stored on the ``Tool`` record.
            tool_type: Category name used to group the tool.
            make_schema_if_not_exists: When the schema file is missing, create
                an empty one instead of skipping the registration.

        Does nothing if ``tool_name`` is already registered, or if the schema
        file is missing and ``make_schema_if_not_exists`` is false.
        """
        if self.has_tool(tool_name):
            return

        schema_path = schema_path or TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"

        if not os.path.exists(schema_path):
            if not make_schema_if_not_exists:
                logger.warning(f"no schema found at assumed schema_path {schema_path}, skip registering {tool_name}")
                return
            logger.warning(f"no schema found, will make schema at {schema_path}")
            make_schema(tool_code, schema_path)

        with open(schema_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document; normalise to a dict.
            schema_dict = yaml.safe_load(f) or {}

        # Prefer the entry named after the tool, otherwise fall back to the
        # first entry. A freshly made schema file is empty, so the final
        # fallback is an empty dict rather than an IndexError.
        schemas = schema_dict.get(tool_name) or next(iter(schema_dict.values()), None) or {}
        schemas["tool_path"] = tool_path  # corresponding code file path of the tool

        try:
            ToolSchema(**schemas)  # validation
        except Exception as e:
            # Best effort by design: a non-conforming schema is still used.
            # Logged at debug level so the mismatch is discoverable without
            # flooding normal output.
            logger.debug(
                f"{tool_name} schema not conforms to required format, but will be used anyway. Mismatch: {e}"
            )

        tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code)
        self.tools[tool_name] = tool
        self.tools_by_types[tool_type][tool_name] = tool
        logger.info(f"{tool_name} registered")

    def has_tool(self, key):
        """Return True if a tool named ``key`` is registered."""
        return key in self.tools

    def get_tool(self, key):
        """Return the tool named ``key``, or None if it is not registered."""
        return self.tools.get(key)

    def get_tools_by_type(self, key):
        """Return ``{tool_name: Tool}`` for type ``key``, or None if no tool has that type."""
        return self.tools_by_types.get(key)

    def has_tool_type(self, key):
        """Return True if a tool type named ``key`` is registered."""
        return key in self.tool_types

    def get_tool_type(self, key):
        """Return the tool type named ``key``, or None if it is not registered."""
        return self.tool_types.get(key)

    def get_tool_types(self):
        """Return the live ``{type_name: ToolType}`` mapping (not a copy)."""
        return self.tool_types


# Registry instance
TOOL_REGISTRY = ToolRegistry()


def register_tool_type(cls):
    """Class decorator: instantiate ``cls`` with no arguments and register it as a tool type."""
    TOOL_REGISTRY.register_tool_type(tool_type=cls())
    return cls


def register_tool(tool_name="", tool_type="other", schema_path=None):
    """Decorator factory that registers the decorated function / class as a tool.

    Args:
        tool_name: Registry key; defaults to the decorated object's ``__name__``.
        tool_type: Category name under which the tool is grouped.
        schema_path: Explicit YAML schema path; the registry derives a default
            when omitted.
    """

    # tool_name is re-bound as a default argument so the inner function can
    # assign to it without needing ``nonlocal``.
    def decorator(cls, tool_name=tool_name):
        tool_name = tool_name or cls.__name__

        # Get the file path where the function / class is defined and the source code
        file_path = inspect.getfile(cls)
        if "metagpt" in file_path:
            # Keep only the part of the path from the first "metagpt" onwards.
            file_path = re.search("metagpt.+", file_path).group(0)
        source_code = inspect.getsource(cls)

        TOOL_REGISTRY.register_tool(
            tool_name=tool_name,
            tool_path=file_path,
            schema_path=schema_path,
            tool_code=source_code,
            tool_type=tool_type,
        )
        return cls

    return decorator


def make_schema(tool_code, path):
    """Write an empty placeholder schema to ``path`` and return ``path``.

    ``tool_code`` is accepted for future schema generation but is not used yet.
    """
    directory = os.path.dirname(path)
    if directory:  # a bare file name has no directory; os.makedirs("") would raise
        os.makedirs(directory, exist_ok=True)  # Create the necessary directories
    schema = {}  # an empty schema for now
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(schema, f)
    return path
# Each class below declares one tool category. The register_tool_type
# decorator instantiates the class with no arguments and stores the instance
# in the global registry, so importing this module is what registers them.


# Exploratory data analysis; declares no usage prompt of its own.
@register_tool_type
class EDA(ToolType):
    name: str = ToolTypeEnum.EDA.value
    desc: str = "For performing exploratory data analysis"


# Data preprocessing, with its dedicated usage prompt.
@register_tool_type
class DataPreprocess(ToolType):
    name: str = ToolTypeEnum.DATA_PREPROCESS.value
    desc: str = "Only for changing value inplace."
    usage_prompt: str = DATA_PREPROCESS_PROMPT


# Feature engineering, with its dedicated usage prompt.
@register_tool_type
class FeatureEngineer(ToolType):
    name: str = ToolTypeEnum.FEATURE_ENGINEERING.value
    desc: str = "Only for creating new columns for input data."
    usage_prompt: str = FEATURE_ENGINEERING_PROMPT


# Model training, with its dedicated usage prompt.
@register_tool_type
class ModelTrain(ToolType):
    name: str = ToolTypeEnum.MODEL_TRAIN.value
    desc: str = "Only for training model."
    usage_prompt: str = MODEL_TRAIN_PROMPT


# Model evaluation, with its dedicated usage prompt.
@register_tool_type
class ModelEvaluate(ToolType):
    name: str = ToolTypeEnum.MODEL_EVALUATE.value
    desc: str = "Only for evaluating model."
    usage_prompt: str = MODEL_EVALUATE_PROMPT


# Stable-diffusion image generation; declares no usage prompt of its own.
@register_tool_type
class StableDiffusion(ToolType):
    name: str = ToolTypeEnum.STABLE_DIFFUSION.value
    desc: str = "Related to text2image, image2image using stable diffusion model."


# Image-to-webpage conversion, with its dedicated usage prompt.
@register_tool_type
class Image2Webpage(ToolType):
    name: str = ToolTypeEnum.IMAGE2WEBPAGE.value
    desc: str = "For converting image into webpage code."
    usage_prompt: str = IMAGE2WEBPAGE_PROMPT


# Web scraping; declares no usage prompt of its own.
@register_tool_type
class WebScraping(ToolType):
    name: str = ToolTypeEnum.WEBSCRAPING.value
    desc: str = "For scraping data from web pages."


# Catch-all category for tools that fit none of the categories above.
@register_tool_type
class Other(ToolType):
    name: str = ToolTypeEnum.OTHER.value
    desc: str = "Any tools not in the defined categories"
}\n```\n""" - - -def test_vision_generate_webpages(mocker, mock_webpages): - mocker.patch( - "metagpt.tools.functions.libs.vision.Vision.generate_web_pages", - return_value=mock_webpages - ) - image_path = "image.png" - vision = Vision() - rsp = vision.generate_web_pages(image_path=image_path) - logs.logger.info(rsp) - assert "html" in rsp - assert "css" in rsp - assert "javascript" in rsp - - -def test_save_webpages(mocker, mock_webpages): - mocker.patch( - "metagpt.tools.functions.libs.vision.Vision.generate_web_pages", - return_value=mock_webpages - ) - image_path = "image.png" - vision = Vision() - webpages = vision.generate_web_pages(image_path) - webpages_dir = vision.save_webpages(image_path=image_path, webpages=webpages) - logs.logger.info(webpages_dir) - assert webpages_dir.exists() - - diff --git a/tests/metagpt/tools/functions/libs/__init__.py b/tests/metagpt/tools/libs/__init__.py similarity index 100% rename from tests/metagpt/tools/functions/libs/__init__.py rename to tests/metagpt/tools/libs/__init__.py diff --git a/tests/metagpt/tools/functions/libs/test_data_preprocess.py b/tests/metagpt/tools/libs/test_data_preprocess.py similarity index 97% rename from tests/metagpt/tools/functions/libs/test_data_preprocess.py rename to tests/metagpt/tools/libs/test_data_preprocess.py index 3c2d661ab..418f8adee 100644 --- a/tests/metagpt/tools/functions/libs/test_data_preprocess.py +++ b/tests/metagpt/tools/libs/test_data_preprocess.py @@ -5,7 +5,7 @@ import numpy.testing as npt import pandas as pd import pytest -from metagpt.tools.functions.libs.data_preprocess import ( +from metagpt.tools.libs.data_preprocess import ( FillMissingValue, LabelEncode, MaxAbsScale, diff --git a/tests/metagpt/tools/functions/libs/test_feature_engineering.py b/tests/metagpt/tools/libs/test_feature_engineering.py similarity index 97% rename from tests/metagpt/tools/functions/libs/test_feature_engineering.py rename to tests/metagpt/tools/libs/test_feature_engineering.py index 
5b45aeb0c..3cfd5dacd 100644 --- a/tests/metagpt/tools/functions/libs/test_feature_engineering.py +++ b/tests/metagpt/tools/libs/test_feature_engineering.py @@ -3,7 +3,7 @@ import pandas as pd import pytest from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris -from metagpt.tools.functions.libs.feature_engineering import ( +from metagpt.tools.libs.feature_engineering import ( CatCount, CatCross, ExtractTimeComps, @@ -147,6 +147,7 @@ def test_general_selection(mock_dataset): assert "cat2" not in transformed.columns +@pytest.mark.skip # skip because TreeBasedSelection needs lgb as dependency def test_tree_based_selection(mock_dataset): # regression data = load_sklearn_data("housing") diff --git a/tests/metagpt/tools/libs/test_gpt_v_generator.py b/tests/metagpt/tools/libs/test_gpt_v_generator.py new file mode 100644 index 000000000..360ca4a75 --- /dev/null +++ b/tests/metagpt/tools/libs/test_gpt_v_generator.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2024/01/15 +@Author : mannaandpoem +@File : test_vision.py +""" +import pytest + +from metagpt import logs +from metagpt.tools.libs.gpt_v_generator import GPTvGenerator + + +@pytest.fixture +def mock_webpages(mocker): + mock_data = """```html\n\n +\n\n```\n +```css\n.class { ... }\n```\n +```javascript\nfunction() { ... 
}\n```\n""" + mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_web_pages", return_value=mock_data) + return mocker + + +def test_vision_generate_webpages(mock_webpages): + image_path = "image.png" + generator = GPTvGenerator() + rsp = generator.generate_web_pages(image_path=image_path) + logs.logger.info(rsp) + assert "html" in rsp + assert "css" in rsp + assert "javascript" in rsp + + +def test_save_webpages(mock_webpages): + image_path = "image.png" + generator = GPTvGenerator() + webpages = generator.generate_web_pages(image_path) + webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages) + logs.logger.info(webpages_dir) + assert webpages_dir.exists() diff --git a/tests/metagpt/tools/functions/test_sd.py b/tests/metagpt/tools/libs/test_sd_engine.py similarity index 93% rename from tests/metagpt/tools/functions/test_sd.py rename to tests/metagpt/tools/libs/test_sd_engine.py index 142101cad..363cf96b9 100644 --- a/tests/metagpt/tools/functions/test_sd.py +++ b/tests/metagpt/tools/libs/test_sd_engine.py @@ -4,7 +4,7 @@ # @Desc : import pytest -from metagpt.tools.sd_engine import SDEngine +from metagpt.tools.libs.sd_engine import SDEngine def test_sd_tools(): diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/libs/test_udf.py similarity index 95% rename from tests/metagpt/tools/functions/test_udf.py rename to tests/metagpt/tools/libs/test_udf.py index 741bd9a9f..19e523448 100644 --- a/tests/metagpt/tools/functions/test_udf.py +++ b/tests/metagpt/tools/libs/test_udf.py @@ -3,7 +3,7 @@ import json import yaml from metagpt.logs import logger -from metagpt.tools.functions.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml +from metagpt.tools.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml def test_udfs(): diff --git a/tests/metagpt/tools/test_tool_registry.py b/tests/metagpt/tools/test_tool_registry.py new file mode 100644 index 000000000..582c368a8 --- /dev/null +++ 
@pytest.fixture
def tool_registry():
    """Provide a fresh, empty registry for each test."""
    return ToolRegistry()


@pytest.fixture
def schema_yaml(mocker):
    """Pretend every schema file exists and serve canned YAML from ``open``."""
    mock_yaml_content = """
    tool_name:
      key1: value1
      key2: value2
    """
    mocker.patch("os.path.exists", return_value=True)
    mocker.patch("builtins.open", mocker.mock_open(read_data=mock_yaml_content))
    return mocker


# Test Initialization
def test_initialization(tool_registry):
    """A new registry starts with all three containers empty."""
    assert isinstance(tool_registry, ToolRegistry)
    for container in (tool_registry.tools, tool_registry.tool_types, tool_registry.tools_by_types):
        assert container == {}


# Test Tool Type Registration
def test_register_tool_type(tool_registry):
    """A registered type is stored under its name."""
    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
    assert "TestType" in tool_registry.tool_types


# Test Tool Registration
def test_register_tool(tool_registry, schema_yaml):
    """With a schema available, the tool ends up in ``tools``."""
    tool_registry.register_tool(tool_name="TestTool", tool_path="/path/to/tool")
    assert "TestTool" in tool_registry.tools


# Test Tool Registration with Non-existing Schema
def test_register_tool_no_schema(tool_registry, mocker):
    """Without a schema file the registration is skipped."""
    mocker.patch("os.path.exists", return_value=False)
    tool_registry.register_tool(tool_name="TestTool", tool_path="/path/to/tool")
    assert "TestTool" not in tool_registry.tools


# Test Tool Existence Checks
def test_has_tool(tool_registry, schema_yaml):
    """``has_tool`` is true only for registered names."""
    tool_registry.register_tool(tool_name="TestTool", tool_path="/path/to/tool")
    assert tool_registry.has_tool("TestTool")
    assert not tool_registry.has_tool("NonexistentTool")


# Test Tool Retrieval
def test_get_tool(tool_registry, schema_yaml):
    """``get_tool`` returns the record carrying the registered name and path."""
    expected_name, expected_path = "TestTool", "/path/to/tool"
    tool_registry.register_tool(expected_name, expected_path)
    fetched = tool_registry.get_tool(expected_name)
    assert fetched is not None
    assert fetched.name == expected_name
    assert fetched.path == expected_path


# Similar tests for has_tool_type, get_tool_type, get_tools_by_type
def test_has_tool_type(tool_registry):
    """``has_tool_type`` is true only for registered type names."""
    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
    assert tool_registry.has_tool_type("TestType")
    assert not tool_registry.has_tool_type("NonexistentType")


def test_get_tool_type(tool_registry):
    """``get_tool_type`` returns the registered type object."""
    tool_registry.register_tool_type(ToolType(name="TestType", desc="test"))
    fetched = tool_registry.get_tool_type("TestType")
    assert fetched is not None
    assert fetched.name == "TestType"


def test_get_tools_by_type(tool_registry, schema_yaml):
    """A tool registered under a type is listed under that type."""
    type_name, name, path = "TestType", "TestTool", "/path/to/tool"
    tool_registry.register_tool_type(ToolType(name=type_name, desc="test"))

    tool_registry.register_tool(name, path, tool_type=type_name)

    grouped = tool_registry.get_tools_by_type(type_name)
    assert grouped is not None
    assert name in grouped


# Test case for when the tool type does not exist
def test_get_tools_by_nonexistent_type(tool_registry):
    """An unknown type yields None rather than an empty mapping."""
    assert tool_registry.get_tools_by_type("NonexistentType") is None