tool management in one place, add aask_code mock, azure mock

yzlin 2024-01-11 22:55:31 +08:00
parent 9e0b9745be
commit e99c5f29f4
9 changed files with 167 additions and 74 deletions

View file

@@ -20,14 +20,16 @@ from metagpt.prompts.ml_engineer import (
GENERATE_CODE_PROMPT,
ML_TOOL_USAGE_PROMPT,
SELECT_FUNCTION_TOOLS,
TASK_MODULE_MAP,
TASK_SPECIFIC_PROMPT,
TOOL_RECOMMENDATION_PROMPT,
TOOL_USAGE_PROMPT,
)
from metagpt.schema import Message, Plan
from metagpt.tools import TOOL_TYPE_MAPPINGS
from metagpt.utils.common import create_func_config, remove_comments
TOOL_TYPE_MODULE = {k: v.module for k, v in TOOL_TYPE_MAPPINGS.items()}
TOOL_TYPE_USAGE_PROMPT = {k: v.usage_prompt for k, v in TOOL_TYPE_MAPPINGS.items()}
class BaseWriteAnalysisCode(Action):
DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you. **Notice: The code for the next step depends on the code for the previous step. You must reuse variables from the latest previous code directly; don't create them again, this is very important. Use !pip install in a standalone block to install missing packages. Usually the libraries you need are already installed. Don't check whether packages are already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
@@ -171,9 +173,11 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
plan: Plan = None,
**kwargs,
) -> str:
task_type = plan.current_task.task_type
available_tools = self.available_tools.get(task_type, {})
special_prompt = TASK_SPECIFIC_PROMPT.get(task_type, "")
tool_type = (
plan.current_task.task_type
) # find tool type from task type through exact match; can be extended to retrieval in the future
available_tools = self.available_tools.get(tool_type, {})
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
@@ -189,10 +193,10 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
recommend_tools = await self._tool_recommendation(
plan.current_task.instruction, code_steps, available_tools
)
tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
logger.info(f"Recommended tools: \n{recommend_tools}")
module_name = TASK_MODULE_MAP[task_type]
module_name = TOOL_TYPE_MODULE[tool_type]
tools_instruction = TOOL_USAGE_PROMPT.format(
special_prompt=special_prompt, module_name=module_name, tool_catalog=tool_catalog
@@ -215,9 +219,9 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
column_info: str = "",
**kwargs,
) -> Tuple[List[Message], str]:
task_type = plan.current_task.task_type
available_tools = self.available_tools.get(task_type, {})
special_prompt = TASK_SPECIFIC_PROMPT.get(task_type, "")
tool_type = plan.current_task.task_type
available_tools = self.available_tools.get(tool_type, {})
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
@@ -230,10 +234,10 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
recommend_tools = await self._tool_recommendation(
plan.current_task.instruction, code_steps, available_tools
)
tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
tool_catalog = self._parse_recommend_tools(tool_type, recommend_tools)
logger.info(f"Recommended tools: \n{recommend_tools}")
module_name = TASK_MODULE_MAP[task_type]
module_name = TOOL_TYPE_MODULE[tool_type]
prompt = ML_TOOL_USAGE_PROMPT.format(
user_requirement=plan.goal,
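Note: the following is a minimal, self-contained sketch (not part of the commit) of the lookup flow introduced above, where the current task's task_type is used directly as the tool_type key. ToolTypeStub, MAPPINGS, and the module string are simplified, illustrative stand-ins for metagpt.tools.ToolType and TOOL_TYPE_MAPPINGS.

from dataclasses import dataclass

@dataclass
class ToolTypeStub:  # simplified stand-in for metagpt.tools.ToolType
    name: str
    module: str = ""
    usage_prompt: str = ""

MAPPINGS = {  # stand-in for TOOL_TYPE_MAPPINGS
    "data_preprocess": ToolTypeStub("data_preprocess", "example_module.data_preprocess", "preprocessing hints ..."),
    "model_train": ToolTypeStub("model_train", "", "training hints ..."),
}

# same derivations as at the top of write_analysis_code.py
TOOL_TYPE_MODULE = {k: v.module for k, v in MAPPINGS.items()}
TOOL_TYPE_USAGE_PROMPT = {k: v.usage_prompt for k, v in MAPPINGS.items()}

tool_type = "model_train"  # equals plan.current_task.task_type for now (exact match)
special_prompt = TOOL_TYPE_USAGE_PROMPT.get(tool_type, "")
module_name = TOOL_TYPE_MODULE[tool_type]
print(module_name, special_prompt)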

View file

@@ -12,6 +12,7 @@ from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_CONFIG, ASSIGN_TASK_TYPE_PROMPT
from metagpt.schema import Message, Plan, Task
from metagpt.tools import TOOL_TYPE_MAPPINGS
from metagpt.utils.common import CodeParser, create_func_config
@@ -46,7 +47,10 @@ class WritePlan(Action):
List[Dict]: tasks with task type assigned
"""
task_list = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks])
prompt = ASSIGN_TASK_TYPE_PROMPT.format(task_list=task_list)
task_type_desc = "\n".join([f"- **{item.name}**: {item.desc}" for item in TOOL_TYPE_MAPPINGS.values()])
prompt = ASSIGN_TASK_TYPE_PROMPT.format(
task_list=task_list, task_type_desc=task_type_desc
) # task types are set to be the same as tool types, for now
tool_config = create_func_config(ASSIGN_TASK_TYPE_CONFIG)
rsp = await self.llm.aask_code(prompt, **tool_config)
task_type_list = rsp["task_type"]
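For reference, a tiny standalone snippet (illustrative only) showing the string that the task_type_desc join produces when fed the descriptions defined in TOOL_TYPE_MAPPINGS later in this commit:

descs = {
    "data_preprocess": "Only for changing value inplace.",
    "feature_engineering": "Only for creating new columns for input data.",
    "model_train": "Only for training model.",
    "model_evaluate": "Only for evaluating model.",
    "other": "Any tasks that do not fit into the previous categories",
}
# same "- **name**: desc" layout as the join in WritePlan above
task_type_desc = "\n".join(f"- **{name}**: {desc}" for name, desc in descs.items())
print(task_type_desc)
# - **data_preprocess**: Only for changing value inplace.
# - **feature_engineering**: Only for creating new columns for input data.
# - **model_train**: Only for training model.
# - **model_evaluate**: Only for evaluating model.
# - **other**: Any tasks that do not fit into the previous categories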

View file

@@ -54,11 +54,7 @@ Please assign a task type to each task in the list below from the given categories:
{task_list}
## All Task Type:
- **feature_engineering**: Only for creating new columns for input data.
- **data_preprocess**: Only for changing value inplace.
- **model_train**: Only for training model.
- **model_evaluate**: Only for evaluating model.
- **other**: Any tasks that do not fit into the previous categories, such as visualization, summarizing findings, etc.
{task_type_desc}
"""
ASSIGN_TASK_TYPE_CONFIG = {
@@ -278,52 +274,3 @@ for col in num_cols:
- The output code should contain all steps implemented correctly in 'Code Steps'.
"""
# - If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
DATA_PREPROCESS_PROMPT = """
The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
- Ensure operations are on existing dataset columns.
- Avoid writing processed data to files.
- Avoid any change to label column, such as standardization, etc.
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step that does data preprocessing on the train data must also do the same for the test data separately in the same step.
"""
FEATURE_ENGINEERING_PROMPT = """
The current task is about feature engineering. When performing it, please adhere to the following principles:
- Generate features that are as diverse as possible to improve the model's performance step by step.
- If potential impactful features are not included in 'Code Steps', add new steps to generate them.
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each step that does feature engineering on the train data must also do the same for the test data separately in the same step.
- Avoid using the label column to create features, except for cat encoding.
- Use the data from the previous task result if it exists; do not mock or reload data yourself.
"""
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as LightGBM, XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encoding together with all steps.
- Use the data from the previous task result directly; do not mock or reload data yourself.
- Set suitable hyperparameters for the model to make the metrics as high as possible.
"""
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is processed the same way as the training data. If not, remember to use the objects from 'Done Tasks' to transform the data.
- Use the trained model from the previous task result directly; do not mock or reload the model yourself.
"""
TASK_SPECIFIC_PROMPT = {
"data_preprocess": DATA_PREPROCESS_PROMPT,
"feature_engineering": FEATURE_ENGINEERING_PROMPT,
"model_train": MODEL_TRAIN_PROMPT,
"model_evaluate": MODEL_EVALUATE_PROMPT,
}
TASK_MODULE_MAP = {
"data_preprocess": "metagpt.tools.functions.libs.data_preprocess",
"feature_engineering": "metagpt.tools.functions.libs.feature_engineering",
"udf": "metagpt.tools.functions.libs.udf",
}

View file

@@ -0,0 +1,35 @@
DATA_PREPROCESS_PROMPT = """
The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
- Ensure operations are on existing dataset columns.
- Avoid writing processed data to files.
- Avoid any change to label column, such as standardization, etc.
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step that does data preprocessing on the train data must also do the same for the test data separately in the same step.
"""
FEATURE_ENGINEERING_PROMPT = """
The current task is about feature engineering. When performing it, please adhere to the following principles:
- Generate features that are as diverse as possible to improve the model's performance step by step.
- If potential impactful features are not included in 'Code Steps', add new steps to generate them.
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each step that does feature engineering on the train data must also do the same for the test data separately in the same step.
- Avoid using the label column to create features, except for cat encoding.
- Use the data from the previous task result if it exists; do not mock or reload data yourself.
"""
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as LightGBM, XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encoding together with all steps.
- Use the data from the previous task result directly; do not mock or reload data yourself.
- Set suitable hyperparameters for the model to make the metrics as high as possible.
"""
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is processed the same way as the training data. If not, remember to use the objects from 'Done Tasks' to transform the data.
- Use the trained model from the previous task result directly; do not mock or reload the model yourself.
"""

View file

@@ -9,6 +9,16 @@
from enum import Enum
from pydantic import BaseModel
from metagpt.const import TOOL_SCHEMA_PATH
from metagpt.prompts.tool_type import (
DATA_PREPROCESS_PROMPT,
FEATURE_ENGINEERING_PROMPT,
MODEL_TRAIN_PROMPT,
MODEL_EVALUATE_PROMPT,
)
class SearchEngineType(Enum):
SERPAPI_GOOGLE = "serpapi"
@@ -27,3 +37,44 @@ class WebBrowserEngineType(Enum):
def __missing__(cls, key):
"""Default type conversion"""
return cls.CUSTOM
class ToolType(BaseModel):
name: str
module: str = ""
desc: str
usage_prompt: str = ""
TOOL_TYPE_MAPPINGS = {
"data_preprocess": ToolType(
name="data_preprocess",
module=str(TOOL_SCHEMA_PATH / "data_preprocess"),
desc="Only for changing value inplace.",
usage_prompt=DATA_PREPROCESS_PROMPT,
),
"feature_engineering": ToolType(
name="feature_engineering",
module=str(TOOL_SCHEMA_PATH / "feature_engineering"),
desc="Only for creating new columns for input data.",
usage_prompt=FEATURE_ENGINEERING_PROMPT,
),
"model_train": ToolType(
name="model_train",
module="",
desc="Only for training model.",
usage_prompt=MODEL_TRAIN_PROMPT,
),
"model_evaluate": ToolType(
name="model_evaluate",
module="",
desc="Only for evaluating model.",
usage_prompt=MODEL_EVALUATE_PROMPT,
),
"other": ToolType(
name="other",
module="",
desc="Any tasks that do not fit into the previous categories",
usage_prompt="",
),
}
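With tool metadata consolidated in this single registry, supporting a new category only takes one more entry. The snippet below is a hypothetical sketch: the "data_visualize" type and its description are invented for illustration and are not part of this commit (it assumes metagpt.tools exposes ToolType and TOOL_TYPE_MAPPINGS as defined above).

# hypothetical example: a new tool type would be picked up automatically by the
# TOOL_TYPE_MODULE / TOOL_TYPE_USAGE_PROMPT dicts in write_analysis_code.py and
# by the task_type_desc listing built in WritePlan
from metagpt.tools import TOOL_TYPE_MAPPINGS, ToolType

TOOL_TYPE_MAPPINGS["data_visualize"] = ToolType(
    name="data_visualize",
    module="",  # no tool schema registered for this type yet
    desc="Only for plotting and visualizing data.",  # invented description
    usage_prompt="",
)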

View file

@@ -34,14 +34,14 @@ def rsp_cache():
rsp_cache_file_path = TEST_DATA_PATH / "rsp_cache.json" # read repo-provided
new_rsp_cache_file_path = TEST_DATA_PATH / "rsp_cache_new.json" # exporting a new copy
if os.path.exists(rsp_cache_file_path):
with open(rsp_cache_file_path, "r") as f1:
with open(rsp_cache_file_path, "r", encoding="utf-8") as f1:
rsp_cache_json = json.load(f1)
else:
rsp_cache_json = {}
yield rsp_cache_json
with open(rsp_cache_file_path, "w") as f2:
with open(rsp_cache_file_path, "w", encoding="utf-8") as f2:
json.dump(rsp_cache_json, f2, indent=4, ensure_ascii=False)
with open(new_rsp_cache_file_path, "w") as f2:
with open(new_rsp_cache_file_path, "w", encoding="utf-8") as f2:
json.dump(RSP_CACHE_NEW, f2, indent=4, ensure_ascii=False)
@@ -60,6 +60,7 @@ def llm_mock(rsp_cache, mocker, request):
llm.rsp_cache = rsp_cache
mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", llm.aask)
mocker.patch("metagpt.provider.base_llm.BaseLLM.aask_batch", llm.aask_batch)
mocker.patch("metagpt.provider.openai_api.OpenAILLM.aask_code", llm.aask_code)
yield mocker
if hasattr(request.node, "test_outcome") and request.node.test_outcome.passed:
if llm.rsp_candidates:
@@ -67,7 +68,7 @@ def llm_mock(rsp_cache, mocker, request):
cand_key = list(rsp_candidate.keys())[0]
cand_value = list(rsp_candidate.values())[0]
if cand_key not in llm.rsp_cache:
logger.info(f"Added '{cand_key[:100]} ... -> {cand_value[:20]} ...' to response cache")
logger.info(f"Added '{cand_key[:100]} ... -> {str(cand_value)[:20]} ...' to response cache")
llm.rsp_cache.update(rsp_candidate)
RSP_CACHE_NEW.update(rsp_candidate)

View file

@@ -1,4 +1,12 @@
from metagpt.actions.write_plan import Plan, Task, precheck_update_plan_from_rsp
import pytest
from metagpt.actions.write_plan import (
Plan,
Task,
WritePlan,
precheck_update_plan_from_rsp,
)
from metagpt.schema import Message
def test_precheck_update_plan_from_rsp():
@@ -12,3 +20,12 @@ def test_precheck_update_plan_from_rsp():
invalid_rsp = "wrong"
success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
assert not success
@pytest.mark.asyncio
async def test_write_plan():
rsp = await WritePlan().run(context=[Message("run analysis on sklearn iris dataset", role="user")])
assert "task_id" in rsp
assert "instruction" in rsp
assert "json" not in rsp # the output should be the content inside ```json ```

View file

@@ -0,0 +1,13 @@
import pytest
from metagpt.logs import logger
from metagpt.roles.code_interpreter import CodeInterpreter
@pytest.mark.asyncio
async def test_code_interpreter():
requirement = "Run data analysis on sklearn Iris dataset, include a plot"
ci = CodeInterpreter(goal=requirement, auto_run=True, use_tools=False)
rsp = await ci.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0

View file

@@ -1,10 +1,16 @@
from typing import Optional
import json
from typing import Optional, Union
from metagpt.config import CONFIG
from metagpt.logs import log_llm_stream, logger
from metagpt.provider.azure_openai_api import AzureOpenAILLM
from metagpt.provider.openai_api import OpenAILLM
from metagpt.schema import Message
OriginalLLM = OpenAILLM if not CONFIG.openai_api_type else AzureOpenAILLM
class MockLLM(OpenAILLM):
class MockLLM(OriginalLLM):
def __init__(self, allow_open_api_call):
super().__init__()
self.allow_open_api_call = allow_open_api_call
@@ -58,6 +64,15 @@ class MockLLM(OpenAILLM):
context.append(self._assistant_msg(rsp_text))
return self._extract_assistant_rsp(context)
async def original_aask_code(self, messages: Union[str, Message, list[dict]], **kwargs) -> dict:
"""
A copy of metagpt.provider.openai_api.OpenAILLM.aask_code; we can't use super().aask_code because it will be mocked.
Since openai_api.OpenAILLM.aask_code is different from base_llm.BaseLLM.aask_code, we use the former.
"""
messages = self._process_message(messages)
rsp = await self._achat_completion_function(messages, **kwargs)
return self.get_choice_function_arguments(rsp)
async def aask(
self,
msg: str,
@@ -78,6 +93,12 @@ class MockLLM(OpenAILLM):
rsp = await self._mock_rsp(msg_key, self.original_aask_batch, msgs, timeout)
return rsp
async def aask_code(self, messages: Union[str, Message, list[dict]], **kwargs) -> dict:
messages = self._process_message(messages)
msg_key = json.dumps(messages, ensure_ascii=False)
rsp = await self._mock_rsp(msg_key, self.original_aask_code, messages, **kwargs)
return rsp
async def _mock_rsp(self, msg_key, ask_func, *args, **kwargs):
if msg_key not in self.rsp_cache:
if not self.allow_open_api_call:
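The hunk is cut off here. As a rough, standalone sketch of the cache-or-call behaviour that the new aask_code path reuses (the function name, error message, and bookkeeping below are illustrative assumptions, not the repository's exact code):

async def mock_rsp_sketch(rsp_cache: dict, msg_key: str, ask_func, *args, allow_open_api_call: bool = False, **kwargs):
    # serve a cached response when one exists; otherwise call the real API
    # (only if explicitly allowed) and record the result for later runs
    if msg_key not in rsp_cache:
        if not allow_open_api_call:
            raise ValueError("open API calls are disabled for this test run")  # illustrative message
        rsp_cache[msg_key] = await ask_func(*args, **kwargs)
    return rsp_cache[msg_key]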