Merge pull request #995 from geekan/code_interpreter

Restructure DataInterpreter
This commit is contained in:
Alexander Wu 2024-03-14 01:26:51 +08:00 committed by GitHub
commit 4cf6102921
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 1200 additions and 1733 deletions

View file

@ -1,7 +1,7 @@
# Data Interpreter (DI)
## What is Data Interpreter
Data Interpreter is an agent who solves problems through codes. It understands user requirements, makes plans, writes codes for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios, please check out the examples below.
Data Interpreter is an agent who solves data-related problems through codes. It understands user requirements, makes plans, writes codes for execution, and uses tools if necessary. These capabilities enable it to tackle a wide range of scenarios, please check out the examples below. For overall design and technical details, please see our [paper](https://arxiv.org/abs/2402.18679).
## Example List
- Data visualization
@ -12,7 +12,9 @@ ## Example List
- Tool usage: web page imitation
- Tool usage: web crawling
- Tool usage: text2image
- Tool usage: email summarization and response
- Tool usage: email summarization and response\
- More on the way!
Please see [here](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for detailed explanation.
Please see the [docs](https://docs.deepwisdom.ai/main/en/guide/use_cases/agent/interpreter/intro.html) for more explanation.
We are continuously releasing codes, stay tuned!

View file

@ -7,13 +7,31 @@
from metagpt.roles.di.data_interpreter import DataInterpreter
PAPER_LIST_REQ = """"
Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
"""
ECOMMERCE_REQ = """
Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
**Notice: Firstly parse the web page encoding and the text HTML structure;
The first page product name, price, product URL, and image URL must be saved in the csv;**
"""
NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整:
1. 爬取并本地保存html结构;
2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*;
3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题链接时间;
4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个
5. 将全部结果存在本地csv中
"""
async def main():
prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
di = DataInterpreter(use_tools=True)
di = DataInterpreter(tools=["scrape_web_playwright"])
await di.run(prompt)
await di.run(ECOMMERCE_REQ)
if __name__ == "__main__":

View file

@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=False)
di = DataInterpreter()
await di.run(requirement)

View file

@ -22,7 +22,7 @@ async def main():
Firstly, Please help me fetch the latest 5 senders and full letter contents.
Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format."""
di = DataInterpreter(use_tools=True)
di = DataInterpreter()
await di.run(prompt)

View file

@ -12,10 +12,9 @@ async def main():
web_url = "https://pytorch.org/"
prompt = f"""This is a URL of webpage: '{web_url}' .
Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Finally, save webpage in a text file.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Note: All required dependencies and environments have been fully installed and configured."""
di = DataInterpreter(use_tools=True)
di = DataInterpreter(tools=["GPTvGenerator"])
await di.run(prompt)

View file

@ -0,0 +1,16 @@
import asyncio
from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str):
role = DataInterpreter(use_reflection=True, tools=["<all>"])
await role.run(requirement)
if __name__ == "__main__":
data_path = "your/path/to/titanic"
train_path = f"{data_path}/split_train.csv"
eval_path = f"{data_path}/split_eval.csv"
requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{train_path}', eval data path: '{eval_path}'."
asyncio.run(main(requirement))

View file

@ -1,16 +0,0 @@
import asyncio
from metagpt.roles.di.ml_engineer import MLEngineer
async def main(requirement: str):
role = MLEngineer(auto_run=True, use_tools=True)
await role.run(requirement)
if __name__ == "__main__":
data_path = "your_path_to_icr/icr-identify-age-related-conditions"
train_path = f"{data_path}/your_train_data.csv"
eval_path = f"{data_path}/your_eval_data.csv"
requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {train_path}, eval data path:{eval_path}."
asyncio.run(main(requirement))

View file

@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=False)
di = DataInterpreter()
await di.run(requirement)

View file

@ -8,7 +8,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=True, goal=requirement)
di = DataInterpreter(tools=["SDEngine"])
await di.run(requirement)

View file

@ -4,10 +4,11 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=False)
di = DataInterpreter()
await di.run(requirement)
if __name__ == "__main__":
requirement = "Solve this math problem: The greatest common divisor of positive integers m and n is 6. The least common multiple of m and n is 126. What is the least possible value of m + n?"
# answer: 60 (m = 18, n = 42)
asyncio.run(main(requirement))

View file

@ -23,7 +23,7 @@ from metagpt.actions.write_prd import WritePRD
from metagpt.actions.write_prd_review import WritePRDReview
from metagpt.actions.write_test import WriteTest
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.write_analysis_code import WriteCodeWithoutTools, WriteCodeWithTools
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
from metagpt.actions.di.write_plan import WritePlan
@ -46,8 +46,7 @@ class ActionType(Enum):
WEB_BROWSE_AND_SUMMARIZE = WebBrowseAndSummarize
CONDUCT_RESEARCH = ConductResearch
EXECUTE_NB_CODE = ExecuteNbCode
WRITE_CODE_WITHOUT_TOOLS = WriteCodeWithoutTools
WRITE_CODE_WITH_TOOLS = WriteCodeWithTools
WRITE_ANALYSIS_CODE = WriteAnalysisCode
WRITE_PLAN = WritePlan

View file

@ -1,109 +0,0 @@
from __future__ import annotations
from metagpt.actions.di.write_analysis_code import BaseWriteAnalysisCode
from metagpt.logs import logger
from metagpt.schema import Message
from metagpt.utils.common import create_func_call_config
DEBUG_REFLECTION_EXAMPLE = '''
Example 1:
[previous impl]:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a - b
```
[runtime Error]:
Tested passed:
Tests failed:
assert add(1, 2) == 3 # output: -1
assert add(1, 2) == 4 # output: -1
[reflection on previous impl]:
The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
[improved impl]:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a + b
```
'''
REFLECTION_PROMPT = """
Here is an example for you.
{debug_example}
[context]
{context}
[previous impl]
{code}
[runtime Error]
{runtime_result}
Analysis the error step by step, provide me improve method and code. Remember to follow [context] requirement. Don't forget write code for steps behind the error step.
[reflection on previous impl]:
xxx
"""
CODE_REFLECTION = {
"name": "execute_reflection_code",
"description": "Execute reflection code.",
"parameters": {
"type": "object",
"properties": {
"reflection": {
"type": "string",
"description": "Reflection on previous impl.",
},
"improved_impl": {
"type": "string",
"description": "Refined code after reflection.",
},
},
"required": ["reflection", "improved_impl"],
},
}
class DebugCode(BaseWriteAnalysisCode):
async def run(
self,
context: list[Message] = None,
code: str = "",
runtime_result: str = "",
) -> str:
"""
Execute the debugging process based on the provided context, code, and runtime_result.
Args:
context (list[Message]): A list of Message objects representing the context.
code (str): The code to be debugged.
runtime_result (str): The result of the code execution.
Returns:
str: The improved implementation based on the debugging process.
"""
info = []
reflection_prompt = REFLECTION_PROMPT.format(
debug_example=DEBUG_REFLECTION_EXAMPLE,
context=context,
code=code,
runtime_result=runtime_result,
)
system_prompt = "You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation "
info.append(Message(role="system", content=system_prompt))
info.append(Message(role="user", content=reflection_prompt))
tool_config = create_func_call_config(CODE_REFLECTION)
reflection = await self.llm.aask_code(messages=info, **tool_config)
logger.info(f"reflection is {reflection}")
return {"code": reflection["improved_impl"]}

View file

@ -9,7 +9,6 @@ from __future__ import annotations
import asyncio
import base64
import re
import traceback
from typing import Literal, Tuple
import nbformat
@ -58,7 +57,23 @@ class ExecuteNbCode(Action):
async def terminate(self):
"""kill NotebookClient"""
await self.nb_client._async_cleanup_kernel()
if self.nb_client.km is not None and await self.nb_client.km.is_alive():
await self.nb_client.km.shutdown_kernel(now=True)
await self.nb_client.km.cleanup_resources()
channels = [
self.nb_client.kc.stdin_channel, # The channel for handling standard input to the kernel.
self.nb_client.kc.hb_channel, # The channel for heartbeat communication between the kernel and client.
self.nb_client.kc.control_channel, # The channel for controlling the kernel.
]
# Stops all the running channels for this kernel
for channel in channels:
if channel.is_alive():
channel.stop()
self.nb_client.kc = None
self.nb_client.km = None
async def reset(self):
"""reset NotebookClient"""
@ -91,17 +106,17 @@ class ExecuteNbCode(Action):
else:
cell["outputs"].append(new_output(output_type="stream", name="stdout", text=str(output)))
def parse_outputs(self, outputs: list[str]) -> str:
def parse_outputs(self, outputs: list[str], keep_len: int = 2000) -> Tuple[bool, str]:
"""Parses the outputs received from notebook execution."""
assert isinstance(outputs, list)
parsed_output = ""
parsed_output, is_success = [], True
for i, output in enumerate(outputs):
output_text = ""
if output["output_type"] == "stream" and not any(
tag in output["text"]
for tag in ["| INFO | metagpt", "| ERROR | metagpt", "| WARNING | metagpt", "DEBUG"]
):
parsed_output += output["text"]
output_text = output["text"]
elif output["output_type"] == "display_data":
if "image/png" in output["data"]:
self.show_bytes_figure(output["data"]["image/png"], self.interaction)
@ -110,8 +125,22 @@ class ExecuteNbCode(Action):
f"{i}th output['data'] from nbclient outputs dont have image/png, continue next output ..."
)
elif output["output_type"] == "execute_result":
parsed_output += output["data"]["text/plain"]
return parsed_output
output_text = output["data"]["text/plain"]
elif output["output_type"] == "error":
output_text, is_success = "\n".join(output["traceback"]), False
# handle coroutines that are not executed asynchronously
if output_text.strip().startswith("<coroutine object"):
output_text = "Executed code failed, you need use key word 'await' to run a async code."
is_success = False
output_text = remove_escape_and_color_codes(output_text)
# The useful information of the exception is at the end,
# the useful information of normal output is at the begining.
output_text = output_text[:keep_len] if is_success else output_text[-keep_len:]
parsed_output.append(output_text)
return is_success, ",".join(parsed_output)
def show_bytes_figure(self, image_base64: str, interaction_type: Literal["ipython", None]):
image_bytes = base64.b64decode(image_base64)
@ -145,7 +174,7 @@ class ExecuteNbCode(Action):
"""
try:
await self.nb_client.async_execute_cell(cell, cell_index)
return True, ""
return self.parse_outputs(self.nb.cells[-1].outputs)
except CellTimeoutError:
assert self.nb_client.km is not None
await self.nb_client.km.interrupt_kernel()
@ -156,7 +185,7 @@ class ExecuteNbCode(Action):
await self.reset()
return False, "DeadKernelError"
except Exception:
return False, f"{traceback.format_exc()}"
return self.parse_outputs(self.nb.cells[-1].outputs)
async def run(self, code: str, language: Literal["python", "markdown"] = "python") -> Tuple[str, bool]:
"""
@ -173,14 +202,7 @@ class ExecuteNbCode(Action):
# run code
cell_index = len(self.nb.cells) - 1
success, error_message = await self.run_cell(self.nb.cells[-1], cell_index)
if not success:
return truncate(remove_escape_and_color_codes(error_message), is_success=success)
# code success
outputs = self.parse_outputs(self.nb.cells[-1].outputs)
outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success)
success, outputs = await self.run_cell(self.nb.cells[-1], cell_index)
if "!pip" in code:
success = False
@ -196,54 +218,39 @@ class ExecuteNbCode(Action):
raise ValueError(f"Only support for language: python, markdown, but got {language}, ")
def truncate(result: str, keep_len: int = 2000, is_success: bool = True):
"""对于超出keep_len个字符的result: 执行失败的代码, 展示result后keep_len个字符; 执行成功的代码, 展示result前keep_len个字符。"""
if is_success:
desc = f"Executed code successfully. Truncated to show only first {keep_len} characters\n"
else:
desc = f"Executed code failed, please reflect the cause of bug and then debug. Truncated to show only last {keep_len} characters\n"
if result.strip().startswith("<coroutine object"):
result = "Executed code failed, you need use key word 'await' to run a async code."
return result, False
if len(result) > keep_len:
result = result[-keep_len:] if not is_success else result[:keep_len]
return desc + result, is_success
return result, is_success
def remove_escape_and_color_codes(input_str: str):
# 使用正则表达式去除转义字符和颜色代码
# 使用正则表达式去除jupyter notebook输出结果中的转义字符和颜色代码
# Use regular expressions to get rid of escape characters and color codes in jupyter notebook output.
pattern = re.compile(r"\x1b\[[0-9;]*[mK]")
result = pattern.sub("", input_str)
return result
def display_markdown(content: str):
# 使用正则表达式逐个匹配代码块
# Use regular expressions to match blocks of code one by one.
matches = re.finditer(r"```(.+?)```", content, re.DOTALL)
start_index = 0
content_panels = []
# 逐个打印匹配到的文本和代码
# Set the text background color and text color.
style = "black on white"
# Print the matching text and code one by one.
for match in matches:
text_content = content[start_index : match.start()].strip()
code_content = match.group(0).strip()[3:-3] # Remove triple backticks
if text_content:
content_panels.append(Panel(Markdown(text_content), box=MINIMAL))
content_panels.append(Panel(Markdown(text_content), style=style, box=MINIMAL))
if code_content:
content_panels.append(Panel(Markdown(f"```{code_content}"), box=MINIMAL))
content_panels.append(Panel(Markdown(f"```{code_content}"), style=style, box=MINIMAL))
start_index = match.end()
# 打印剩余文本(如果有)
# Print remaining text (if any).
remaining_text = content[start_index:].strip()
if remaining_text:
content_panels.append(Panel(Markdown(remaining_text), box=MINIMAL))
content_panels.append(Panel(Markdown(remaining_text), style=style, box=MINIMAL))
# 在Live模式中显示所有Panel
# Display all panels in Live mode.
with Live(auto_refresh=False, console=Console(), vertical_overflow="visible") as live:
live.update(Group(*content_panels))
live.refresh()

View file

@ -1,70 +0,0 @@
from __future__ import annotations
from typing import Tuple
from metagpt.actions import Action
from metagpt.actions.di.write_analysis_code import WriteCodeWithTools
from metagpt.prompts.di.ml_action import (
ML_GENERATE_CODE_PROMPT,
ML_TOOL_USAGE_PROMPT,
PRINT_DATA_COLUMNS,
UPDATE_DATA_COLUMNS,
)
from metagpt.prompts.di.write_analysis_code import CODE_GENERATOR_WITH_TOOLS
from metagpt.schema import Message, Plan
from metagpt.utils.common import create_func_call_config, remove_comments
class WriteCodeWithToolsML(WriteCodeWithTools):
async def run(
self,
context: list[Message],
plan: Plan = None,
column_info: str = "",
**kwargs,
) -> Tuple[list[Message], str]:
# prepare tool schemas and tool-type-specific instruction
tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
# ML-specific variables to be used in prompt
finished_tasks = plan.get_finished_tasks()
code_context = [remove_comments(task.code) for task in finished_tasks]
code_context = "\n\n".join(code_context)
# prepare prompt depending on tool availability & LLM call
if tool_schemas:
prompt = ML_TOOL_USAGE_PROMPT.format(
user_requirement=plan.goal,
history_code=code_context,
current_task=plan.current_task.instruction,
column_info=column_info,
tool_type_usage_prompt=tool_type_usage_prompt,
tool_schemas=tool_schemas,
)
else:
prompt = ML_GENERATE_CODE_PROMPT.format(
user_requirement=plan.goal,
history_code=code_context,
current_task=plan.current_task.instruction,
column_info=column_info,
tool_type_usage_prompt=tool_type_usage_prompt,
)
tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS)
rsp = await self.llm.aask_code(prompt, **tool_config)
# Extra output to be used for potential debugging
context = [Message(content=prompt, role="user")]
return context, rsp
class UpdateDataColumns(Action):
async def run(self, plan: Plan = None) -> dict:
finished_tasks = plan.get_finished_tasks()
code_context = [remove_comments(task.code) for task in finished_tasks]
code_context = "\n\n".join(code_context)
prompt = UPDATE_DATA_COLUMNS.format(history_code=code_context)
tool_config = create_func_call_config(PRINT_DATA_COLUMNS)
rsp = await self.llm.aask_code(prompt, **tool_config)
return rsp

View file

@ -6,150 +6,68 @@
"""
from __future__ import annotations
from typing import Tuple
import json
from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.prompts.di.write_analysis_code import (
CODE_GENERATOR_WITH_TOOLS,
SELECT_FUNCTION_TOOLS,
TOOL_RECOMMENDATION_PROMPT,
TOOL_USAGE_PROMPT,
CHECK_DATA_PROMPT,
DEBUG_REFLECTION_EXAMPLE,
INTERPRETER_SYSTEM_MSG,
REFLECTION_PROMPT,
REFLECTION_SYSTEM_MSG,
STRUCTUAL_PROMPT,
)
from metagpt.schema import Message, Plan, SystemMessage
from metagpt.tools import TOOL_REGISTRY
from metagpt.tools.tool_registry import validate_tool_names
from metagpt.utils.common import create_func_call_config
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser, process_message, remove_comments
class BaseWriteAnalysisCode(Action):
DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
# REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
def insert_system_message(self, context: list[Message], system_msg: str = None):
system_msg = system_msg or self.DEFAULT_SYSTEM_MSG
context.insert(0, SystemMessage(content=system_msg)) if context[0].role != "system" else None
return context
async def run(self, context: list[Message], plan: Plan = None) -> dict:
"""Run of a code writing action, used in data analysis or modeling
Args:
context (list[Message]): Action output history, source action denoted by Message.cause_by
plan (Plan, optional): Overall plan. Defaults to None.
Returns:
dict: code result in the format of {"code": "print('hello world')", "language": "python"}
"""
raise NotImplementedError
class WriteCodeWithoutTools(BaseWriteAnalysisCode):
"""Ask LLM to generate codes purely by itself without local user-defined tools"""
async def run(self, context: list[Message], plan: Plan = None, system_msg: str = None, **kwargs) -> dict:
messages = self.insert_system_message(context, system_msg)
rsp = await self.llm.aask_code(messages, **kwargs)
return rsp
class WriteCodeWithTools(BaseWriteAnalysisCode):
"""Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
# selected tools to choose from, listed by their names. An empty list means selection from all tools.
selected_tools: list[str] = []
def _get_tools_by_type(self, tool_type: str) -> dict:
"""
Retreive tools by tool type from registry, but filtered by pre-selected tool list
Args:
tool_type (str): Tool type to retrieve from the registry
Returns:
dict: A dict of tool name to Tool object, representing available tools under the type
"""
candidate_tools = TOOL_REGISTRY.get_tools_by_type(tool_type)
if self.selected_tools:
candidate_tool_names = set(self.selected_tools) & candidate_tools.keys()
candidate_tools = {tool_name: candidate_tools[tool_name] for tool_name in candidate_tool_names}
return candidate_tools
async def _recommend_tool(
self,
task: str,
available_tools: dict,
) -> dict:
"""
Recommend tools for the specified task.
Args:
task (str): the task to recommend tools for
available_tools (dict): the available tools description
Returns:
dict: schemas of recommended tools for the specified task
"""
prompt = TOOL_RECOMMENDATION_PROMPT.format(
current_task=task,
available_tools=available_tools,
)
tool_config = create_func_call_config(SELECT_FUNCTION_TOOLS)
rsp = await self.llm.aask_code(prompt, **tool_config)
recommend_tools = rsp["recommend_tools"]
logger.info(f"Recommended tools: \n{recommend_tools}")
# Parses and validates the recommended tools, for LLM might hallucinate and recommend non-existing tools
valid_tools = validate_tool_names(recommend_tools, return_tool_object=True)
tool_schemas = {tool.name: tool.schemas for tool in valid_tools}
return tool_schemas
async def _prepare_tools(self, plan: Plan) -> Tuple[dict, str]:
"""Prepare tool schemas and usage instructions according to current task
Args:
plan (Plan): The overall plan containing task information.
Returns:
Tuple[dict, str]: A tool schemas ({tool_name: tool_schema_dict}) and a usage prompt for the type of tools selected
"""
# find tool type from task type through exact match, can extend to retrieval in the future
tool_type = plan.current_task.task_type
# prepare tool-type-specific instruction
tool_type_usage_prompt = (
TOOL_REGISTRY.get_tool_type(tool_type).usage_prompt if TOOL_REGISTRY.has_tool_type(tool_type) else ""
class WriteAnalysisCode(Action):
async def _debug_with_reflection(self, context: list[Message], working_memory: list[Message]):
reflection_prompt = REFLECTION_PROMPT.format(
debug_example=DEBUG_REFLECTION_EXAMPLE,
context=context,
previous_impl=working_memory,
)
# prepare schemas of available tools
tool_schemas = {}
available_tools = self._get_tools_by_type(tool_type)
if available_tools:
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
tool_schemas = await self._recommend_tool(plan.current_task.instruction, available_tools)
rsp = await self._aask(reflection_prompt, system_msgs=[REFLECTION_SYSTEM_MSG])
reflection = json.loads(CodeParser.parse_code(block=None, text=rsp))
return tool_schemas, tool_type_usage_prompt
return reflection["improved_impl"]
async def run(
self,
context: list[Message],
plan: Plan,
user_requirement: str,
plan_status: str = "",
tool_info: str = "",
working_memory: list[Message] = None,
use_reflection: bool = False,
**kwargs,
) -> str:
# prepare tool schemas and tool-type-specific instruction
tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
# form a complete tool usage instruction and include it as a message in context
tools_instruction = TOOL_USAGE_PROMPT.format(
tool_schemas=tool_schemas, tool_type_usage_prompt=tool_type_usage_prompt
structual_prompt = STRUCTUAL_PROMPT.format(
user_requirement=user_requirement,
plan_status=plan_status,
tool_info=tool_info,
)
context.append(Message(content=tools_instruction, role="user"))
# prepare prompt & LLM call
prompt = self.insert_system_message(context)
tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS)
rsp = await self.llm.aask_code(prompt, **tool_config)
working_memory = working_memory or []
context = process_message([Message(content=structual_prompt, role="user")] + working_memory)
return rsp
# LLM call
if use_reflection:
code = await self._debug_with_reflection(context=context, working_memory=working_memory)
else:
rsp = await self.llm.aask(context, system_msgs=[INTERPRETER_SYSTEM_MSG], **kwargs)
code = CodeParser.parse_code(block=None, text=rsp)
return code
class CheckData(Action):
async def run(self, plan: Plan) -> dict:
finished_tasks = plan.get_finished_tasks()
code_written = [remove_comments(task.code) for task in finished_tasks]
code_written = "\n\n".join(code_written)
prompt = CHECK_DATA_PROMPT.format(code_written=code_written)
rsp = await self._aask(prompt)
code = CodeParser.parse_code(block=None, text=rsp)
return code

View file

@ -12,81 +12,49 @@ from typing import Tuple
from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.prompts.di.write_analysis_code import (
ASSIGN_TASK_TYPE_CONFIG,
ASSIGN_TASK_TYPE_PROMPT,
)
from metagpt.schema import Message, Plan, Task
from metagpt.tools import TOOL_REGISTRY
from metagpt.utils.common import CodeParser, create_func_call_config
from metagpt.strategy.task_type import TaskType
from metagpt.utils.common import CodeParser
class WritePlan(Action):
PROMPT_TEMPLATE: str = """
# Context:
__context__
{context}
# Available Task Types:
{task_type_desc}
# Task:
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to {max_tasks} tasks.
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
If you encounter errors on the current task, revise and output the current single task only.
Output a list of jsons following the format:
```json
[
{
{{
"task_id": str = "unique identifier for a task in plan, can be an ordinal",
"dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
"instruction": "what you should do in this task, one short phrase or sentence",
},
"task_type": "type of this task, should be one of Available Task Types",
}},
...
]
```
"""
async def assign_task_type(self, tasks: list[dict]) -> str:
"""Assign task type to each task in tasks
Args:
tasks (list[dict]): tasks to be assigned task type
Returns:
str: tasks with task type assigned in a json string
"""
task_info = "\n".join([f"Task {task['task_id']}: {task['instruction']}" for task in tasks])
task_type_desc = "\n".join(
[f"- **{tool_type.name}**: {tool_type.desc}" for tool_type in TOOL_REGISTRY.get_tool_types().values()]
) # task type are binded with tool type now, should be improved in the future
prompt = ASSIGN_TASK_TYPE_PROMPT.format(
task_info=task_info, task_type_desc=task_type_desc
) # task types are set to be the same as tool types, for now
tool_config = create_func_call_config(ASSIGN_TASK_TYPE_CONFIG)
rsp = await self.llm.aask_code(prompt, **tool_config)
task_type_list = rsp["task_type"]
logger.info(f"assigned task types: {task_type_list}")
for task, task_type in zip(tasks, task_type_list):
task["task_type"] = task_type
return json.dumps(tasks)
async def run(self, context: list[Message], max_tasks: int = 5, use_tools: bool = False) -> str:
prompt = (
self.PROMPT_TEMPLATE.replace("__context__", "\n".join([str(ct) for ct in context]))
# .replace("__current_plan__", current_plan)
.replace("__max_tasks__", str(max_tasks))
async def run(self, context: list[Message], max_tasks: int = 5) -> str:
task_type_desc = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType])
prompt = self.PROMPT_TEMPLATE.format(
context="\n".join([str(ct) for ct in context]), max_tasks=max_tasks, task_type_desc=task_type_desc
)
rsp = await self._aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp)
if use_tools:
rsp = await self.assign_task_type(json.loads(rsp))
return rsp
def rsp_to_tasks(rsp: str) -> list[Task]:
def update_plan_from_rsp(rsp: str, current_plan: Plan):
rsp = json.loads(rsp)
tasks = [Task(**task_config) for task_config in rsp]
return tasks
def update_plan_from_rsp(rsp: str, current_plan: Plan):
tasks = rsp_to_tasks(rsp)
if len(tasks) == 1 or tasks[0].dependent_task_ids:
if tasks[0].dependent_task_ids and len(tasks) > 1:
# tasks[0].dependent_task_ids means the generated tasks are not a complete plan

View file

@ -1,128 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/24 15:43
# @Author : lidanyang
# @File : ml_action
# @Desc :
UPDATE_DATA_COLUMNS = """
# Background
Keep dataset column information updated before model train.
## Done Tasks
```python
{history_code}
```end
# Task
Update and print the dataset's column information only if the train or test data has changed. Use the following code:
```python
from metagpt.tools.libs.data_preprocess import get_column_info
column_info = get_column_info(df)
print("column_info")
print(column_info)
```end
# Constraints:
- Use the DataFrame variable from 'Done Tasks' in place of df.
- Import `get_column_info` only if it's not already imported.
"""
PRINT_DATA_COLUMNS = {
"name": "print_column_info",
"description": "Print the latest column information after 'Done Tasks' code if first read or data changed.",
"parameters": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "The code to be added to a new cell in jupyter.",
},
},
"required": ["code"],
},
}
ML_COMMON_PROMPT = """
# Background
As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
## Done Tasks
```python
{history_code}
```end
## Current Task
{current_task}
# Latest Data Info
Latest data info after previous tasks:
{column_info}
# Task
Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
Specifically, {tool_type_usage_prompt}
"""
USE_NO_TOOLS_EXAMPLE = """
# Output Example:
when current task is "train a lightgbm model on training data", the code can be like:
```python
# Step 1: check data type and convert to numeric
obj_cols = train.select_dtypes(include='object').columns.tolist()
for col in obj_cols:
encoder = LabelEncoder()
train[col] = encoder.fit_transform(train[col].unique().tolist() + ['unknown'])
test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')
test[col] = encoder.transform(test[col])
# Step 2: train lightgbm model
model = LGBMClassifier()
model.fit(train, y_train)
```end
# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
"""
USE_TOOLS_EXAMPLE = """
# Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
# Available Tools:
Each Class tool is described in JSON format. When you call a tool, import the tool from its path first.
{tool_schemas}
# Output Example:
when current task is "do data preprocess, like fill missing value, handle outliers, etc.", the code can be like:
```python
# Step 1: fill missing value
# Tools used: ['FillMissingValue']
from metagpt.tools.libs.data_preprocess import FillMissingValue
train_processed = train.copy()
test_processed = test.copy()
num_cols = train_processed.select_dtypes(include='number').columns.tolist()
if 'label' in num_cols:
num_cols.remove('label')
fill_missing_value = FillMissingValue(features=num_cols, strategy='mean')
fill_missing_value.fit(train_processed)
train_processed = fill_missing_value.transform(train_processed)
test_processed = fill_missing_value.transform(test_processed)
# Step 2: handle outliers
for col in num_cols:
low, high = train_processed[col].quantile([0.01, 0.99])
train_processed[col] = train_processed[col].clip(low, high)
test_processed[col] = test_processed[col].clip(low, high)
```end
# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- Always prioritize using pre-defined tools for the same functionality.
- Always copy the DataFrame before processing it and use the copy to process.
"""
ML_GENERATE_CODE_PROMPT = ML_COMMON_PROMPT + USE_NO_TOOLS_EXAMPLE
ML_TOOL_USAGE_PROMPT = ML_COMMON_PROMPT + USE_TOOLS_EXAMPLE

View file

@ -1,93 +1,112 @@
ASSIGN_TASK_TYPE_PROMPT = """
Please assign a task type to each task in the list below from the given categories:
{task_info}
INTERPRETER_SYSTEM_MSG = """As a data scientist, you need to help user to achieve their goal step by step in a continuous Jupyter notebook. Since it is a notebook environment, don't use asyncio.run. Instead, use await if you need to call an async function."""
## All Task Type:
{task_type_desc}
STRUCTUAL_PROMPT = """
# User Requirement
{user_requirement}
# Plan Status
{plan_status}
# Tool Info
{tool_info}
# Constraints
- Take on Current Task if it is in Plan Status, otherwise, tackle User Requirement directly.
- Ensure the output new code is executable in the same Jupyter notebook as the previous executed code.
- Always prioritize using pre-defined tools for the same functionality.
# Output
While some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:
```python
your code
```
"""
ASSIGN_TASK_TYPE_CONFIG = {
"name": "assign_task_type",
"description": "Assign task type to each task by order.",
"parameters": {
"type": "object",
"properties": {
"task_type": {
"type": "array",
"description": "List of task type. The length should as long as task list",
"items": {
"type": "string",
},
},
},
"required": ["task_type"],
},
}
REFLECTION_SYSTEM_MSG = """You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation."""
TOOL_RECOMMENDATION_PROMPT = """
## User Requirement:
{current_task}
DEBUG_REFLECTION_EXAMPLE = '''
[previous impl]:
assistant:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a - b
```
## Task
Recommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'.
user:
Tests failed:
assert add(1, 2) == 3 # output: -1
assert add(1, 2) == 4 # output: -1
## Available Tools:
{available_tools}
[reflection on previous impl]:
The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
## Tool Selection and Instructions:
- Select tools most relevant to completing the 'User Requirement'.
- If you believe that no tools are suitable, indicate with an empty list.
- Only list the names of the tools, not the full schema of each tool.
- Ensure selected tools are listed in 'Available Tools'.
[improved impl]:
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a + b
'''
REFLECTION_PROMPT = """
[example]
Here is an example of debugging with reflection.
{debug_example}
[/example]
[context]
{context}
[previous impl]:
{previous_impl}
[instruction]
Analyze your previous code and error in [context] step by step, provide me with improved method and code. Remember to follow [context] requirement. Don't forget to write code for steps behind the error step.
Output a json following the format:
```json
{{
"reflection": str = "Reflection on previous implementation",
"improved_impl": str = "Refined code after reflection.",
}}
```
"""
SELECT_FUNCTION_TOOLS = {
"name": "select_function_tools",
"description": "For current task, select suitable tools for it.",
"parameters": {
"type": "object",
"properties": {
"recommend_tools": {
"type": "array",
"description": "List of tool names. Empty list if no tool is suitable.",
"items": {
"type": "string",
},
},
},
"required": ["recommend_tools"],
},
}
CHECK_DATA_PROMPT = """
# Background
Check latest data info to guide subsequent tasks.
CODE_GENERATOR_WITH_TOOLS = {
"name": "add_subtask_code",
"description": "Add new code cell of current task to the end of an active Jupyter notebook.",
"parameters": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "The code to be added to a new cell in jupyter.",
},
},
"required": ["code"],
},
}
## Finished Tasks
```python
{code_written}
```end
TOOL_USAGE_PROMPT = """
# Instruction
Write complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.
Specifically, {tool_type_usage_prompt}
# Task
Check code in finished tasks, print key variables to guide your following actions.
Specifically, if it is a data analysis or machine learning task, print the the latest column information using the following code, with DataFrame variable from 'Finished Tasks' in place of df:
```python
from metagpt.tools.libs.data_preprocess import get_column_info
# Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
# Available Tools (can be empty):
Each Class tool is described in JSON format. When you call a tool, import the tool first.
{tool_schemas}
column_info = get_column_info(df)
print("column_info")
print(column_info)
```end
Otherwise, print out any key variables you see fit. Return an empty string if you think there is no important data to check.
# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- Always prioritize using pre-defined tools for the same functionality.
- Your code is to be added to a new cell in jupyter.
# Instruction
Output code following the format:
```python
your code
```
"""
DATA_INFO = """
# Latest Data Info
Latest data info after previous tasks:
{info}
"""

View file

@ -1,11 +1,11 @@
# Prompt for using tools of "eda" type
# Prompt for taking on "eda" tasks
EDA_PROMPT = """
The current task is about exploratory data analysis, please note the following:
- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.
- Remember to `import numpy as np` before using Numpy functions.
"""
# Prompt for using tools of "data_preprocess" type
# Prompt for taking on "data_preprocess" tasks
DATA_PREPROCESS_PROMPT = """
The current task is about data preprocessing, please note the following:
- Monitor data types per column, applying appropriate methods.
@ -15,9 +15,10 @@ The current task is about data preprocessing, please note the following:
- Prefer alternatives to one-hot encoding for categorical data.
- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.
- Each step do data preprocessing to train, must do same for test separately at the same time.
- Always copy the DataFrame before processing it and use the copy to process.
"""
# Prompt for using tools of "feature_engineering" type
# Prompt for taking on "feature_engineering" tasks
FEATURE_ENGINEERING_PROMPT = """
The current task is about feature engineering. when performing it, please adhere to the following principles:
- Generate as diverse features as possible to improve the model's performance step-by-step.
@ -27,9 +28,10 @@ The current task is about feature engineering. when performing it, please adhere
- Each feature engineering operation performed on the train set must also applies to the test separately at the same time.
- Avoid using the label column to create features, except for cat encoding.
- Use the data from previous task result if exist, do not mock or reload data yourself.
- Always copy the DataFrame before processing it and use the copy to process.
"""
# Prompt for using tools of "model_train" type
# Prompt for taking on "model_train" tasks
MODEL_TRAIN_PROMPT = """
The current task is about training a model, please ensure high performance:
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
@ -38,14 +40,14 @@ The current task is about training a model, please ensure high performance:
- Set suitable hyperparameters for the model, make metrics as high as possible.
"""
# Prompt for using tools of "model_evaluate" type
# Prompt for taking on "model_evaluate" tasks
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""
# Prompt for using tools of "vision" type
# Prompt for taking on "image2webpage" tasks
IMAGE2WEBPAGE_PROMPT = """
The current task is about converting image into webpage code. please note the following:
- Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow.

View file

@ -25,7 +25,7 @@ class AzureOpenAILLM(OpenAILLM):
# https://learn.microsoft.com/zh-cn/azure/ai-services/openai/how-to/migration?tabs=python-new%2Cdalle-fix
self.aclient = AsyncAzureOpenAI(**kwargs)
self.model = self.config.model # Used in _calc_usage & _cons_kwargs
self.pricing_plan = self.config.pricing_plan
self.pricing_plan = self.config.pricing_plan or self.model
def _make_client_kwargs(self) -> dict:
kwargs = dict(

View file

@ -105,7 +105,7 @@ class BaseLLM(ABC):
async def aask(
self,
msg: str,
msg: Union[str, list[dict[str, str]]],
system_msgs: Optional[list[str]] = None,
format_msgs: Optional[list[dict[str, str]]] = None,
images: Optional[Union[str, list[str]]] = None,
@ -120,7 +120,10 @@ class BaseLLM(ABC):
message = []
if format_msgs:
message.extend(format_msgs)
message.append(self._user_msg(msg, images=images))
if isinstance(msg, str):
message.append(self._user_msg(msg, images=images))
else:
message.extend(msg)
logger.debug(message)
rsp = await self.acompletion_text(message, stream=stream, timeout=timeout)
return rsp
@ -138,8 +141,7 @@ class BaseLLM(ABC):
context.append(self._assistant_msg(rsp_text))
return self._extract_assistant_rsp(context)
async def aask_code(self, messages: Union[str, Message, list[dict]], timeout=3) -> dict:
"""FIXME: No code segment filtering has been done here, and all results are actually displayed"""
async def aask_code(self, messages: Union[str, Message, list[dict]], timeout=3, **kwargs) -> dict:
raise NotImplementedError
@abstractmethod

View file

@ -25,6 +25,7 @@ GENERAL_FUNCTION_SCHEMA = {
},
}
# tool_choice value for general_function_schema
# https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
GENERAL_TOOL_CHOICE = {"type": "function", "function": {"name": "execute"}}

View file

@ -29,8 +29,12 @@ from metagpt.logs import log_llm_stream, logger
from metagpt.provider.base_llm import BaseLLM
from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA
from metagpt.provider.llm_provider_registry import register_provider
from metagpt.schema import Message
from metagpt.utils.common import CodeParser, decode_image, log_and_reraise
from metagpt.utils.common import (
CodeParser,
decode_image,
log_and_reraise,
process_message,
)
from metagpt.utils.cost_manager import CostManager
from metagpt.utils.exceptions import handle_exception
from metagpt.utils.token_counter import (
@ -83,7 +87,9 @@ class OpenAILLM(BaseLLM):
collected_messages = []
async for chunk in response:
chunk_message = chunk.choices[0].delta.content or "" if chunk.choices else "" # extract the message
finish_reason = chunk.choices[0].finish_reason if hasattr(chunk.choices[0], "finish_reason") else None
finish_reason = (
chunk.choices[0].finish_reason if chunk.choices and hasattr(chunk.choices[0], "finish_reason") else None
)
log_llm_stream(chunk_message)
collected_messages.append(chunk_message)
if finish_reason:
@ -141,44 +147,16 @@ class OpenAILLM(BaseLLM):
rsp = await self._achat_completion(messages, timeout=timeout)
return self.get_choice_text(rsp)
def _func_configs(self, messages: list[dict], timeout=3, **kwargs) -> dict:
"""Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create"""
if "tools" not in kwargs:
configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]}
kwargs.update(configs)
return self._cons_kwargs(messages=messages, timeout=timeout, **kwargs)
def _process_message(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
"""convert messages to list[dict]."""
# 全部转成list
if not isinstance(messages, list):
messages = [messages]
# 转成list[dict]
processed_messages = []
for msg in messages:
if isinstance(msg, str):
processed_messages.append({"role": "user", "content": msg})
elif isinstance(msg, dict):
assert set(msg.keys()) == set(["role", "content"])
processed_messages.append(msg)
elif isinstance(msg, Message):
processed_messages.append(msg.to_dict())
else:
raise ValueError(
f"Only support message type are: str, Message, dict, but got {type(messages).__name__}!"
)
return processed_messages
async def _achat_completion_function(self, messages: list[dict], timeout=3, **chat_configs) -> ChatCompletion:
messages = self._process_message(messages)
kwargs = self._func_configs(messages=messages, timeout=timeout, **chat_configs)
async def _achat_completion_function(
self, messages: list[dict], timeout: int = 3, **chat_configs
) -> ChatCompletion:
messages = process_message(messages)
kwargs = self._cons_kwargs(messages=messages, timeout=timeout, **chat_configs)
rsp: ChatCompletion = await self.aclient.chat.completions.create(**kwargs)
self._update_costs(rsp.usage)
return rsp
async def aask_code(self, messages: list[dict], **kwargs) -> dict:
async def aask_code(self, messages: list[dict], timeout: int = 3, **kwargs) -> dict:
"""Use function of tools to ask a code.
Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create
@ -188,12 +166,15 @@ class OpenAILLM(BaseLLM):
>>> rsp = await llm.aask_code(msg)
# -> {'language': 'python', 'code': "print('Hello, World!')"}
"""
if "tools" not in kwargs:
configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]}
kwargs.update(configs)
rsp = await self._achat_completion_function(messages, **kwargs)
return self.get_choice_function_arguments(rsp)
def _parse_arguments(self, arguments: str) -> dict:
"""parse arguments in openai function call"""
if "langugae" not in arguments and "code" not in arguments:
if "language" not in arguments and "code" not in arguments:
logger.warning(f"Not found `code`, `language`, We assume it is pure code:\n {arguments}\n. ")
return {"language": "python", "code": arguments}

View file

@ -1,48 +1,97 @@
from __future__ import annotations
from pydantic import Field
import json
from typing import Literal, Union
from pydantic import Field, model_validator
from metagpt.actions.di.ask_review import ReviewConst
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.write_analysis_code import (
WriteCodeWithoutTools,
WriteCodeWithTools,
)
from metagpt.actions.di.write_analysis_code import CheckData, WriteAnalysisCode
from metagpt.logs import logger
from metagpt.prompts.di.write_analysis_code import DATA_INFO
from metagpt.roles import Role
from metagpt.schema import Message, Task, TaskResult
from metagpt.strategy.task_type import TaskType
from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
from metagpt.utils.common import CodeParser
REACT_THINK_PROMPT = """
# User Requirement
{user_requirement}
# Context
{context}
Output a json following the format:
```json
{{
"thoughts": str = "Thoughts on current situation, reflect on how you should proceed to fulfill the user requirement",
"state": bool = "Decide whether you need to take more actions to complete the user requirement. Return true if you think so. Return false if you think the requirement has been completely fulfilled."
}}
```
"""
class DataInterpreter(Role):
name: str = "David"
profile: str = "DataInterpreter"
auto_run: bool = True
use_tools: bool = False
use_plan: bool = True
use_reflection: bool = False
execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True)
tools: list[str] = []
tools: Union[str, list[str]] = [] # Use special symbol ["<all>"] to indicate use of all registered tools
tool_recommender: ToolRecommender = None
react_mode: Literal["plan_and_act", "react"] = "plan_and_act"
max_react_loop: int = 10 # used for react mode
def __init__(
self,
auto_run=True,
use_tools=False,
tools=[],
**kwargs,
):
super().__init__(auto_run=auto_run, use_tools=use_tools, tools=tools, **kwargs)
self._set_react_mode(react_mode="plan_and_act", auto_run=auto_run, use_tools=use_tools)
if use_tools and tools:
from metagpt.tools.tool_registry import (
validate_tool_names, # import upon use
)
self.tools = validate_tool_names(tools)
logger.info(f"will only use {self.tools} as tools")
@model_validator(mode="after")
def set_plan_and_tool(self) -> "Interpreter":
self._set_react_mode(react_mode=self.react_mode, max_react_loop=self.max_react_loop, auto_run=self.auto_run)
self.use_plan = (
self.react_mode == "plan_and_act"
) # create a flag for convenience, overwrite any passed-in value
if self.tools:
self.tool_recommender = BM25ToolRecommender(tools=self.tools)
self.set_actions([WriteAnalysisCode])
self._set_state(0)
return self
@property
def working_memory(self):
return self.rc.working_memory
async def _think(self) -> bool:
"""Useful in 'react' mode. Use LLM to decide whether and what to do next."""
user_requirement = self.get_memories()[0].content
context = self.working_memory.get()
if not context:
# just started the run, we need action certainly
self.working_memory.add(self.get_memories()[0]) # add user requirement to working memory
self._set_state(0)
return True
prompt = REACT_THINK_PROMPT.format(user_requirement=user_requirement, context=context)
rsp = await self.llm.aask(prompt)
rsp_dict = json.loads(CodeParser.parse_code(block=None, text=rsp))
self.working_memory.add(Message(content=rsp_dict["thoughts"], role="assistant"))
need_action = rsp_dict["state"]
self._set_state(0) if need_action else self._set_state(-1)
return need_action
async def _act(self) -> Message:
"""Useful in 'react' mode. Return a Message conforming to Role._act interface."""
code, _, _ = await self._write_and_exec_code()
return Message(content=code, role="assistant", cause_by=WriteAnalysisCode)
async def _plan_and_act(self) -> Message:
rsp = await super()._plan_and_act()
await self.execute_code.terminate()
return rsp
async def _act_on_task(self, current_task: Task) -> TaskResult:
"""Useful in 'plan_and_act' mode. Wrap the output in a TaskResult for review and confirmation."""
code, result, is_success = await self._write_and_exec_code()
task_result = TaskResult(code=code, result=result, is_success=is_success)
return task_result
@ -51,14 +100,30 @@ class DataInterpreter(Role):
counter = 0
success = False
# plan info
plan_status = self.planner.get_plan_status() if self.use_plan else ""
# tool info
if self.tools:
context = (
self.working_memory.get()[-1].content if self.working_memory.get() else ""
) # thoughts from _think stage in 'react' mode
plan = self.planner.plan if self.use_plan else None
tool_info = await self.tool_recommender.get_recommended_tool_info(context=context, plan=plan)
else:
tool_info = ""
# data info
await self._check_data()
while not success and counter < max_retry:
### write code ###
code, cause_by = await self._write_code()
code, cause_by = await self._write_code(counter, plan_status, tool_info)
self.working_memory.add(Message(content=code["code"], role="assistant", cause_by=cause_by))
self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by))
### execute code ###
result, success = await self.execute_code.run(**code)
result, success = await self.execute_code.run(code)
print(result)
self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode))
@ -72,14 +137,48 @@ class DataInterpreter(Role):
if ReviewConst.CHANGE_WORDS[0] in review:
counter = 0 # redo the task again with help of human suggestions
return code["code"], result, success
return code, result, success
async def _write_code(self):
todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
async def _write_code(
self,
counter: int,
plan_status: str = "",
tool_info: str = "",
):
todo = self.rc.todo # todo is WriteAnalysisCode
logger.info(f"ready to {todo.name}")
use_reflection = counter > 0 and self.use_reflection # only use reflection after the first trial
context = self.planner.get_useful_memories()
# print(*context, sep="\n***\n")
code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0)
user_requirement = self.get_memories()[0].content
code = await todo.run(
user_requirement=user_requirement,
plan_status=plan_status,
tool_info=tool_info,
working_memory=self.working_memory.get(),
use_reflection=use_reflection,
)
return code, todo
async def _check_data(self):
if (
not self.use_plan
or not self.planner.plan.get_finished_tasks()
or self.planner.plan.current_task.task_type
not in [
TaskType.DATA_PREPROCESS.type_name,
TaskType.FEATURE_ENGINEERING.type_name,
TaskType.MODEL_TRAIN.type_name,
]
):
return
logger.info("Check updated data")
code = await CheckData().run(self.planner.plan)
if not code.strip():
return
result, success = await self.execute_code.run(code)
if success:
print(result)
data_info = DATA_INFO.format(info=result)
self.working_memory.add(Message(content=data_info, role="user", cause_by=CheckData))

View file

@ -1,64 +0,0 @@
from metagpt.actions.di.debug_code import DebugCode
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.ml_action import UpdateDataColumns, WriteCodeWithToolsML
from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.tools.tool_type import ToolType
from metagpt.utils.common import any_to_str
class MLEngineer(DataInterpreter):
name: str = "Mark"
profile: str = "MLEngineer"
debug_context: list = []
latest_code: str = ""
async def _write_code(self):
if not self.use_tools:
return await super()._write_code()
# In a trial and errors settings, check whether this is our first attempt to tackle the task. If there is no code execution before, then it is.
is_first_trial = any_to_str(ExecuteNbCode) not in [msg.cause_by for msg in self.working_memory.get()]
if is_first_trial:
# For the first trial, write task code from scratch
column_info = await self._update_data_columns()
logger.info("Write code with tools")
tool_context, code = await WriteCodeWithToolsML(selected_tools=self.tools).run(
context=[], # context assembled inside the Action
plan=self.planner.plan,
column_info=column_info,
)
self.debug_context = tool_context
cause_by = WriteCodeWithToolsML
else:
# Previous trials resulted in error, debug and rewrite the code
logger.warning("We got a bug, now start to debug...")
code = await DebugCode().run(
code=self.latest_code,
runtime_result=self.working_memory.get(),
context=self.debug_context,
)
logger.info(f"new code \n{code}")
cause_by = DebugCode
self.latest_code = code["code"]
return code, cause_by
async def _update_data_columns(self):
current_task = self.planner.plan.current_task
if current_task.task_type not in [
ToolType.DATA_PREPROCESS.type_name,
ToolType.FEATURE_ENGINEERING.type_name,
ToolType.MODEL_TRAIN.type_name,
]:
return ""
logger.info("Check columns in updated data")
code = await UpdateDataColumns().run(self.planner.plan)
success = False
result, success = await self.execute_code.run(**code)
print(result)
return result if success else ""

View file

@ -283,7 +283,7 @@ class Role(SerializationMixin, ContextMixin, BaseModel):
self.actions.append(i)
self.states.append(f"{len(self.actions) - 1}. {action}")
def _set_react_mode(self, react_mode: str, max_react_loop: int = 1, auto_run: bool = True, use_tools: bool = False):
def _set_react_mode(self, react_mode: str, max_react_loop: int = 1, auto_run: bool = True):
"""Set strategy of the Role reacting to observed Message. Variation lies in how
this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.
@ -304,9 +304,7 @@ class Role(SerializationMixin, ContextMixin, BaseModel):
if react_mode == RoleReactMode.REACT:
self.rc.max_react_loop = max_react_loop
elif react_mode == RoleReactMode.PLAN_AND_ACT:
self.planner = Planner(
goal=self.goal, working_memory=self.rc.working_memory, auto_run=auto_run, use_tools=use_tools
)
self.planner = Planner(goal=self.goal, working_memory=self.rc.working_memory, auto_run=auto_run)
def _watch(self, actions: Iterable[Type[Action]] | Iterable[Action]):
"""Watch Actions of interest. Role will select Messages caused by these Actions from its personal message

View file

@ -13,6 +13,8 @@ from metagpt.actions.di.write_plan import (
from metagpt.logs import logger
from metagpt.memory import Memory
from metagpt.schema import Message, Plan, Task, TaskResult
from metagpt.strategy.task_type import TaskType
from metagpt.utils.common import remove_comments
STRUCTURAL_CONTEXT = """
## User Requirement
@ -25,6 +27,24 @@ STRUCTURAL_CONTEXT = """
{current_task}
"""
PLAN_STATUS = """
## Finished Tasks
### code
```python
{code_written}
```
### execution result
{task_results}
## Current Task
{current_task}
## Task Guidance
Write complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.
Specifically, {guidance}
"""
class Planner(BaseModel):
plan: Plan
@ -32,7 +52,6 @@ class Planner(BaseModel):
default_factory=Memory
) # memory for working on each task, discarded each time a task is done
auto_run: bool = False
use_tools: bool = False
def __init__(self, goal: str = "", plan: Plan = None, **kwargs):
plan = plan or Plan(goal=goal)
@ -53,7 +72,7 @@ class Planner(BaseModel):
plan_confirmed = False
while not plan_confirmed:
context = self.get_useful_memories()
rsp = await WritePlan().run(context, max_tasks=max_tasks, use_tools=self.use_tools)
rsp = await WritePlan().run(context, max_tasks=max_tasks)
self.working_memory.add(Message(content=rsp, role="assistant", cause_by=WritePlan))
# precheck plan before asking reviews
@ -137,3 +156,23 @@ class Planner(BaseModel):
context_msg = [Message(content=context, role="user")]
return context_msg + self.working_memory.get()
def get_plan_status(self) -> str:
# prepare components of a plan status
finished_tasks = self.plan.get_finished_tasks()
code_written = [remove_comments(task.code) for task in finished_tasks]
code_written = "\n\n".join(code_written)
task_results = [task.result for task in finished_tasks]
task_results = "\n\n".join(task_results)
task_type_name = self.current_task.task_type.upper()
guidance = TaskType[task_type_name].value.guidance if hasattr(TaskType, task_type_name) else ""
# combine components in a prompt
prompt = PLAN_STATUS.format(
code_written=code_written,
task_results=task_results,
current_task=self.current_task.instruction,
guidance=guidance,
)
return prompt

View file

@ -0,0 +1,73 @@
from enum import Enum
from pydantic import BaseModel
from metagpt.prompts.task_type import (
DATA_PREPROCESS_PROMPT,
EDA_PROMPT,
FEATURE_ENGINEERING_PROMPT,
IMAGE2WEBPAGE_PROMPT,
MODEL_EVALUATE_PROMPT,
MODEL_TRAIN_PROMPT,
)
class TaskTypeDef(BaseModel):
name: str
desc: str = ""
guidance: str = ""
class TaskType(Enum):
"""By identifying specific types of tasks, we can inject human priors (guidance) to help task solving"""
EDA = TaskTypeDef(
name="eda",
desc="For performing exploratory data analysis",
guidance=EDA_PROMPT,
)
DATA_PREPROCESS = TaskTypeDef(
name="data preprocessing",
desc="For preprocessing dataset in a data analysis or machine learning task ONLY,"
"general data operation doesn't fall into this type",
guidance=DATA_PREPROCESS_PROMPT,
)
FEATURE_ENGINEERING = TaskTypeDef(
name="feature engineering",
desc="Only for creating new columns for input data.",
guidance=FEATURE_ENGINEERING_PROMPT,
)
MODEL_TRAIN = TaskTypeDef(
name="model train",
desc="Only for training model.",
guidance=MODEL_TRAIN_PROMPT,
)
MODEL_EVALUATE = TaskTypeDef(
name="model evaluate",
desc="Only for evaluating model.",
guidance=MODEL_EVALUATE_PROMPT,
)
IMAGE2WEBPAGE = TaskTypeDef(
name="image2webpage",
desc="For converting image into webpage code.",
guidance=IMAGE2WEBPAGE_PROMPT,
)
OTHER = TaskTypeDef(name="other", desc="Any tasks not in the defined categories")
# Legacy TaskType to support tool recommendation using type match. You don't need to define task types if you have no human priors to inject.
TEXT2IMAGE = TaskTypeDef(
name="text2image",
desc="Related to text2image, image2image using stable diffusion model.",
)
WEBSCRAPING = TaskTypeDef(
name="web scraping",
desc="For scraping data from web pages.",
)
EMAIL_LOGIN = TaskTypeDef(
name="email login",
desc="For logging to an email.",
)
@property
def type_name(self):
return self.value.name

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import json
from typing import Literal
import numpy as np
import pandas as pd
@ -16,9 +17,8 @@ from sklearn.preprocessing import (
)
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
TOOL_TYPE = ToolType.DATA_PREPROCESS.type_name
TAGS = ["data preprocessing", "machine learning"]
class MLProcess:
@ -85,20 +85,22 @@ class DataPreprocessTool(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class FillMissingValue(DataPreprocessTool):
"""
Completing missing values with simple strategies.
"""
def __init__(self, features: list, strategy: str = "mean", fill_value=None):
def __init__(
self, features: list, strategy: Literal["mean", "median", "most_frequent", "constant"] = "mean", fill_value=None
):
"""
Initialize self.
Args:
features (list): Columns to be processed.
strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only
be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
strategy (Literal["mean", "median", "most_frequent", "constant"], optional): The imputation strategy, notice 'mean' and 'median' can only
be used for numeric features. Defaults to 'mean'.
fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
Defaults to None.
"""
@ -106,7 +108,7 @@ class FillMissingValue(DataPreprocessTool):
self.model = SimpleImputer(strategy=strategy, fill_value=fill_value)
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class MinMaxScale(DataPreprocessTool):
"""
Transform features by scaling each feature to a range, which is (0, 1).
@ -117,7 +119,7 @@ class MinMaxScale(DataPreprocessTool):
self.model = MinMaxScaler()
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class StandardScale(DataPreprocessTool):
"""
Standardize features by removing the mean and scaling to unit variance.
@ -128,7 +130,7 @@ class StandardScale(DataPreprocessTool):
self.model = StandardScaler()
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class MaxAbsScale(DataPreprocessTool):
"""
Scale each feature by its maximum absolute value.
@ -139,7 +141,7 @@ class MaxAbsScale(DataPreprocessTool):
self.model = MaxAbsScaler()
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class RobustScale(DataPreprocessTool):
"""
Apply the RobustScaler to scale features using statistics that are robust to outliers.
@ -150,7 +152,7 @@ class RobustScale(DataPreprocessTool):
self.model = RobustScaler()
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class OrdinalEncode(DataPreprocessTool):
"""
Encode categorical features as ordinal integers.
@ -161,7 +163,7 @@ class OrdinalEncode(DataPreprocessTool):
self.model = OrdinalEncoder()
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class OneHotEncode(DataPreprocessTool):
"""
Apply one-hot encoding to specified categorical columns, the original columns will be dropped.
@ -180,7 +182,7 @@ class OneHotEncode(DataPreprocessTool):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class LabelEncode(DataPreprocessTool):
"""
Apply label encoding to specified categorical columns in-place.

View file

@ -1,7 +1,6 @@
from imap_tools import MailBox
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
# Define a dictionary mapping email domains to their IMAP server addresses
IMAP_SERVERS = {
@ -24,7 +23,7 @@ IMAP_SERVERS = {
}
@register_tool(tool_type=ToolType.EMAIL_LOGIN.type_name)
@register_tool(tags=["email login"])
def email_login_imap(email_address, email_password):
"""
Use imap_tools package to log in to your email (the email that supports IMAP protocol) to verify and return the account object.

View file

@ -19,12 +19,11 @@ from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
from metagpt.tools.libs.data_preprocess import MLProcess
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
TOOL_TYPE = ToolType.FEATURE_ENGINEERING.type_name
TAGS = ["feature engineering", "machine learning"]
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class PolynomialExpansion(MLProcess):
"""
Add polynomial and interaction features from selected numeric columns to input DataFrame.
@ -67,7 +66,7 @@ class PolynomialExpansion(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class CatCount(MLProcess):
"""
Add value counts of a categorical column as new feature.
@ -92,7 +91,7 @@ class CatCount(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class TargetMeanEncoder(MLProcess):
"""
Encode a categorical column by the mean of the label column, and adds the result as a new feature.
@ -119,7 +118,7 @@ class TargetMeanEncoder(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class KFoldTargetMeanEncoder(MLProcess):
"""
Add a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column.
@ -159,7 +158,7 @@ class KFoldTargetMeanEncoder(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class CatCross(MLProcess):
"""
Add pairwise crossed features and convert them to numerical features.
@ -216,7 +215,7 @@ class CatCross(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class GroupStat(MLProcess):
"""
Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'.
@ -248,7 +247,7 @@ class GroupStat(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class SplitBins(MLProcess):
"""
Inplace binning of continuous data into intervals, returning integer-encoded bin identifiers directly.
@ -276,7 +275,7 @@ class SplitBins(MLProcess):
return new_df
# @register_tool(tool_type=TOOL_TYPE)
# @register_tool(tags=TAGS)
class ExtractTimeComps(MLProcess):
"""
Extract time components from a datetime column and add them as new features.
@ -316,7 +315,7 @@ class ExtractTimeComps(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class GeneralSelection(MLProcess):
"""
Drop all nan feats and feats with only one unique value.
@ -349,7 +348,7 @@ class GeneralSelection(MLProcess):
# skip for now because lgb is needed
# @register_tool(tool_type=TOOL_TYPE)
# @register_tool(tags=TAGS)
class TreeBasedSelection(MLProcess):
"""
Select features based on tree-based model and remove features with low importance.
@ -403,7 +402,7 @@ class TreeBasedSelection(MLProcess):
return new_df
@register_tool(tool_type=TOOL_TYPE)
@register_tool(tags=TAGS)
class VarianceBasedSelection(MLProcess):
"""
Select features based on variance and remove features with low variance.

View file

@ -5,13 +5,13 @@
@Author : mannaandpoem
@File : gpt_v_generator.py
"""
import os
import re
from pathlib import Path
from metagpt.const import DEFAULT_WORKSPACE_ROOT
from metagpt.logs import logger
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
from metagpt.utils.common import encode_image
from metagpt.utils.common import CodeParser, encode_image
ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX designer, please generate layout information for this image:
@ -28,11 +28,9 @@ As the design pays tribute to large companies, sometimes it is normal for some c
Now, please generate the corresponding webpage code including HTML, CSS and JavaScript:"""
@register_tool(
tool_type=ToolType.IMAGE2WEBPAGE.type_name, include_functions=["__init__", "generate_webpages", "save_webpages"]
)
@register_tool(tags=["image2webpage"], include_functions=["__init__", "generate_webpages", "save_webpages"])
class GPTvGenerator:
"""Class for generating webpages at once.
"""Class for generating webpage code from a given webpage screenshot.
This class provides methods to generate webpages including all code (HTML, CSS, and JavaScript) based on an image.
It utilizes a vision model to analyze the layout from an image and generate webpage codes accordingly.
@ -75,50 +73,34 @@ class GPTvGenerator:
return await self.llm.aask(msg=prompt, images=[encode_image(image_path)])
@staticmethod
def save_webpages(image_path: str, webpages: str) -> Path:
def save_webpages(webpages: str, save_folder_name: str = "example") -> Path:
"""Save webpages including all code (HTML, CSS, and JavaScript) at once.
Args:
image_path (str): The path of the image file.
webpages (str): The generated webpages content.
save_folder_name (str, optional): The name of the folder to save the webpages. Defaults to 'example'.
Returns:
Path: The path of the saved webpages.
"""
# Create a folder called webpages in the workspace directory to store HTML, CSS, and JavaScript files
webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / Path(image_path).stem
os.makedirs(webpages_path, exist_ok=True)
webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / save_folder_name
logger.info(f"code will be saved at {webpages_path}")
webpages_path.mkdir(parents=True, exist_ok=True)
index_path = webpages_path / "index.html"
try:
index = webpages.split("```html")[1].split("```")[0]
style_path = None
if "styles.css" in index:
style_path = webpages_path / "styles.css"
elif "style.css" in index:
style_path = webpages_path / "style.css"
style = webpages.split("```css")[1].split("```")[0] if style_path else ""
index_path.write_text(CodeParser.parse_code(block=None, text=webpages, lang="html"))
js_path = None
if "scripts.js" in index:
js_path = webpages_path / "scripts.js"
elif "script.js" in index:
js_path = webpages_path / "script.js"
extract_and_save_code(folder=webpages_path, text=webpages, pattern="styles?.css", language="css")
js = webpages.split("```javascript")[1].split("```")[0] if js_path else ""
except IndexError:
raise ValueError(f"No html or css or js code found in the result. \nWebpages: {webpages}")
try:
with open(index_path, "w", encoding="utf-8") as f:
f.write(index)
if style_path:
with open(style_path, "w", encoding="utf-8") as f:
f.write(style)
if js_path:
with open(js_path, "w", encoding="utf-8") as f:
f.write(js)
except FileNotFoundError as e:
raise FileNotFoundError(f"Cannot save the webpages to {str(webpages_path)}") from e
extract_and_save_code(folder=webpages_path, text=webpages, pattern="scripts?.js", language="javascript")
return webpages_path
def extract_and_save_code(folder, text, pattern, language):
word = re.search(pattern, text)
if word:
path = folder / word.group(0)
code = CodeParser.parse_code(block=None, text=text, lang=language)
path.write_text(code, encoding="utf-8")

View file

@ -14,11 +14,9 @@ import requests
from aiohttp import ClientSession
from PIL import Image, PngImagePlugin
#
from metagpt.const import SD_OUTPUT_FILE_REPO, SOURCE_ROOT
from metagpt.logs import logger
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
payload = {
"prompt": "",
@ -55,7 +53,7 @@ default_negative_prompt = "(easynegative:0.8),black, dark,Low resolution"
@register_tool(
tool_type=ToolType.STABLE_DIFFUSION.type_name,
tags=["text2image", "multimodal"],
include_functions=["__init__", "simple_run_t2i", "run_t2i", "construct_payload", "save"],
)
class SDEngine:

View file

@ -1,9 +1,8 @@
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper
@register_tool(tool_type=ToolType.WEBSCRAPING.type_name)
@register_tool(tags=["web scraping", "web"])
async def scrape_web_playwright(url):
"""
Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright.

View file

@ -2,14 +2,18 @@ import inspect
from metagpt.utils.parse_docstring import GoogleDocstringParser, remove_spaces
PARSER = GoogleDocstringParser
def convert_code_to_tool_schema(obj, include: list[str] = []):
def convert_code_to_tool_schema(obj, include: list[str] = None):
docstring = inspect.getdoc(obj)
assert docstring, "no docstring found for the objects, skip registering"
if inspect.isclass(obj):
schema = {"type": "class", "description": remove_spaces(docstring), "methods": {}}
for name, method in inspect.getmembers(obj, inspect.isfunction):
if name.startswith("_") and name != "__init__": # skip private methodss
continue
if include and name not in include:
continue
# method_doc = inspect.getdoc(method)
@ -23,54 +27,31 @@ def convert_code_to_tool_schema(obj, include: list[str] = []):
return schema
def function_docstring_to_schema(fn_obj, docstring):
def function_docstring_to_schema(fn_obj, docstring) -> dict:
"""
Converts a function's docstring into a schema dictionary.
Args:
fn_obj: The function object.
docstring: The docstring of the function.
Returns:
A dictionary representing the schema of the function's docstring.
The dictionary contains the following keys:
- 'type': The type of the function ('function' or 'async_function').
- 'description': The first section of the docstring describing the function overall. Provided to LLMs for both recommending and using the function.
- 'signature': The signature of the function, which helps LLMs understand how to call the function.
- 'parameters': Docstring section describing parameters including args and returns, served as extra details for LLM perception.
"""
signature = inspect.signature(fn_obj)
docstring = remove_spaces(docstring)
overall_desc, param_desc = PARSER.parse(docstring)
function_type = "function" if not inspect.iscoroutinefunction(fn_obj) else "async_function"
return {"type": function_type, **docstring_to_schema(docstring)}
def docstring_to_schema(docstring: str):
if docstring is None:
return {}
parser = GoogleDocstringParser(docstring=docstring)
# 匹配简介部分
description = parser.parse_desc()
# 匹配Args部分
params = parser.parse_params()
parameter_schema = {"properties": {}, "required": []}
for param in params:
param_name, param_type, param_desc = param
# check required or optional
is_optional, param_type = parser.check_and_parse_optional(param_type)
if not is_optional:
parameter_schema["required"].append(param_name)
# type and desc
param_dict = {"type": param_type, "description": remove_spaces(param_desc)}
# match Default for optional args
has_default_val, default_val = parser.check_and_parse_default_value(param_desc)
if has_default_val:
param_dict["default"] = default_val
# match Enum
has_enum, enum_vals = parser.check_and_parse_enum(param_desc)
if has_enum:
param_dict["enum"] = enum_vals
# add to parameter schema
parameter_schema["properties"].update({param_name: param_dict})
# 匹配Returns部分
returns = parser.parse_returns()
# 构建YAML字典
schema = {
"description": description,
"parameters": parameter_schema,
}
if returns:
schema["returns"] = [{"type": ret[0], "description": remove_spaces(ret[1])} for ret in returns]
return schema
return {"type": function_type, "description": overall_desc, "signature": str(signature), "parameters": param_desc}
def get_class_method_docstring(cls, method_name):

View file

@ -1,12 +1,6 @@
from pydantic import BaseModel
class ToolTypeDef(BaseModel):
name: str
desc: str = ""
usage_prompt: str = ""
class ToolSchema(BaseModel):
description: str
@ -16,3 +10,4 @@ class Tool(BaseModel):
path: str
schemas: dict = {}
code: str = ""
tags: list[str] = []

View file

@ -0,0 +1,214 @@
from __future__ import annotations
import json
from typing import Any
import jieba
import numpy as np
from pydantic import BaseModel, field_validator
from rank_bm25 import BM25Okapi
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.schema import Plan
from metagpt.tools import TOOL_REGISTRY
from metagpt.tools.tool_data_type import Tool
from metagpt.tools.tool_registry import validate_tool_names
from metagpt.utils.common import CodeParser
TOOL_INFO_PROMPT = """
## Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python class or function.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
## Available Tools:
Each tool is described in JSON format. When you call a tool, import the tool from its path first.
{tool_schemas}
"""
TOOL_RECOMMENDATION_PROMPT = """
## User Requirement:
{current_task}
## Task
Recommend up to {topk} tools from 'Available Tools' that can help solve the 'User Requirement'.
## Available Tools:
{available_tools}
## Tool Selection and Instructions:
- Select tools most relevant to completing the 'User Requirement'.
- If you believe that no tools are suitable, indicate with an empty list.
- Only list the names of the tools, not the full schema of each tool.
- Ensure selected tools are listed in 'Available Tools'.
- Output a json list of tool names:
```json
["tool_name1", "tool_name2", ...]
```
"""
class ToolRecommender(BaseModel):
"""
The default ToolRecommender:
1. Recall: To be implemented in subclasses. Recall tools based on the given context and plan.
2. Rank: Use LLM to select final candidates from recalled set.
"""
tools: dict[str, Tool] = {}
force: bool = False # whether to forcedly recommend the specified tools
@field_validator("tools", mode="before")
@classmethod
def validate_tools(cls, v: list[str]) -> dict[str, Tool]:
# One can use special symbol ["<all>"] to indicate use of all registered tools
if v == ["<all>"]:
return TOOL_REGISTRY.get_all_tools()
else:
return validate_tool_names(v)
async def recommend_tools(
self, context: str = "", plan: Plan = None, recall_topk: int = 20, topk: int = 5
) -> list[Tool]:
"""
Recommends a list of tools based on the given context and plan. The recommendation process includes two stages: recall from a large pool and rank the recalled tools to select the final set.
Args:
context (str): The context for tool recommendation.
plan (Plan): The plan for tool recommendation.
recall_topk (int): The number of tools to recall in the initial step.
topk (int): The number of tools to return after rank as final recommendations.
Returns:
list[Tool]: A list of recommended tools.
"""
if not self.tools:
return []
if self.force or (not context and not plan):
# directly use what users have specified as result for forced recommendation;
# directly use the whole set if there is no useful information
return list(self.tools.values())
recalled_tools = await self.recall_tools(context=context, plan=plan, topk=recall_topk)
if not recalled_tools:
return []
ranked_tools = await self.rank_tools(recalled_tools=recalled_tools, context=context, plan=plan, topk=topk)
logger.info(f"Recommended tools: \n{[tool.name for tool in ranked_tools]}")
return ranked_tools
async def get_recommended_tool_info(self, **kwargs) -> str:
"""
Wrap recommended tools with their info in a string, which can be used directly in a prompt.
"""
recommended_tools = await self.recommend_tools(**kwargs)
if not recommended_tools:
return ""
tool_schemas = {tool.name: tool.schemas for tool in recommended_tools}
return TOOL_INFO_PROMPT.format(tool_schemas=tool_schemas)
async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
"""
Retrieves a list of relevant tools from a large pool, based on the given context and plan.
"""
raise NotImplementedError
async def rank_tools(
self, recalled_tools: list[Tool], context: str = "", plan: Plan = None, topk: int = 5
) -> list[Tool]:
"""
Default rank methods for a ToolRecommender. Use LLM to rank the recalled tools based on the given context, plan, and topk value.
"""
current_task = plan.current_task.instruction if plan else context
available_tools = {tool.name: tool.schemas["description"] for tool in recalled_tools}
prompt = TOOL_RECOMMENDATION_PROMPT.format(
current_task=current_task,
available_tools=available_tools,
topk=topk,
)
rsp = await LLM().aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp)
ranked_tools = json.loads(rsp)
valid_tools = validate_tool_names(ranked_tools)
return list(valid_tools.values())[:topk]
class TypeMatchToolRecommender(ToolRecommender):
"""
A legacy ToolRecommender using task type matching at the recall stage:
1. Recall: Find tools based on exact match between task type and tool tag;
2. Rank: LLM rank, the same as the default ToolRecommender.
"""
async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
if not plan:
return list(self.tools.values())[:topk]
# find tools based on exact match between task type and tool tag
task_type = plan.current_task.task_type
candidate_tools = TOOL_REGISTRY.get_tools_by_tag(task_type)
candidate_tool_names = set(self.tools.keys()) & candidate_tools.keys()
recalled_tools = [candidate_tools[tool_name] for tool_name in candidate_tool_names][:topk]
logger.info(f"Recalled tools: \n{[tool.name for tool in recalled_tools]}")
return recalled_tools
class BM25ToolRecommender(ToolRecommender):
"""
A ToolRecommender using BM25 at the recall stage:
1. Recall: Querying tool descriptions with task instruction if plan exists. Otherwise, return all user-specified tools;
2. Rank: LLM rank, the same as the default ToolRecommender.
"""
bm25: Any = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
self._init_corpus()
def _init_corpus(self):
corpus = [f"{tool.name} {tool.tags}: {tool.schemas['description']}" for tool in self.tools.values()]
tokenized_corpus = [self._tokenize(doc) for doc in corpus]
self.bm25 = BM25Okapi(tokenized_corpus)
def _tokenize(self, text):
return jieba.lcut(text) # FIXME: needs more sophisticated tokenization
async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
query = plan.current_task.instruction if plan else context
query_tokens = self._tokenize(query)
doc_scores = self.bm25.get_scores(query_tokens)
top_indexes = np.argsort(doc_scores)[::-1][:topk]
recalled_tools = [list(self.tools.values())[index] for index in top_indexes]
logger.info(
f"Recalled tools: \n{[tool.name for tool in recalled_tools]}; Scores: {[doc_scores[index] for index in top_indexes]}"
)
return recalled_tools
class EmbeddingToolRecommender(ToolRecommender):
"""
NOTE: To be implemented.
A ToolRecommender using embeddings at the recall stage:
1. Recall: Use embeddings to calculate the similarity between query and tool info;
2. Rank: LLM rank, the same as the default ToolRecommender.
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
pass

View file

@ -10,26 +10,20 @@ from __future__ import annotations
import inspect
import os
from collections import defaultdict
from typing import Union
import yaml
from pydantic import BaseModel, field_validator
from pydantic import BaseModel
from metagpt.const import TOOL_SCHEMA_PATH
from metagpt.logs import logger
from metagpt.tools.tool_convert import convert_code_to_tool_schema
from metagpt.tools.tool_data_type import Tool, ToolSchema, ToolTypeDef
from metagpt.tools.tool_type import ToolType
from metagpt.tools.tool_data_type import Tool, ToolSchema
class ToolRegistry(BaseModel):
tools: dict = {}
tool_types: dict = {}
tools_by_types: dict = defaultdict(dict) # two-layer k-v, {tool_type: {tool_name: {...}, ...}, ...}
@field_validator("tool_types", mode="before")
@classmethod
def init_tool_types(cls, tool_types: ToolType):
return {tool_type.type_name: tool_type.value for tool_type in tool_types}
tools_by_tags: dict = defaultdict(dict) # two-layer k-v, {tag: {tool_name: {...}, ...}, ...}
def register_tool(
self,
@ -37,25 +31,15 @@ class ToolRegistry(BaseModel):
tool_path,
schema_path="",
tool_code="",
tool_type="other",
tags=None,
tool_source_object=None,
include_functions=[],
include_functions=None,
verbose=False,
):
if self.has_tool(tool_name):
return
if tool_type not in self.tool_types:
# register new tool type on the fly
logger.warning(
f"{tool_type} not previously defined, will create a temporary tool type with just a name. This tool type is only effective during this runtime. You may consider add this tool type with more configs permanently at metagpt.tools.tool_type"
)
temp_tool_type_obj = ToolTypeDef(name=tool_type)
self.tool_types[tool_type] = temp_tool_type_obj
if verbose:
logger.info(f"tool type {tool_type} registered")
schema_path = schema_path or TOOL_SCHEMA_PATH / tool_type / f"{tool_name}.yml"
schema_path = schema_path or TOOL_SCHEMA_PATH / f"{tool_name}.yml"
schemas = make_schema(tool_source_object, include_functions, schema_path)
@ -70,10 +54,11 @@ class ToolRegistry(BaseModel):
# logger.warning(
# f"{tool_name} schema not conforms to required format, but will be used anyway. Mismatch: {e}"
# )
tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code)
tags = tags or []
tool = Tool(name=tool_name, path=tool_path, schemas=schemas, code=tool_code, tags=tags)
self.tools[tool_name] = tool
self.tools_by_types[tool_type][tool_name] = tool
for tag in tags:
self.tools_by_tags[tag].update({tool_name: tool})
if verbose:
logger.info(f"{tool_name} registered")
logger.info(f"schema made at {str(schema_path)}, can be used for checking")
@ -84,24 +69,24 @@ class ToolRegistry(BaseModel):
def get_tool(self, key) -> Tool:
return self.tools.get(key)
def get_tools_by_type(self, key) -> dict[str, Tool]:
return self.tools_by_types.get(key, {})
def get_tools_by_tag(self, key) -> dict[str, Tool]:
return self.tools_by_tags.get(key, {})
def has_tool_type(self, key) -> bool:
return key in self.tool_types
def get_all_tools(self) -> dict[str, Tool]:
return self.tools
def get_tool_type(self, key) -> ToolType:
return self.tool_types.get(key)
def has_tool_tag(self, key) -> bool:
return key in self.tools_by_tags
def get_tool_types(self) -> dict[str, ToolType]:
return self.tool_types
def get_tool_tags(self) -> list[str]:
return list(self.tools_by_tags.keys())
# Registry instance
TOOL_REGISTRY = ToolRegistry(tool_types=ToolType)
TOOL_REGISTRY = ToolRegistry()
def register_tool(tool_type: str = "other", schema_path: str = "", **kwargs):
def register_tool(tags: list[str] = None, schema_path: str = "", **kwargs):
"""register a tool to registry"""
def decorator(cls):
@ -117,7 +102,7 @@ def register_tool(tool_type: str = "other", schema_path: str = "", **kwargs):
tool_path=file_path,
schema_path=schema_path,
tool_code=source_code,
tool_type=tool_type,
tags=tags,
tool_source_object=cls,
**kwargs,
)
@ -142,14 +127,15 @@ def make_schema(tool_source_object, include, path):
return schema
def validate_tool_names(tools: list[str], return_tool_object=False) -> list[str]:
valid_tools = []
for tool_name in tools:
if not TOOL_REGISTRY.has_tool(tool_name):
logger.warning(
f"Specified tool {tool_name} not found and was skipped. Check if you have registered it properly"
)
def validate_tool_names(tools: Union[list[str], str]) -> str:
assert isinstance(tools, list), "tools must be a list of str"
valid_tools = {}
for key in tools:
# one can define either tool names or tool type names, take union to get the whole set
if TOOL_REGISTRY.has_tool(key):
valid_tools.update({key: TOOL_REGISTRY.get_tool(key)})
elif TOOL_REGISTRY.has_tool_tag(key):
valid_tools.update(TOOL_REGISTRY.get_tools_by_tag(key))
else:
valid_tool = TOOL_REGISTRY.get_tool(tool_name) if return_tool_object else tool_name
valid_tools.append(valid_tool)
logger.warning(f"invalid tool name or tool type name: {key}, skipped")
return valid_tools

View file

@ -1,64 +0,0 @@
from enum import Enum
from metagpt.prompts.tool_types import (
DATA_PREPROCESS_PROMPT,
EDA_PROMPT,
FEATURE_ENGINEERING_PROMPT,
IMAGE2WEBPAGE_PROMPT,
MODEL_EVALUATE_PROMPT,
MODEL_TRAIN_PROMPT,
)
from metagpt.tools.tool_data_type import ToolTypeDef
class ToolType(Enum):
EDA = ToolTypeDef(
name="eda",
desc="For performing exploratory data analysis",
usage_prompt=EDA_PROMPT,
)
DATA_PREPROCESS = ToolTypeDef(
name="data_preprocess",
desc="Only for changing value inplace.",
usage_prompt=DATA_PREPROCESS_PROMPT,
)
EMAIL_LOGIN = ToolTypeDef(
name="email_login",
desc="For logging to an email.",
)
FEATURE_ENGINEERING = ToolTypeDef(
name="feature_engineering",
desc="Only for creating new columns for input data.",
usage_prompt=FEATURE_ENGINEERING_PROMPT,
)
MODEL_TRAIN = ToolTypeDef(
name="model_train",
desc="Only for training model.",
usage_prompt=MODEL_TRAIN_PROMPT,
)
MODEL_EVALUATE = ToolTypeDef(
name="model_evaluate",
desc="Only for evaluating model.",
usage_prompt=MODEL_EVALUATE_PROMPT,
)
STABLE_DIFFUSION = ToolTypeDef(
name="stable_diffusion",
desc="Related to text2image, image2image using stable diffusion model.",
)
IMAGE2WEBPAGE = ToolTypeDef(
name="image2webpage",
desc="For converting image into webpage code.",
usage_prompt=IMAGE2WEBPAGE_PROMPT,
)
WEBSCRAPING = ToolTypeDef(
name="web_scraping",
desc="For scraping data from web pages.",
)
OTHER = ToolTypeDef(name="other", desc="Any tools not in the defined categories")
def __missing__(self, key):
return self.OTHER
@property
def type_name(self):
return self.value.name

View file

@ -361,16 +361,6 @@ def parse_recipient(text):
return ""
def create_func_call_config(func_schema: dict) -> dict:
"""Create new function call config"""
tools = [{"type": "function", "function": func_schema}]
tool_choice = {"type": "function", "function": {"name": func_schema["name"]}}
return {
"tools": tools,
"tool_choice": tool_choice,
}
def remove_comments(code_str: str) -> str:
"""Remove comments from code."""
pattern = r"(\".*?\"|\'.*?\')|(\#.*?$)"
@ -812,6 +802,29 @@ def decode_image(img_url_or_b64: str) -> Image:
return img
def process_message(messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
"""convert messages to list[dict]."""
from metagpt.schema import Message
# 全部转成list
if not isinstance(messages, list):
messages = [messages]
# 转成list[dict]
processed_messages = []
for msg in messages:
if isinstance(msg, str):
processed_messages.append({"role": "user", "content": msg})
elif isinstance(msg, dict):
assert set(msg.keys()) == set(["role", "content"])
processed_messages.append(msg)
elif isinstance(msg, Message):
processed_messages.append(msg.to_dict())
else:
raise ValueError(f"Only support message type are: str, Message, dict, but got {type(messages).__name__}!")
return processed_messages
def log_and_reraise(retry_state: RetryCallState):
logger.error(f"Retry attempts exhausted. Last exception: {retry_state.outcome.exception()}")
logger.warning(

View file

@ -1,45 +1,23 @@
import re
from typing import Tuple
from pydantic import BaseModel
def remove_spaces(text):
return re.sub(r"\s+", " ", text).strip()
class DocstringParser(BaseModel):
docstring: str
class DocstringParser:
@staticmethod
def parse(docstring: str) -> Tuple[str, str]:
"""Parse the docstring and return the overall description and the parameter description.
def parse_desc(self) -> str:
"""Parse and return the description from the docstring."""
def parse_params(self) -> list[Tuple[str, str, str]]:
"""Parse and return the parameters from the docstring.
Args:
docstring (str): The docstring to be parsed.
Returns:
list[Tuple[str, str, str]]: A list of input paramter info. Each info is a triple of (param name, param type, param description)
Tuple[str, str]: A tuple of (overall description, parameter description)
"""
def parse_returns(self) -> list[Tuple[str, str]]:
"""Parse and return the output information from the docstring.
Returns:
list[Tuple[str, str]]: A list of output info. Each info is a tuple of (return type, return description)
"""
@staticmethod
def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
"""Check if a parameter is optional and return a processed param_type rid of the optionality info if so"""
@staticmethod
def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
"""Check if a parameter has a default value and return the default value if so"""
@staticmethod
def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]:
"""Check if a parameter description includes an enum and return enum values if so"""
class reSTDocstringParser(DocstringParser):
"""A parser for reStructuredText (reST) docstring"""
@ -48,40 +26,18 @@ class reSTDocstringParser(DocstringParser):
class GoogleDocstringParser(DocstringParser):
"""A parser for Google-stype docstring"""
docstring: str
def parse_desc(self) -> str:
description_match = re.search(r"^(.*?)(?:Args:|Returns:|Raises:|$)", self.docstring, re.DOTALL)
description = remove_spaces(description_match.group(1)) if description_match else ""
return description
def parse_params(self) -> list[Tuple[str, str, str]]:
args_match = re.search(r"Args:\s*(.*?)(?:Returns:|Raises:|$)", self.docstring, re.DOTALL)
_args = args_match.group(1).strip() if args_match else ""
# variable_pattern = re.compile(r"(\w+)\s*\((.*?)\):\s*(.*)")
variable_pattern = re.compile(
r"(\w+)\s*\((.*?)\):\s*(.*?)(?=\n\s*\w+\s*\(|\Z)", re.DOTALL
) # (?=\n\w+\s*\(|\Z) is to assert that what follows is either the start of the next parameter (indicated by a newline, some word characters, and an opening parenthesis) or the end of the string (\Z).
params = variable_pattern.findall(_args)
return params
def parse_returns(self) -> list[Tuple[str, str]]:
returns_match = re.search(r"Returns:\s*(.*?)(?:Raises:|$)", self.docstring, re.DOTALL)
returns = returns_match.group(1).strip() if returns_match else ""
return_pattern = re.compile(r"^(.*)\s*:\s*(.*)$")
returns = return_pattern.findall(returns)
return returns
@staticmethod
def check_and_parse_optional(param_type: str) -> Tuple[bool, str]:
return "optional" in param_type, param_type.replace(", optional", "")
def parse(docstring: str) -> Tuple[str, str]:
if not docstring:
return "", ""
@staticmethod
def check_and_parse_default_value(param_desc: str) -> Tuple[bool, str]:
default_val = re.search(r"Defaults to (.+?)\.", param_desc)
return (True, default_val.group(1)) if default_val else (False, "")
docstring = remove_spaces(docstring)
@staticmethod
def check_and_parse_enum(param_desc: str) -> Tuple[bool, str]:
enum_val = re.search(r"Enum: \[(.+?)\]", param_desc)
return (True, [e.strip() for e in enum_val.group(1).split(",")]) if enum_val else (False, [])
if "Args:" in docstring:
overall_desc, param_desc = docstring.split("Args:")
param_desc = "Args:" + param_desc
else:
overall_desc = docstring
param_desc = ""
return overall_desc, param_desc

View file

@ -71,3 +71,5 @@ Pillow
imap_tools==1.5.0 # Used by metagpt/tools/libs/email_login.py
qianfan==0.3.2
dashscope==1.14.1
rank-bm25==0.2.2 # for tool recommendation
jieba==0.42.1 # for tool recommendation

File diff suppressed because one or more lines are too long

View file

@ -1,51 +0,0 @@
# -*- coding: utf-8 -*-
# @Date : 1/11/2024 8:51 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import pytest
from metagpt.actions.di.debug_code import DebugCode
from metagpt.schema import Message
ErrorStr = """Tested passed:
Tests failed:
assert sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5] # output: [1, 2, 4, 3, 5]
"""
CODE = """
def sort_array(arr):
# Helper function to count the number of ones in the binary representation
def count_ones(n):
return bin(n).count('1')
# Sort the array using a custom key function
# The key function returns a tuple (number of ones, value) for each element
# This ensures that if two elements have the same number of ones, they are sorted by their value
sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x))
return sorted_arr
```
"""
DebugContext = '''Solve the problem in Python:
def sort_array(arr):
"""
In this Kata, you have to sort an array of non-negative integers according to
number of ones in their binary representation in ascending order.
For similar number of ones, sort based on decimal value.
It must be implemented like this:
>>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]
>>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]
>>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]
"""
'''
@pytest.mark.asyncio
async def test_debug_code():
debug_context = Message(content=DebugContext)
new_code = await DebugCode().run(context=debug_context, code=CODE, runtime_result=ErrorStr)
assert "def sort_array(arr)" in new_code["code"]

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.di.execute_nb_code import ExecuteNbCode, truncate
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
@pytest.mark.asyncio
@ -8,6 +8,7 @@ async def test_code_running():
executor = ExecuteNbCode()
output, is_success = await executor.run("print('hello world!')")
assert is_success
await executor.terminate()
@pytest.mark.asyncio
@ -17,6 +18,7 @@ async def test_split_code_running():
_ = await executor.run("z=x+y")
output, is_success = await executor.run("assert z==3")
assert is_success
await executor.terminate()
@pytest.mark.asyncio
@ -24,6 +26,7 @@ async def test_execute_error():
executor = ExecuteNbCode()
output, is_success = await executor.run("z=1/0")
assert not is_success
await executor.terminate()
PLOT_CODE = """
@ -52,21 +55,7 @@ async def test_plotting_code():
executor = ExecuteNbCode()
output, is_success = await executor.run(PLOT_CODE)
assert is_success
def test_truncate():
# 代码执行成功
output, is_success = truncate("hello world", 5, True)
assert "Truncated to show only first 5 characters\nhello" in output
assert is_success
# 代码执行失败
output, is_success = truncate("hello world", 5, False)
assert "Truncated to show only last 5 characters\nworld" in output
assert not is_success
# 异步
output, is_success = truncate("<coroutine object", 5, True)
assert not is_success
assert "await" in output
await executor.terminate()
@pytest.mark.asyncio
@ -76,6 +65,7 @@ async def test_run_with_timeout():
message, success = await executor.run(code)
assert not success
assert message.startswith("Cell execution timed out")
await executor.terminate()
@pytest.mark.asyncio
@ -83,7 +73,7 @@ async def test_run_code_text():
executor = ExecuteNbCode()
message, success = await executor.run(code='print("This is a code!")', language="python")
assert success
assert message == "This is a code!\n"
assert "This is a code!" in message
message, success = await executor.run(code="# This is a code!", language="markdown")
assert success
assert message == "# This is a code!"
@ -91,19 +81,22 @@ async def test_run_code_text():
message, success = await executor.run(code=mix_text, language="markdown")
assert success
assert message == mix_text
await executor.terminate()
@pytest.mark.asyncio
async def test_terminate():
executor = ExecuteNbCode()
await executor.run(code='print("This is a code!")', language="python")
is_kernel_alive = await executor.nb_client.km.is_alive()
assert is_kernel_alive
await executor.terminate()
import time
time.sleep(2)
assert executor.nb_client.km is None
@pytest.mark.parametrize(
"k", [(1), (5)]
) # k=1 to test a single regular terminate, k>1 to test terminate under continuous run
async def test_terminate(k):
for _ in range(k):
executor = ExecuteNbCode()
await executor.run(code='print("This is a code!")', language="python")
is_kernel_alive = await executor.nb_client.km.is_alive()
assert is_kernel_alive
await executor.terminate()
assert executor.nb_client.km is None
assert executor.nb_client.kc is None
@pytest.mark.asyncio
@ -114,3 +107,22 @@ async def test_reset():
assert is_kernel_alive
await executor.reset()
assert executor.nb_client.km is None
await executor.terminate()
@pytest.mark.asyncio
async def test_parse_outputs():
executor = ExecuteNbCode()
code = """
import pandas as pd
df = pd.DataFrame({'ID': [1,2,3], 'NAME': ['a', 'b', 'c']})
print(df.columns)
print(f"columns num:{len(df.columns)}")
print(df['DUMMPY_ID'])
"""
output, is_success = await executor.run(code)
assert not is_success
assert "Index(['ID', 'NAME'], dtype='object')" in output
assert "KeyError: 'DUMMPY_ID'" in output
assert "columns num:2" in output
await executor.terminate()

View file

@ -1,46 +0,0 @@
import pytest
from metagpt.actions.di.ml_action import WriteCodeWithToolsML
from metagpt.schema import Plan, Task
@pytest.mark.asyncio
async def test_write_code_with_tools():
write_code_ml = WriteCodeWithToolsML()
task_map = {
"1": Task(
task_id="1",
instruction="随机生成一个pandas DataFrame数据集",
task_type="other",
dependent_task_ids=[],
code="""
import pandas as pd
df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [1.1, 2.2, 3.3, 4.4, np.nan],
'c': ['aa', 'bb', 'cc', 'dd', 'ee'],
'd': [1, 2, 3, 4, 5]
})
""",
is_finished=True,
),
"2": Task(
task_id="2",
instruction="对数据集进行数据清洗",
task_type="data_preprocess",
dependent_task_ids=["1"],
),
}
plan = Plan(
goal="构造数据集并进行数据清洗",
tasks=list(task_map.values()),
task_map=task_map,
current_task_id="2",
)
column_info = ""
_, code_with_ml = await write_code_ml.run([], plan, column_info)
code_with_ml = code_with_ml["code"]
assert len(code_with_ml) > 0
print(code_with_ml)

View file

@ -1,134 +1,61 @@
import asyncio
import pytest
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.write_analysis_code import (
WriteCodeWithoutTools,
WriteCodeWithTools,
)
from metagpt.logs import logger
from metagpt.schema import Message, Plan, Task
from metagpt.strategy.planner import STRUCTURAL_CONTEXT
@pytest.mark.skip
@pytest.mark.asyncio
async def test_write_code_by_list_plan():
write_code = WriteCodeWithoutTools()
execute_code = ExecuteNbCode()
messages = []
plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "回顾已完成的任务", "求均值", "总结"]
for task in plan:
print(f"\n任务: {task}\n\n")
messages.append(Message(task, role="assistant"))
code = await write_code.run(messages)
if task.startswith(("回顾", "总结")):
assert code["language"] == "markdown"
else:
assert code["language"] == "python"
messages.append(Message(code["code"], role="assistant"))
assert len(code) > 0
output, _ = await execute_code.run(**code)
print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
messages.append(output)
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
from metagpt.schema import Message
@pytest.mark.asyncio
async def test_tool_recommendation():
task = "clean and preprocess the data"
available_tools = {
"FillMissingValue": "Filling missing values",
"SplitBins": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
}
write_code = WriteCodeWithTools()
tools = await write_code._recommend_tool(task, available_tools)
async def test_write_code_with_plan():
write_code = WriteAnalysisCode()
assert len(tools) == 1
assert "FillMissingValue" in tools
user_requirement = "Run data analysis on sklearn Iris dataset, include a plot"
plan_status = "\n## Finished Tasks\n### code\n```python\n\n```\n\n### execution result\n\n\n## Current Task\nLoad the sklearn Iris dataset and perform exploratory data analysis\n\n## Task Guidance\nWrite complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.\nSpecifically, \nThe current task is about exploratory data analysis, please note the following:\n- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.\n- Remember to `import numpy as np` before using Numpy functions.\n\n"
code = await write_code.run(user_requirement=user_requirement, plan_status=plan_status)
assert len(code) > 0
assert "sklearn" in code
@pytest.mark.asyncio
async def test_write_code_with_tools():
write_code = WriteCodeWithTools()
write_code = WriteAnalysisCode()
requirement = "构造数据集并进行数据清洗"
task_map = {
"1": Task(
task_id="1",
instruction="随机生成一个pandas DataFrame数据集",
task_type="other",
dependent_task_ids=[],
code="""
import pandas as pd
df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [1.1, 2.2, 3.3, 4.4, np.nan],
'c': ['aa', 'bb', 'cc', 'dd', 'ee'],
'd': [1, 2, 3, 4, 5]
})
""",
is_finished=True,
),
"2": Task(
task_id="2",
instruction="对数据集进行数据清洗",
task_type="data_preprocess",
dependent_task_ids=["1"],
),
}
plan = Plan(
goal="构造数据集并进行数据清洗",
tasks=list(task_map.values()),
task_map=task_map,
current_task_id="2",
)
user_requirement = "Preprocess sklearn Wine recognition dataset and train a model to predict wine class (20% as validation), and show validation accuracy."
tool_info = """
## Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python class or function.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
context = STRUCTURAL_CONTEXT.format(
user_requirement=requirement,
context=plan.context,
tasks=list(task_map.values()),
current_task=plan.current_task.model_dump_json(),
)
context_msg = [Message(content=context, role="user")]
## Available Tools:
Each tool is described in JSON format. When you call a tool, import the tool from its path first.
{'FillMissingValue': {'type': 'class', 'description': 'Completing missing values with simple strategies.', 'methods': {'__init__': {'type': 'function', 'description': 'Initialize self. ', 'signature': '(self, features: \'list\', strategy: "Literal[\'mean\', \'median\', \'most_frequent\', \'constant\']" = \'mean\', fill_value=None)', 'parameters': 'Args: features (list): Columns to be processed. strategy (Literal["mean", "median", "most_frequent", "constant"], optional): The imputation strategy, notice \'mean\' and \'median\' can only be used for numeric features. Defaults to \'mean\'. fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values. Defaults to None.'}, 'fit': {'type': 'function', 'description': 'Fit a model to be used in subsequent transform. ', 'signature': "(self, df: 'pd.DataFrame')", 'parameters': 'Args: df (pd.DataFrame): The input DataFrame.'}, 'fit_transform': {'type': 'function', 'description': 'Fit and transform the input DataFrame. ', 'signature': "(self, df: 'pd.DataFrame') -> 'pd.DataFrame'", 'parameters': 'Args: df (pd.DataFrame): The input DataFrame. Returns: pd.DataFrame: The transformed DataFrame.'}, 'transform': {'type': 'function', 'description': 'Transform the input DataFrame with the fitted model. ', 'signature': "(self, df: 'pd.DataFrame') -> 'pd.DataFrame'", 'parameters': 'Args: df (pd.DataFrame): The input DataFrame. Returns: pd.DataFrame: The transformed DataFrame.'}}, 'tool_path': 'metagpt/tools/libs/data_preprocess.py'}
"""
code = await write_code.run(context_msg, plan)
code = code["code"]
code = await write_code.run(user_requirement=user_requirement, tool_info=tool_info)
assert len(code) > 0
print(code)
assert "metagpt.tools.libs" in code
@pytest.mark.asyncio
async def test_write_code_to_correct_error():
structural_context = """
## User Requirement
read a dataset test.csv and print its head
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "import pandas and load the dataset from 'test.csv'.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "2",
"dependent_task_ids": [
"1"
],
"instruction": "Print the head of the dataset to display the first few rows.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
async def test_debug_with_reflection():
user_requirement = "read a dataset test.csv and print its head"
plan_status = """
## Finished Tasks
### code
```python
```
### execution result
## Current Task
{"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
import pandas and load the dataset from 'test.csv'.
## Task Guidance
Write complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.
Specifically,
"""
wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata""" # use read_excel to read a csv
error = """
Traceback (most recent call last):
@ -139,186 +66,14 @@ async def test_write_code_to_correct_error():
raise ValueError(
ValueError: Excel file format cannot be determined, you must specify an engine manually.
"""
context = [
Message(content=structural_context, role="user"),
working_memory = [
Message(content=wrong_code, role="assistant"),
Message(content=error, role="user"),
]
new_code = await WriteCodeWithoutTools().run(context=context)
new_code = new_code["code"]
print(new_code)
new_code = await WriteAnalysisCode().run(
user_requirement=user_requirement,
plan_status=plan_status,
working_memory=working_memory,
use_reflection=True,
)
assert "read_csv" in new_code # should correct read_excel to read_csv
@pytest.mark.asyncio
async def test_write_code_reuse_code_simple():
structural_context = """
## User Requirement
read a dataset test.csv and print its head
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "import pandas and load the dataset from 'test.csv'.",
"task_type": "",
"code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
"result": "",
"is_finished": true
},
{
"task_id": "2",
"dependent_task_ids": [
"1"
],
"instruction": "Print the head of the dataset to display the first few rows.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
context = [
Message(content=structural_context, role="user"),
]
code = await WriteCodeWithoutTools().run(context=context)
code = code["code"]
print(code)
assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
@pytest.mark.skip
@pytest.mark.asyncio
async def test_write_code_reuse_code_long():
"""test code reuse for long context"""
structural_context = """
## User Requirement
Run data analysis on sklearn Iris dataset, include a plot
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "Load the Iris dataset from sklearn.",
"task_type": "",
"code": "from sklearn.datasets import load_iris\niris_data = load_iris()\niris_data['data'][0:5], iris_data['target'][0:5]",
"result": "(array([[5.1, 3.5, 1.4, 0.2],\n [4.9, 3. , 1.4, 0.2],\n [4.7, 3.2, 1.3, 0.2],\n [4.6, 3.1, 1.5, 0.2],\n [5. , 3.6, 1.4, 0.2]]),\n array([0, 0, 0, 0, 0]))",
"is_finished": true
},
{
"task_id": "2",
"dependent_task_ids": [
"1"
],
"instruction": "Perform exploratory data analysis on the Iris dataset.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "3",
"dependent_task_ids": [
"2"
],
"instruction": "Create a plot visualizing the Iris dataset features.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Perform exploratory data analysis on the Iris dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
context = [
Message(content=structural_context, role="user"),
]
trials_num = 5
trials = [WriteCodeWithoutTools().run(context=context, temperature=0.0) for _ in range(trials_num)]
trial_results = await asyncio.gather(*trials)
print(*trial_results, sep="\n\n***\n\n")
success = [
"load_iris" not in result["code"] and "iris_data" in result["code"] for result in trial_results
] # should reuse iris_data from previous tasks
success_rate = sum(success) / trials_num
logger.info(f"success rate: {success_rate :.2f}")
assert success_rate >= 0.8
@pytest.mark.skip
@pytest.mark.asyncio
async def test_write_code_reuse_code_long_for_wine():
"""test code reuse for long context"""
structural_context = """
## User Requirement
Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis."
"task_type": "",
"code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
"result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
"is_finished": true
},
{
"task_id": "2",
"dependent_task_ids": ["1"],
"instruction": "Create a plot to visualize some aspect of the wine dataset."
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "3",
"dependent_task_ids": ["1"],
"instruction": "Split the dataset into training and validation sets with a 20% validation size.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "4",
"dependent_task_ids": ["3"],
"instruction": "Train a model on the training set to predict wine class.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "5",
"dependent_task_ids": ["4"],
"instruction": "Evaluate the model on the validation set and report the accuracy.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the Wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
context = [
Message(content=structural_context, role="user"),
]
trials_num = 5
trials = [WriteCodeWithoutTools().run(context=context, temperature=0.0) for _ in range(trials_num)]
trial_results = await asyncio.gather(*trials)
print(*trial_results, sep="\n\n***\n\n")
success = [
"load_wine" not in result["code"] and "wine_data" in result["code"] for result in trial_results
] # should reuse iris_data from previous tasks
success_rate = sum(success) / trials_num
logger.info(f"success rate: {success_rate :.2f}")
assert success_rate >= 0.8

View file

@ -23,12 +23,10 @@ def test_precheck_update_plan_from_rsp():
@pytest.mark.asyncio
@pytest.mark.parametrize("use_tools", [(False), (True)])
async def test_write_plan(use_tools):
async def test_write_plan():
rsp = await WritePlan().run(
context=[Message("run analysis on sklearn iris dataset", role="user")], use_tools=use_tools
context=[Message("Run data analysis on sklearn Iris dataset, include a plot", role="user")]
)
assert "task_id" in rsp
assert "instruction" in rsp
assert "json" not in rsp # the output should be the content inside ```json ```

View file

@ -10,10 +10,9 @@ async def test_interpreter(mocker, auto_run):
mocker.patch("metagpt.actions.di.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
mocker.patch("builtins.input", return_value="confirm")
requirement = "Run data analysis on sklearn Iris dataset, include a plot"
tools = []
requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."
di = DataInterpreter(auto_run=auto_run, use_tools=True, tools=tools)
di = DataInterpreter(auto_run=auto_run)
rsp = await di.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0
@ -21,3 +20,16 @@ async def test_interpreter(mocker, auto_run):
finished_tasks = di.planner.plan.get_finished_tasks()
assert len(finished_tasks) > 0
assert len(finished_tasks[0].code) > 0 # check one task to see if code is recorded
@pytest.mark.asyncio
async def test_interpreter_react_mode(mocker):
mocker.patch("metagpt.actions.di.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
mocker.patch("builtins.input", return_value="confirm")
requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy."
di = DataInterpreter(react_mode="react")
rsp = await di.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0

View file

@ -1,90 +0,0 @@
import pytest
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.logs import logger
from metagpt.roles.di.ml_engineer import MLEngineer
from metagpt.schema import Message, Plan, Task
from metagpt.tools.tool_type import ToolType
from tests.metagpt.actions.di.test_debug_code import CODE, DebugContext, ErrorStr
def test_mle_init():
mle = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
assert mle.tools == []
MockPlan = Plan(
goal="This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: 'tests/data/ml_datasets/titanic/split_train.csv', eval data path: 'tests/data/ml_datasets/titanic/split_eval.csv'.",
context="",
tasks=[
Task(
task_id="1",
dependent_task_ids=[],
instruction="Perform exploratory data analysis on the train dataset to understand the features and target variable.",
task_type="eda",
code="",
result="",
is_success=False,
is_finished=False,
)
],
task_map={
"1": Task(
task_id="1",
dependent_task_ids=[],
instruction="Perform exploratory data analysis on the train dataset to understand the features and target variable.",
task_type="eda",
code="",
result="",
is_success=False,
is_finished=False,
)
},
current_task_id="1",
)
@pytest.mark.asyncio
async def test_mle_write_code(mocker):
data_path = "tests/data/ml_datasets/titanic"
mle = MLEngineer(auto_run=True, use_tools=True)
mle.planner.plan = MockPlan
code, _ = await mle._write_code()
assert data_path in code["code"]
@pytest.mark.asyncio
async def test_mle_update_data_columns(mocker):
mle = MLEngineer(auto_run=True, use_tools=True)
mle.planner.plan = MockPlan
# manually update task type to test update
mle.planner.plan.current_task.task_type = ToolType.DATA_PREPROCESS.value
result = await mle._update_data_columns()
assert result is not None
@pytest.mark.asyncio
async def test_mle_debug_code(mocker):
mle = MLEngineer(auto_run=True, use_tools=True)
mle.working_memory.add(Message(content=ErrorStr, cause_by=ExecuteNbCode))
mle.latest_code = CODE
mle.debug_context = DebugContext
code, _ = await mle._write_code()
assert len(code) > 0
@pytest.mark.skip
@pytest.mark.asyncio
async def test_ml_engineer():
data_path = "tests/data/ml_datasets/titanic"
requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
tools = ["FillMissingValue", "CatCross", "dummy_tool"]
mle = MLEngineer(auto_run=True, use_tools=True, tools=tools)
rsp = await mle.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0

View file

@ -60,18 +60,24 @@ async def test_generate_webpages(mock_webpage_filename_with_styles_and_scripts,
async def test_save_webpages_with_styles_and_scripts(mock_webpage_filename_with_styles_and_scripts, image_path):
generator = GPTvGenerator()
webpages = await generator.generate_webpages(image_path)
webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
webpages_dir = generator.save_webpages(webpages=webpages, save_folder_name="test_1")
logs.logger.info(webpages_dir)
assert webpages_dir.exists()
assert (webpages_dir / "index.html").exists()
assert (webpages_dir / "styles.css").exists()
assert (webpages_dir / "scripts.js").exists()
@pytest.mark.asyncio
async def test_save_webpages_with_style_and_script(mock_webpage_filename_with_style_and_script, image_path):
generator = GPTvGenerator()
webpages = await generator.generate_webpages(image_path)
webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
webpages_dir = generator.save_webpages(webpages=webpages, save_folder_name="test_2")
logs.logger.info(webpages_dir)
assert webpages_dir.exists()
assert (webpages_dir / "index.html").exists()
assert (webpages_dir / "style.css").exists()
assert (webpages_dir / "script.js").exists()
@pytest.mark.asyncio

View file

@ -1,44 +1,8 @@
from typing import Literal, Union
import pandas as pd
from metagpt.tools.tool_convert import convert_code_to_tool_schema, docstring_to_schema
def test_docstring_to_schema():
docstring = """
Some test desc.
Args:
features (list): Columns to be processed.
strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only be
used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
Defaults to None.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
expected = {
"description": "Some test desc.",
"parameters": {
"properties": {
"features": {"type": "list", "description": "Columns to be processed."},
"strategy": {
"type": "str",
"description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.",
"default": "'mean'",
"enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"],
},
"fill_value": {
"type": "int",
"description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
"default": "None",
},
},
"required": ["features"],
},
"returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}],
}
schema = docstring_to_schema(docstring)
assert schema == expected
from metagpt.tools.tool_convert import convert_code_to_tool_schema
class DummyClass:
@ -81,12 +45,25 @@ class DummyClass:
pass
def dummy_fn(df: pd.DataFrame) -> dict:
def dummy_fn(
df: pd.DataFrame,
s: str,
k: int = 5,
type: Literal["a", "b", "c"] = "a",
test_dict: dict[str, int] = None,
test_union: Union[str, list[str]] = "",
) -> dict:
"""
Analyzes a DataFrame and categorizes its columns based on data types.
Args:
df (pd.DataFrame): The DataFrame to be analyzed.
df: The DataFrame to be analyzed.
Another line for df.
s (str): Some test string param.
Another line for s.
k (int, optional): Some test integer param. Defaults to 5.
type (Literal["a", "b", "c"], optional): Some test type. Defaults to 'a'.
more_args: will be omitted here for testing
Returns:
dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others').
@ -115,41 +92,21 @@ def test_convert_code_to_tool_schema_class():
"methods": {
"__init__": {
"type": "function",
"description": "Initialize self.",
"parameters": {
"properties": {
"features": {"type": "list", "description": "Columns to be processed."},
"strategy": {
"type": "str",
"description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.",
"default": "'mean'",
"enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"],
},
"fill_value": {
"type": "int",
"description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
"default": "None",
},
},
"required": ["features"],
},
"description": "Initialize self. ",
"signature": "(self, features: list, strategy: str = 'mean', fill_value=None)",
"parameters": "Args: features (list): Columns to be processed. strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'. fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
},
"fit": {
"type": "function",
"description": "Fit the FillMissingValue model.",
"parameters": {
"properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}},
"required": ["df"],
},
"description": "Fit the FillMissingValue model. ",
"signature": "(self, df: pandas.core.frame.DataFrame)",
"parameters": "Args: df (pd.DataFrame): The input DataFrame.",
},
"transform": {
"type": "function",
"description": "Transform the input DataFrame with the fitted model.",
"parameters": {
"properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}},
"required": ["df"],
},
"returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}],
"description": "Transform the input DataFrame with the fitted model. ",
"signature": "(self, df: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame",
"parameters": "Args: df (pd.DataFrame): The input DataFrame. Returns: pd.DataFrame: The transformed DataFrame.",
},
},
}
@ -160,11 +117,9 @@ def test_convert_code_to_tool_schema_class():
def test_convert_code_to_tool_schema_function():
expected = {
"type": "function",
"description": "Analyzes a DataFrame and categorizes its columns based on data types.",
"parameters": {
"properties": {"df": {"type": "pd.DataFrame", "description": "The DataFrame to be analyzed."}},
"required": ["df"],
},
"description": "Analyzes a DataFrame and categorizes its columns based on data types. ",
"signature": "(df: pandas.core.frame.DataFrame, s: str, k: int = 5, type: Literal['a', 'b', 'c'] = 'a', test_dict: dict[str, int] = None, test_union: Union[str, list[str]] = '') -> dict",
"parameters": "Args: df: The DataFrame to be analyzed. Another line for df. s (str): Some test string param. Another line for s. k (int, optional): Some test integer param. Defaults to 5. type (Literal[\"a\", \"b\", \"c\"], optional): Some test type. Defaults to 'a'. more_args: will be omitted here for testing Returns: dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others'). Each key corresponds to a list of column names belonging to that category.",
}
schema = convert_code_to_tool_schema(dummy_fn)
assert schema == expected

View file

@ -0,0 +1,90 @@
import pytest
from metagpt.schema import Plan, Task
from metagpt.tools import TOOL_REGISTRY
from metagpt.tools.tool_recommend import (
BM25ToolRecommender,
ToolRecommender,
TypeMatchToolRecommender,
)
@pytest.fixture
def mock_plan(mocker):
task_map = {
"1": Task(
task_id="1",
instruction="conduct feature engineering, add new features on the dataset",
task_type="feature engineering",
)
}
plan = Plan(
goal="test requirement",
tasks=list(task_map.values()),
task_map=task_map,
current_task_id="1",
)
return plan
@pytest.fixture
def mock_bm25_tr(mocker):
tr = BM25ToolRecommender(tools=["FillMissingValue", "PolynomialExpansion", "web scraping"])
return tr
def test_tr_init():
tr = ToolRecommender(tools=["FillMissingValue", "PolynomialExpansion", "web scraping", "non-existing tool"])
# web_scraping is a tool tag, it has one tool scrape_web_playwright
assert list(tr.tools.keys()) == [
"FillMissingValue",
"PolynomialExpansion",
"scrape_web_playwright",
]
def test_tr_init_default_tools_value():
tr = ToolRecommender()
assert tr.tools == {}
def test_tr_init_tools_all():
tr = ToolRecommender(tools=["<all>"])
assert list(tr.tools.keys()) == list(TOOL_REGISTRY.get_all_tools().keys())
@pytest.mark.asyncio
async def test_bm25_tr_recall_with_plan(mock_plan, mock_bm25_tr):
result = await mock_bm25_tr.recall_tools(plan=mock_plan)
assert len(result) == 3
assert result[0].name == "PolynomialExpansion"
@pytest.mark.asyncio
async def test_bm25_tr_recall_no_plan(mock_plan, mock_bm25_tr):
result = await mock_bm25_tr.recall_tools(
context="conduct feature engineering, add new features on the dataset", plan=None
)
assert len(result) == 3
assert result[0].name == "PolynomialExpansion"
@pytest.mark.asyncio
async def test_bm25_recommend_tools(mock_bm25_tr):
result = await mock_bm25_tr.recommend_tools(context="conduct feature engineering, add new features on the dataset")
assert len(result) == 2 # web scraping tool should be filtered out at rank stage
assert result[0].name == "PolynomialExpansion"
@pytest.mark.asyncio
async def test_get_recommended_tool_info(mock_plan, mock_bm25_tr):
result = await mock_bm25_tr.get_recommended_tool_info(plan=mock_plan)
assert isinstance(result, str)
@pytest.mark.asyncio
async def test_tm_tr_recall_with_plan(mock_plan, mock_bm25_tr):
tr = TypeMatchToolRecommender(tools=["FillMissingValue", "PolynomialExpansion", "web scraping"])
result = await tr.recall_tools(plan=mock_plan)
assert len(result) == 1
assert result[0].name == "PolynomialExpansion"

View file

@ -1,7 +1,6 @@
import pytest
from metagpt.tools.tool_registry import ToolRegistry
from metagpt.tools.tool_type import ToolType
@pytest.fixture
@ -9,25 +8,11 @@ def tool_registry():
return ToolRegistry()
@pytest.fixture
def tool_registry_full():
return ToolRegistry(tool_types=ToolType)
# Test Initialization
def test_initialization(tool_registry):
assert isinstance(tool_registry, ToolRegistry)
assert tool_registry.tools == {}
assert tool_registry.tool_types == {}
assert tool_registry.tools_by_types == {}
# Test Initialization with tool types
def test_initialize_with_tool_types(tool_registry_full):
assert isinstance(tool_registry_full, ToolRegistry)
assert tool_registry_full.tools == {}
assert tool_registry_full.tools_by_types == {}
assert "data_preprocess" in tool_registry_full.tool_types
assert tool_registry.tools_by_tags == {}
class TestClassTool:
@ -72,31 +57,24 @@ def test_get_tool(tool_registry):
assert "description" in tool.schemas
# Similar tests for has_tool_type, get_tool_type, get_tools_by_type
def test_has_tool_type(tool_registry_full):
assert tool_registry_full.has_tool_type("data_preprocess")
assert not tool_registry_full.has_tool_type("NonexistentType")
def test_has_tool_tag(tool_registry):
tool_registry.register_tool(
"TestClassTool", "/path/to/tool", tool_source_object=TestClassTool, tags=["machine learning", "test"]
)
assert tool_registry.has_tool_tag("test")
assert not tool_registry.has_tool_tag("Non-existent tag")
def test_get_tool_type(tool_registry_full):
retrieved_type = tool_registry_full.get_tool_type("data_preprocess")
assert retrieved_type is not None
assert retrieved_type.name == "data_preprocess"
def test_get_tools_by_type(tool_registry):
tool_type_name = "TestType"
def test_get_tools_by_tag(tool_registry):
tool_tag_name = "Test Tag"
tool_name = "TestTool"
tool_path = "/path/to/tool"
tool_registry.register_tool(tool_name, tool_path, tool_type=tool_type_name, tool_source_object=TestClassTool)
tool_registry.register_tool(tool_name, tool_path, tags=[tool_tag_name], tool_source_object=TestClassTool)
tools_by_type = tool_registry.get_tools_by_type(tool_type_name)
assert tools_by_type is not None
assert tool_name in tools_by_type
tools_by_tag = tool_registry.get_tools_by_tag(tool_tag_name)
assert tools_by_tag is not None
assert tool_name in tools_by_tag
# Test case for when the tool type does not exist
def test_get_tools_by_nonexistent_type(tool_registry):
tools_by_type = tool_registry.get_tools_by_type("NonexistentType")
assert not tools_by_type
tools_by_tag_non_existent = tool_registry.get_tools_by_tag("Non-existent Tag")
assert not tools_by_tag_non_existent

View file

@ -5,8 +5,10 @@ from metagpt.config2 import config
from metagpt.configs.llm_config import LLMType
from metagpt.logs import logger
from metagpt.provider.azure_openai_api import AzureOpenAILLM
from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA
from metagpt.provider.openai_api import OpenAILLM
from metagpt.schema import Message
from metagpt.utils.common import process_message
OriginalLLM = OpenAILLM if config.llm.api_type == LLMType.OPENAI else AzureOpenAILLM
@ -32,14 +34,13 @@ class MockLLM(OriginalLLM):
async def original_aask(
self,
msg: str,
msg: Union[str, list[dict[str, str]]],
system_msgs: Optional[list[str]] = None,
format_msgs: Optional[list[dict[str, str]]] = None,
images: Optional[Union[str, list[str]]] = None,
timeout=3,
stream=True,
):
"""A copy of metagpt.provider.base_llm.BaseLLM.aask, we can't use super().aask because it will be mocked"""
) -> str:
if system_msgs:
message = self._system_msgs(system_msgs)
else:
@ -48,7 +49,11 @@ class MockLLM(OriginalLLM):
message = []
if format_msgs:
message.extend(format_msgs)
message.append(self._user_msg(msg, images=images))
if isinstance(msg, str):
message.append(self._user_msg(msg, images=images))
else:
message.extend(msg)
logger.debug(message)
rsp = await self.acompletion_text(message, stream=stream, timeout=timeout)
return rsp
@ -67,19 +72,27 @@ class MockLLM(OriginalLLM):
A copy of metagpt.provider.openai_api.OpenAILLM.aask_code, we can't use super().aask because it will be mocked.
Since openai_api.OpenAILLM.aask_code is different from base_llm.BaseLLM.aask_code, we use the former.
"""
if "tools" not in kwargs:
configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]}
kwargs.update(configs)
rsp = await self._achat_completion_function(messages, **kwargs)
return self.get_choice_function_arguments(rsp)
async def aask(
self,
msg: str,
msg: Union[str, list[dict[str, str]]],
system_msgs: Optional[list[str]] = None,
format_msgs: Optional[list[dict[str, str]]] = None,
images: Optional[Union[str, list[str]]] = None,
timeout=3,
stream=True,
) -> str:
msg_key = msg # used to identify it a message has been called before
# used to identify it a message has been called before
if isinstance(msg, list):
msg_key = "#MSG_SEP#".join([m["content"] for m in msg])
else:
msg_key = msg
if system_msgs:
joined_system_msg = "#MSG_SEP#".join(system_msgs) + "#SYSTEM_MSG_END#"
msg_key = joined_system_msg + msg_key
@ -92,8 +105,7 @@ class MockLLM(OriginalLLM):
return rsp
async def aask_code(self, messages: Union[str, Message, list[dict]], **kwargs) -> dict:
messages = self._process_message(messages)
msg_key = json.dumps(messages, ensure_ascii=False)
msg_key = json.dumps(process_message(messages), ensure_ascii=False)
rsp = await self._mock_rsp(msg_key, self.original_aask_code, messages, **kwargs)
return rsp