Merge remote-tracking branch 'origin/dev' into dev_tool_selection

# Conflicts:
#	metagpt/roles/ml_engineer.py
This commit is contained in:
lidanyang 2023-12-13 19:52:27 +08:00
commit 3c8ef3e848
15 changed files with 806 additions and 160 deletions

1
.gitignore vendored
View file

@ -167,3 +167,4 @@ tmp
output.wav
metagpt/roles/idea_agent.py
.aider*
/tests/metagpt/actions/check_data.py

40
kaggle_team.py Normal file
View file

@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import asyncio
import fire
from metagpt.roles.kaggle_manager import KaggleManager
from metagpt.roles.ml_engineer import MLEngineer
from metagpt.team import Team
async def main(
# competition: str,
# data_desc: str,
# requirement: str,
investment: float = 5.0,
n_round: int = 10,
auto_run: bool = False,
):
competition, data_desc, requirement = (
"titanic",
"Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
# "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
# "generate a random prediction, replace the Survived column of gender_submission.csv, and save the prediction to a new submission file",
"Score as high as possible for the provided dataset, save the test prediction to a csv with two columns PassengerId and Survived"
)
team = Team()
team.hire(
[
KaggleManager(competition=competition, data_desc=data_desc),
MLEngineer(goal=requirement, auto_run=auto_run),
]
)
team.invest(investment)
team.start_project(requirement)
await team.run(n_round=n_round)
if __name__ == '__main__':
fire.Fire(main)

View file

@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Tuple, Union
import traceback
import re
import nbformat
from nbclient import NotebookClient
@ -176,11 +177,31 @@ class ExecutePyCode(ExecuteCode, Action):
# TODO: add max_tries for run code.
cell_index = len(self.nb.cells) - 1
await self.nb_client.async_execute_cell(self.nb.cells[-1], cell_index)
return self.parse_outputs(self.nb.cells[-1].outputs), True
outputs = self.parse_outputs(self.nb.cells[-1].outputs)
success = True
except Exception as e:
# FIXME: CellExecutionError is hard to read. for example `1\0` raise ZeroDivisionError:
# CellExecutionError('An error occurred while executing the following cell:\n------------------\nz=1/0\n------------------\n\n\n\x1b[0;31m---------------------------------------------------------------------------\x1b[0m\n\x1b[0;31mZeroDivisionError\x1b[0m Traceback (most recent call last)\nCell \x1b[0;32mIn[1], line 1\x1b[0m\n\x1b[0;32m----> 1\x1b[0m z\x1b[38;5;241m=\x1b[39m\x1b[38;5;241;43m1\x1b[39;49m\x1b[38;5;241;43m/\x1b[39;49m\x1b[38;5;241;43m0\x1b[39;49m\n\n\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n')
return traceback.format_exc(), False
outputs = traceback.format_exc()
success = False
return truncate(remove_escape_and_color_codes(outputs)), success
else:
# TODO: markdown
raise NotImplementedError(f"Not support this code type : {language}, Only support code!")
def truncate(result: str, keep_len: int = 2000) -> str:
desc = f"Truncated to show only the last {keep_len} characters\n"
if result.startswith(desc):
result = result[len(desc) :]
if len(result) > keep_len:
result = result[-keep_len:]
return desc + result
return result
def remove_escape_and_color_codes(input_str):
# 使用正则表达式去除转义字符和颜色代码
pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
result = pattern.sub('', input_str)
return result

View file

@ -0,0 +1,116 @@
import json
from typing import Dict, List, Union
from metagpt.actions import Action
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser
from metagpt.logs import logger
class ReviewConst:
TASK_REVIEW_TRIGGER = "task"
CODE_REVIEW_TRIGGER = "code"
CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"]
CHANGE_WORD = ["change"]
EXIT_WORD = ["exit"]
TASK_REVIEW_INSTRUCTION = (
f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORD[0]} task task_id or current task, ... (things to change)' "
f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORD[0]}"
)
CODE_REVIEW_INSTRUCTION = (
f"If you want the codes to be rewritten, say '{CHANGE_WORD[0]} ... (your change advice)' "
f"If you want to leave it as is, type: {CONTINUE_WORD[0]} or {CONTINUE_WORD[1]}"
)
EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORD[0]}"
class AskReview(Action):
async def run(
self, context: List[Message], plan: Plan = None, trigger: str = "task"
):
logger.info("Current overall plan:")
logger.info(
"\n".join(
[
f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}"
for task in plan.tasks
]
)
)
logger.info("most recent context:")
latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
review_instruction = (
ReviewConst.TASK_REVIEW_INSTRUCTION
if trigger == ReviewConst.TASK_REVIEW_TRIGGER
else ReviewConst.CODE_REVIEW_INSTRUCTION
)
prompt = (
f"This is a <{trigger}> review. Please review output from {latest_action}\n"
f"{review_instruction}\n"
f"{ReviewConst.EXIT_INSTRUCTION}\n"
"Please type your review below:\n"
)
rsp = input(prompt)
if rsp.lower() in ReviewConst.EXIT_WORD:
exit()
# Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm".
# One could say "confirm this task, but change the next task to ..."
confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in rsp.lower()
return rsp, confirmed
class SummarizeAnalysis(Action):
PROMPT_TEMPLATE = """
# Context
{context}
# Summary
Output a 30-word summary on analysis tool and modeling algorithms you have used, and the corresponding result. Make sure to announce the complete path to your test prediction file. Your summary:
"""
def __init__(self, name: str = "", context=None, llm=None) -> str:
super().__init__(name, context, llm)
async def run(self, conmpleted_plan: Plan) -> str:
tasks = json.dumps(
[task.dict() for task in conmpleted_plan.tasks],
indent=4,
ensure_ascii=False,
) # all tasks finished, return all task outputs
prompt = self.PROMPT_TEMPLATE.format(context=tasks)
summary = await self._aask(prompt)
return summary
class Reflect(Action):
PROMPT_TEMPLATE = """
# Context
__context__
# Latest User Requirement
__user_requirement__
# Summary
Above is all your attempts to tackle the user requirement. You plan, act, submit your output, and get the result and feedback.
Output a json following the format:
```json
{
"summary": str = "summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out",
"takeaways": str = "carefully find key takeaways from your summarization",
"reflection": str = "give specific instruction to improve your next trial in a step-by-step thinking process",
}
```
"""
REWRITE_PLAN_INSTRUCTION = """Take this reflection for rewriting plan, modify the current plan in place, make reference to your specific instruction, think about you should
change which task, add or delete what tasks in the plan. Only make necessary changes, keep reusable tasks unchanged, output the COMPLETE new plan starting from the first task. Your plan should have no more than 5 tasks."""
async def run(self, context: str, user_requirement: str = "") -> str:
user_requirement = user_requirement or "Score as high as possible in a data modeling competition"
# prompt = self.PROMPT_TEMPLATE.format(context=context, user_requirement=user_requirement)
prompt = self.PROMPT_TEMPLATE.replace("__context__", context).replace("__user_requirement__", user_requirement)
rsp_json = await self._aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp_json)
reflection = json.loads(rsp)["reflection"]
return reflection

View file

@ -4,13 +4,16 @@
@Author : orange-crow
@File : plan.py
"""
from typing import List, Dict
from typing import List, Dict, Tuple
import json
from copy import deepcopy
import traceback
from metagpt.actions import Action
from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
from metagpt.schema import Message, Task
from metagpt.schema import Message, Task, Plan
from metagpt.utils.common import CodeParser, create_func_config
from metagpt.logs import logger
class WritePlan(Action):
@ -19,7 +22,8 @@ class WritePlan(Action):
__context__
# Task:
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes.
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
If you encounter errors on the current task, revise and output the current single task only.
Output a list of jsons following the format:
```json
[
@ -67,8 +71,36 @@ class WritePlan(Action):
rsp = await self.assign_task_type(json.loads(rsp))
return rsp
@staticmethod
def rsp_to_tasks(rsp: str) -> List[Task]:
rsp = json.loads(rsp)
tasks = [Task(**task_config) for task_config in rsp]
return tasks
def rsp_to_tasks(rsp: str) -> List[Task]:
rsp = json.loads(rsp)
tasks = [Task(**task_config) for task_config in rsp]
return tasks
def update_plan_from_rsp(rsp: str, current_plan: Plan):
tasks = rsp_to_tasks(rsp)
if len(tasks) == 1 or tasks[0].dependent_task_ids:
if tasks[0].dependent_task_ids and len(tasks) > 1:
# tasks[0].dependent_task_ids means the generated tasks are not a complete plan
# for they depend on tasks in the current plan, in this case, we only support updating one task each time
logger.warning(
"Current plan will take only the first generated task if the generated tasks are not a complete plan"
)
# handle a single task
if current_plan.has_task_id(tasks[0].task_id):
# replace an existing task
current_plan.replace_task(tasks[0])
else:
# append one task
current_plan.append_task(tasks[0])
else:
# add tasks in general
current_plan.add_tasks(tasks)
def precheck_update_plan_from_rsp(rsp: str, current_plan: Plan) -> Tuple[bool, str]:
temp_plan = deepcopy(current_plan)
try:
update_plan_from_rsp(rsp, temp_plan)
return True, ""
except Exception as e:
return False, e

View file

@ -95,6 +95,9 @@ class Config(metaclass=Singleton):
self.prompt_format = self._get("PROMPT_FORMAT", "markdown")
self.kaggle_username = self._get("KAGGLE_USERNAME", "")
self.kaggle_key = self._get("KAGGLE_KEY", "")
def _init_with_config_files_and_env(self, configs: dict, yaml_file):
"""Load from config/key.yaml, config/config.yaml, and env in decreasing order of priority"""
configs.update(os.environ)

View file

@ -302,3 +302,14 @@ ML_MODULE_MAP = {
"data_preprocess": "metagpt.tools.functions.libs.data_preprocess",
"feature_engineering": "metagpt.tools.functions.libs.feature_engineering",
}
STRUCTURAL_CONTEXT = """
## User Requirement
{user_requirement}
## Data Description
{data_desc}
## Current Plan
{tasks}
## Current Task
{current_task}
"""

View file

@ -0,0 +1,153 @@
from typing import Dict, List, Union, Tuple
import json
import subprocess
import os
import fire
import pandas as pd
from metagpt.config import CONFIG
from metagpt.const import WORKSPACE_ROOT
from metagpt.roles import Role
from metagpt.actions import Action, BossRequirement
from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis
from metagpt.schema import Message, Task, Plan
from metagpt.logs import logger
from metagpt.utils.common import CodeParser
os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key
def run_command(cmd):
print(cmd)
output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if output.returncode != 0:
print("Error output:", output.stderr)
exit()
else:
print(output.stdout)
return output.stdout
class DownloadData(Action):
async def run(self, competition, data_desc="") -> str:
data_path = WORKSPACE_ROOT / competition
output = run_command(f"kaggle competitions list --search {competition}")
assert output != "No competitions found", "You must provide the correct competition name"
run_command(f"kaggle competitions download {competition} --path {WORKSPACE_ROOT}")
if not os.path.exists(data_path):
# if True:
# run_command(f"rm -r {data_path / '*'}")
run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}") # FIXME: not safe
file_list = run_command(f"ls {data_path}")
rsp = f"""
Location:
Data downloaded at {data_path} folder, including {file_list}
Data Description:
{data_desc}
"""
return rsp
class SubmitResult(Action):
PROMPT_TEMPLATE = """
# Summary
__summary__
# Your task
Extract the file path for test set prediction from the summary above, output a json following the format:
```json
{"file_path": str = "the file path, for example, /path/to/the/prediction/file/xxx.csv, /path/to/the/prediction/file/xxx.xlsx"}
```
"""
def __init__(self, name: str = "", context=None, llm=None) -> str:
super().__init__(name, context, llm)
async def _parse_submit_file_path(self, context) -> str:
prompt = self.PROMPT_TEMPLATE.replace("__summary__", context)
rsp = await self._aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp)
file_path = json.loads(rsp)["file_path"]
return file_path
async def run(self, competition, submit_message="") -> str:
submit_file_path = await self._parse_submit_file_path(submit_message)
data_path = WORKSPACE_ROOT / competition
submit_message = submit_message.replace("'", "")
run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
run_command(f"kaggle competitions submissions --csv {competition} > {data_path / 'submission.csv'}")
leaderboard = pd.read_csv(data_path / 'leaderboard.csv')
submission = pd.read_csv(data_path / 'submission.csv')
print(submission) # submission.to_json(orient="records")
submission_score = submission.loc[0, "publicScore"]
best_score = max(submission["publicScore"]) # might be min
rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
rank_pct = round(rank / len(leaderboard), 4) * 100
submission_summary = f"""
# All histories:
{submission.head(5).to_string()}
# Current
Current submission score: {submission_score}, best score: {best_score}, best rank: {rank} (top {rank_pct}%)
"""
logger.info(submission_summary)
return submission_summary
class KaggleManager(Role):
def __init__(
self, name="ABC", profile="KaggleManager", goal="", competition="titanic", data_desc=""
):
super().__init__(name=name, profile=profile, goal=goal)
self._init_actions([DownloadData, SubmitResult])
self._watch([BossRequirement, SummarizeAnalysis])
self.competition = competition
self.data_desc = data_desc # currently passed in, later can be scrapped down from web by another Role
async def _think(self):
observed = self.get_memories()[-1].cause_by
if observed == BossRequirement:
self._set_state(0) # DownloadData, get competition of interest from human, download datasets
elif observed == SummarizeAnalysis:
self._set_state(1) # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
async def _act(self):
todo = self._rc.todo
logger.info(f"{self._setting}: ready to {self._rc.todo}")
if isinstance(todo, DownloadData):
rsp = await todo.run(self.competition, self.data_desc)
elif isinstance(todo, SubmitResult):
submit_message = self.get_memories()[-1].content # use analysis summary from MLEngineer as submission message
rsp = await todo.run(competition=self.competition, submit_message=submit_message)
msg = Message(content=rsp, role="user", cause_by=type(todo))
return msg
if __name__ == "__main__":
competition, data_desc, requirement = (
"titanic",
"Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
"Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
)
summary = "I used Python with pandas for data preprocessing, sklearn's RandomForestClassifier for modeling, and achieved 82.12% accuracy on validation. Predictions saved at '/Users/gary/Desktop/data_agents_opt/workspace/titanic/gender_submission.csv'."
async def main(requirement: str = requirement):
role = KaggleManager(competition=competition, data_desc=data_desc)
# await role.run(Message(content="", cause_by=BossRequirement))
await role.run(Message(content=summary, cause_by=SummarizeAnalysis))
fire.Fire(main)

View file

@ -1,14 +1,20 @@
from typing import List
import json
import re
from datetime import datetime
from typing import List
import fire
import pandas as pd
from metagpt.roles import Role
from metagpt.schema import Message, Plan
from metagpt.memory import Memory
from metagpt.logs import logger
from metagpt.actions import Action
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp
from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
from metagpt.actions.write_code_steps import WriteCodeSteps
from metagpt.actions.write_plan import WritePlan
from metagpt.const import DATA_PATH, PROJECT_ROOT
@ -22,60 +28,7 @@ from metagpt.roles import Role
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser, remove_comments, create_func_config
from metagpt.actions.debug_code import DebugCode
STRUCTURAL_CONTEXT = """
## User Requirement
{user_requirement}
## Dataset Description
{data_desc}
## Current Plan
{tasks}
## Current Task
{current_task}
"""
def truncate(result: str, keep_len: int = 1000) -> str:
desc = "Truncated to show only the last 1000 characters\n"
if result.startswith(desc):
result = result[-len(desc):]
if len(result) > keep_len:
result = result[-keep_len:]
if not result.startswith(desc):
return desc + result
return desc
def remove_escape_and_color_codes(input_str):
# 使用正则表达式去除转义字符和颜色代码
pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
result = pattern.sub('', input_str)
return result
class AskReview(Action):
async def run(self, context: List[Message], plan: Plan = None):
logger.info("Current overall plan:")
logger.info(
"\n".join([f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}" for task in plan.tasks])
)
logger.info("most recent context:")
latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
prompt = f"\nPlease review output from {latest_action}:\n" \
"If you want to change a task in the plan, say 'change task task_id, ... (things to change)'\n" \
"If you confirm the output and wish to continue with the current process, type CONFIRM\n" \
"If you want to terminate the process, type exit:\n"
rsp = input(prompt)
if rsp.lower() in ("exit"):
exit()
confirmed = rsp.lower() in ("confirm", "yes", "y")
return rsp, confirmed
from metagpt.utils.save_code import save_code_file
class UpdateDataColumns(Action):
@ -91,32 +44,58 @@ class UpdateDataColumns(Action):
class MLEngineer(Role):
def __init__(
self, name="ABC", profile="MLEngineer", goal="", auto_run: bool = False,
self, name="ABC", profile="MLEngineer", goal="", auto_run: bool = False
):
super().__init__(name=name, profile=profile, goal=goal)
self._set_react_mode(react_mode="plan_and_act")
self._watch([DownloadData, SubmitResult])
self.plan = Plan(goal=goal)
self.use_tools = True
self.use_code_steps = True
self.use_tools = False
self.use_code_steps = False
self.execute_code = ExecutePyCode()
self.auto_run = auto_run
self.data_desc = {}
# memory for working on each task, discarded each time a task is done
self.working_memory = Memory()
async def _plan_and_act(self):
### Actions in a multi-agent multi-turn setting ###
memories = self.get_memories()
if memories:
latest_event = memories[-1].cause_by
if latest_event == DownloadData:
self.plan.context = memories[-1].content
elif latest_event == SubmitResult:
# self reflect on previous plan outcomes and think about how to improve the plan, add to working memory
await self._reflect()
# get feedback for improvement from human, add to working memory
await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
### Common Procedure in both single- and multi-agent setting ###
# create initial plan and update until confirmation
await self._update_plan()
while self.plan.current_task:
task = self.plan.current_task
logger.info(f"ready to take on task {task}")
# take on current task
code, result, success, code_steps = await self._write_and_exec_code()
# ask for acceptance, users can other refuse and change tasks in the plan
task_result_confirmed = await self._ask_review()
if success and task_result_confirmed:
review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
if self.auto_run:
# if human confirms the task result, then we deem the task completed, regardless of whether the code run succeeds;
# if auto mode, then the code run has to succeed for the task to be considered completed
task_result_confirmed = success
if task_result_confirmed:
# tick off this task and record progress
task.code = code
task.result = result
@ -127,9 +106,33 @@ class MLEngineer(Role):
success, new_code = await self._update_data_columns()
if success:
task.code = task.code + "\n\n" + new_code
confirmed_and_more = (ReviewConst.CONTINUE_WORD[0] in review.lower()
and review.lower() not in ReviewConst.CONTINUE_WORD[0]) # "confirm, ... (more content, such as changing downstream tasks)"
if confirmed_and_more:
self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
await self._update_plan(review)
elif "redo" in review:
# Ask the Role to redo this task with help of review feedback,
# useful when the code run is successful but the procedure or result is not what we want
continue
else:
# update plan according to user's feedback and to take on changed tasks
await self._update_plan()
await self._update_plan(review)
completed_plan_memory = self.get_useful_memories() # completed plan as a outcome
self._rc.memory.add(completed_plan_memory[0]) # add to persistent memory
summary = await SummarizeAnalysis().run(self.plan)
rsp = Message(content=summary, cause_by=SummarizeAnalysis)
self._rc.memory.add(rsp)
# save code using datetime.now or keywords related to the goal of your project (plan.goal).
project_record = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
save_code_file(name=project_record, code_context=self.execute_code.nb, file_format="ipynb")
return rsp
time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
self.execute_code.save_notebook(f"{DATA_PATH}/notebooks/ml_{time}.ipynb")
@ -150,21 +153,25 @@ class MLEngineer(Role):
if self.use_code_steps
else ""
)
counter = 0
improve_code = ""
success = False
debug_context = []
finished_tasks = self.plan.get_finished_tasks()
code_context = [task.code for task in finished_tasks]
code_result = [task.result for task in finished_tasks]
code_context = "\n\n".join(code_context)
code_result = "\n\n".join(code_result)
while not success and counter < max_retry:
context = self.get_useful_memories()
# print("*" * 10)
# print(context)
# print("*" * 10)
# breakpoint()
if counter > 0:
improve_code = await DebugCode().run(plan=self.plan.current_task.instruction,
# finished_code=code_context,
@ -195,42 +202,42 @@ class MLEngineer(Role):
)
debug_context = tool_context
cause_by = WriteCodeWithTools
self.working_memory.add(
Message(content=code, role="assistant", cause_by=cause_by)
)
# debug on code, run on runcode with finished code and new_df
# runcode = code_context + "\n\n" + code
result, success = await self.execute_code.run(code)
# truncated the result
print(truncate(result))
result, success = await self.execute_code.run(code)
print(result)
self.working_memory.add(
Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
Message(content=result, role="user", cause_by=ExecutePyCode)
)
if "!pip" in code:
success = False
# if not success:
# await self._ask_review()
success = False
counter += 1
if not success and counter >= max_retry:
logger.info("coding failed!")
review, _ = await self._ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
if ReviewConst.CHANGE_WORD[0] in review:
counter = 0 # redo the task again with help of human suggestions
return code, result, success, code_steps
async def _ask_review(self):
if not self.auto_run:
async def _ask_review(self, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER):
auto_run = auto_run or self.auto_run
if not auto_run:
context = self.get_useful_memories()
review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan)
review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger)
if not confirmed:
self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
return confirmed
return True
async def _update_plan(self, max_tasks: int = 3):
return review, confirmed
return "", True
async def _update_plan(self, review: str = "", max_tasks: int = 3, max_retries: int = 3):
plan_confirmed = False
while not plan_confirmed:
context = self.get_useful_memories()
rsp = await WritePlan().run(
@ -239,58 +246,64 @@ class MLEngineer(Role):
self.working_memory.add(
Message(content=rsp, role="assistant", cause_by=WritePlan)
)
plan_confirmed = await self._ask_review()
new_tasks = WritePlan.rsp_to_tasks(rsp)
logger.debug(len(self.plan.tasks))
logger.debug(len(new_tasks))
## fixme: 能重复执行多轮重新plan但应该有更优处理逻辑
## fixme: do not overwrite original tasks
tasks = self.plan.tasks + new_tasks
self.plan.add_tasks(tasks)
# precheck plan before asking reviews
is_plan_valid, error = precheck_update_plan_from_rsp(rsp, self.plan)
if not is_plan_valid and max_retries > 0:
error_msg = f"The generated plan is not valid with error: {error}, try regenerating, remember to generate either the whole plan or the single changed task only"
logger.warning(error_msg)
self.working_memory.add(Message(content=error_msg, role="assistant", cause_by=WritePlan))
max_retries -= 1
continue
_, plan_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
update_plan_from_rsp(rsp, self.plan)
self.working_memory.clear()
async def _reflect(self):
context = self.get_memories()
context = "\n".join([str(msg) for msg in context])
# print("*" * 10)
# print(context)
# print("*" * 10)
reflection = await Reflect().run(context=context)
self.working_memory.add(Message(content=reflection, role="assistant"))
self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user"))
def get_useful_memories(self, task_exclude_field: set = None) -> List[Message]:
"""find useful memories only to reduce context length and improve performance"""
# TODO dataset description , code steps
user_requirement = self.plan.goal
tasks = json.dumps(
[task.dict(exclude=task_exclude_field) for task in self.plan.tasks], indent=4, ensure_ascii=False
)
data_desc = self.plan.context
tasks = [task.dict(exclude=task_exclude_field) for task in self.plan.tasks]
for task in tasks:
# Shorten the context as we don't need code steps after we get the codes.
# This doesn't affect current_task below, which should hold the code steps
task.pop("code_steps")
tasks = json.dumps(tasks, indent=4, ensure_ascii=False)
current_task = self.plan.current_task.json() if self.plan.current_task else {}
context = STRUCTURAL_CONTEXT.format(
user_requirement=user_requirement,
data_desc=self.data_desc,
tasks=tasks,
current_task=current_task
user_requirement=user_requirement, data_desc=data_desc, tasks=tasks, current_task=current_task
)
context_msg = [Message(content=context, role="user")]
return context_msg + self.working_memory.get()
@property
def working_memory(self):
return self._rc.memory
return context_msg + self.get_working_memories()
def get_working_memories(self) -> List[Message]:
return self.working_memory.get()
if __name__ == "__main__":
# requirement = "Run data analysis on sklearn Iris dataset, include a plot"
requirement = "Run data analysis on sklearn Iris dataset, include a plot"
# requirement = "Run data analysis on sklearn Diabetes dataset, include a plot"
# requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
# requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
# requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
# requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy."
data_path = f"{DATA_PATH}/titanic"
requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
# requirement = f"Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
# data_path = f"{DATA_PATH}/icr-identify-age-related-conditions"
# requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv."
async def main(requirement: str = requirement, auto_run: bool = True):
role = MLEngineer(goal=requirement, auto_run=auto_run)
await role.run(requirement)
fire.Fire(main)

View file

@ -86,6 +86,7 @@ class Task(BaseModel):
class Plan(BaseModel):
goal: str
context: str = ""
tasks: list[Task] = []
task_map: dict[str, Task] = {}
current_task_id = ""
@ -149,14 +150,81 @@ class Plan(BaseModel):
self.tasks = final_tasks
# Update current_task_id to the first unfinished task in the merged list
for task in self.tasks:
if not task.is_finished:
self.current_task_id = task.task_id
break
self._update_current_task()
# Update the task map for quick access to tasks by ID
self.task_map = {task.task_id: task for task in self.tasks}
def reset_task(self, task_id: str):
"""
Clear code and result of the task based on task_id, and set the task as unfinished.
Args:
task_id (str): The ID of the task to be reset.
Returns:
None
"""
if task_id in self.task_map:
task = self.task_map[task_id]
task.code = ""
task.result = ""
task.is_finished = False
def replace_task(self, new_task: Task):
"""
Replace an existing task with the new input task based on task_id, and reset all tasks depending on it.
Args:
new_task (Task): The new task that will replace an existing one.
Returns:
None
"""
if new_task.task_id in self.task_map:
# Replace the task in the task map and the task list
self.task_map[new_task.task_id] = new_task
for i, task in enumerate(self.tasks):
if task.task_id == new_task.task_id:
self.tasks[i] = new_task
break
# Reset dependent tasks
for task in self.tasks:
if new_task.task_id in task.dependent_task_ids:
self.reset_task(task.task_id)
def append_task(self, new_task: Task):
"""
Append a new task to the end of existing task sequences
Args:
new_task (Task): The new task to be appended to the existing task sequence
Returns:
None
"""
assert not self.has_task_id(new_task.task_id), "Task already in current plan, use replace_task instead"
assert all([self.has_task_id(dep_id) for dep_id in new_task.dependent_task_ids]), \
"New task has unknown dependencies"
# Existing tasks do not depend on the new task, it's fine to put it to the end of the sorted task sequence
self.tasks.append(new_task)
self.task_map[new_task.task_id] = new_task
self._update_current_task()
def has_task_id(self, task_id: str) -> bool:
return task_id in self.task_map
def _update_current_task(self):
current_task_id = ""
for task in self.tasks:
if not task.is_finished:
current_task_id = task.task_id
break
self.current_task_id = current_task_id # all tasks finished
@property
def current_task(self) -> Task:
"""Find current task to execute
@ -170,10 +238,8 @@ class Plan(BaseModel):
"""Finish current task, set Task.is_finished=True, set current task to next task
"""
if self.current_task_id:
current_task = self.current_task
current_task.is_finished = True
next_task_index = self.tasks.index(current_task) + 1
self.current_task_id = self.tasks[next_task_index].task_id if next_task_index < len(self.tasks) else None
self.current_task.is_finished = True
self._update_current_task() # set to next task
def get_finished_tasks(self) -> list[Task]:
"""return all finished tasks in correct linearized order

View file

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# @Date : 12/12/2023 4:14 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import os
import json
import nbformat
from metagpt.const import DATA_PATH
def save_code_file(name: str, code_context: str, file_format: str = "py") -> None:
"""
Save code files to a specified path.
Args:
- name (str): The name of the folder to save the files.
- code_context (str): The code content.
- file_format (str, optional): The file format. Supports 'py' (Python file), 'json' (JSON file), and 'ipynb' (Jupyter Notebook file). Default is 'py'.
Returns:
- None
"""
# Create the folder path if it doesn't exist
os.makedirs(name=DATA_PATH / "output" / f"{name}", exist_ok=True)
# Choose to save as a Python file or a JSON file based on the file format
file_path = DATA_PATH / "output" / f"{name}/code.{file_format}"
if file_format == "py":
with open(file_path, "w", encoding="utf-8") as fp:
fp.write(code_context + "\n\n")
elif file_format == "json":
# Parse the code content as JSON and save
data = {"code": code_context}
with open(file_path, "w", encoding="utf-8") as fp:
json.dump(data, fp, indent=2)
elif file_format == "ipynb":
nbformat.write(code_context, file_path)
else:
raise ValueError("Unsupported file format. Please choose 'py', 'json', or 'ipynb'.")

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.execute_code import ExecutePyCode, truncate
from metagpt.schema import Message
@ -81,3 +81,10 @@ async def test_plotting_bug():
pi = ExecutePyCode()
output = await pi.run(code)
assert output[1] is True
def test_truncate():
output = "hello world"
assert truncate(output) == output
output = "hello world"
assert truncate(output, 5) == "Truncated to show only the last 5 characters\nworld"

View file

@ -1,13 +1,15 @@
import pytest
from metagpt.actions.write_plan import WritePlan
from metagpt.actions.write_plan import WritePlan, precheck_update_plan_from_rsp, Plan, Task
def test_precheck_update_plan_from_rsp():
plan = Plan(goal="")
plan.add_tasks([Task(task_id="1")])
rsp = '[{"task_id": "2"}]'
success, _ = precheck_update_plan_from_rsp(rsp, plan)
assert success
assert len(plan.tasks) == 1 and plan.tasks[0].task_id == "1" # precheck should not change the original one
@pytest.mark.asyncio
async def test_plan():
p = WritePlan()
task_desc = """Heres some background information on Cyclistic, a bike-sharing company designing a marketing strategy aimed at converting casual riders into annual members: So far, Cyclistics marketing strategy has relied on building general awareness and engaging a wide range of consumers. group. One way to help achieve these goals is the flexibility of its pricing plans: one-way passes, full-day passes, and annual memberships. Customers who purchase a one-way or full-day pass are known as recreational riders. Customers purchasing an annual membership are Cyclistic members. I will provide you with a data sheet that records user behavior: '/Users/vicis/Downloads/202103-divvy-tripdata.csv"""
rsp = await p.run(task_desc, role="data analyst")
assert len(rsp.content) > 0
assert rsp.sent_from == "WritePlan"
print(rsp)
invalid_rsp = 'wrong'
success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
assert not success

View file

@ -5,6 +5,7 @@
@Author : alexanderwu
@File : test_schema.py
"""
import pytest
from metagpt.schema import AIMessage, Message, SystemMessage, UserMessage
from metagpt.schema import Task, Plan
@ -104,3 +105,82 @@ class TestPlan:
finished_tasks = plan.get_finished_tasks()
assert len(finished_tasks) == 1
assert finished_tasks[0].task_id == "1"
def test_reset_task_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", finished=True)
plan.add_tasks([task])
plan.reset_task("1")
reset_task = plan.task_map["1"]
assert reset_task.code == ""
assert reset_task.result == ""
assert not reset_task.is_finished
def test_reset_task_non_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", finished=True)
plan.add_tasks([task])
plan.reset_task("2") # Task with ID 2 does not exist
assert "1" in plan.task_map
assert "2" not in plan.task_map
def test_replace_task_with_dependents(self):
plan = Plan(goal="")
tasks = [Task(task_id="1", instruction="First Task", finished=True),
Task(task_id="2", instruction="Second Task", dependent_task_ids=["1"], finished=True)]
plan.add_tasks(tasks)
new_task = Task(task_id="1", instruction="Updated First Task")
plan.replace_task(new_task)
assert plan.task_map["1"].instruction == "Updated First Task"
assert not plan.task_map["2"].is_finished # Dependent task should be reset
assert plan.task_map["2"].code == ""
assert plan.task_map["2"].result == ""
def test_replace_task_non_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="First Task")
plan.add_tasks([task])
new_task = Task(task_id="2", instruction="New Task")
plan.replace_task(new_task) # Task with ID 2 does not exist in plan
assert "1" in plan.task_map
assert "2" not in plan.task_map
def test_append_task_with_valid_dependencies(self):
plan = Plan(goal="Test")
existing_task = [Task(task_id="1")]
plan.add_tasks(existing_task)
new_task = Task(task_id="2", dependent_task_ids=["1"])
plan.append_task(new_task)
assert plan.tasks[-1].task_id == "2"
assert plan.task_map["2"] == new_task
def test_append_task_with_invalid_dependencies(self):
new_task = Task(task_id="2", dependent_task_ids=["3"])
plan = Plan(goal="Test")
with pytest.raises(AssertionError):
plan.append_task(new_task)
def test_append_task_without_dependencies(self):
plan = Plan(goal="Test")
existing_task = [Task(task_id="1")]
plan.add_tasks(existing_task)
new_task = Task(task_id="2")
plan.append_task(new_task)
assert len(plan.tasks) == 2
assert plan.current_task_id == "1"
def test_append_task_updates_current_task(self):
finished_task = Task(task_id="1", is_finished=True)
new_task = Task(task_id="2")
plan = Plan(goal="Test", tasks=[finished_task])
plan.append_task(new_task)
assert plan.current_task_id == "2"
def test_update_current_task(self):
task1 = Task(task_id="1", is_finished=True)
task2 = Task(task_id="2")
plan = Plan(goal="Test", tasks=[task1, task2])
plan._update_current_task()
assert plan.current_task_id == "2"

View file

@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# @Date : 12/12/2023 4:17 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import pytest
import os
import json
import nbformat
from metagpt.actions.write_analysis_code import WriteCodeByGenerate
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.utils.save_code import save_code_file, DATA_PATH
def test_save_code_file_python():
save_code_file("example", "print('Hello, World!')")
file_path = DATA_PATH / "output" / "example" / "code.py"
assert os.path.exists(file_path), f"File does not exist: {file_path}"
def test_save_code_file_python():
save_code_file("example", "print('Hello, World!')")
file_path = DATA_PATH / "output" / "example" / "code.py"
with open(file_path, "r", encoding="utf-8") as fp:
content = fp.read()
assert "print('Hello, World!')" in content, "File content does not match"
def test_save_code_file_json():
save_code_file("example_json", "print('Hello, JSON!')", file_format="json")
file_path = DATA_PATH / "output" / "example_json" / "code.json"
with open(file_path, "r", encoding="utf-8") as fp:
data = json.load(fp)
assert "code" in data, "JSON key 'code' is missing"
assert data["code"] == "print('Hello, JSON!')", "JSON content does not match"
@pytest.mark.asyncio
async def test_save_code_file_notebook():
code = await WriteCodeByGenerate().run(
context="basic python, hello world", plan="", code_steps="", temperature=0.0
)
executor = ExecutePyCode()
await executor.run(code)
# Save as a Notebook file
save_code_file("example_nb", executor.nb, file_format="ipynb")
file_path = DATA_PATH / "output" / "example_nb" / "code.ipynb"
assert os.path.exists(file_path), f"Notebook file does not exist: {file_path}"
# Additional checks specific to notebook format
notebook = nbformat.read(file_path, as_version=4)
assert len(notebook.cells) > 0, "Notebook should have at least one cell"
first_cell_source = notebook.cells[0].source
assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"