mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-27 14:25:20 +02:00
add experimenter
This commit is contained in:
parent
d14f07f9b1
commit
32759f031c
14 changed files with 196 additions and 51 deletions
24
expo/MCTS.py
24
expo/MCTS.py
|
|
@ -169,8 +169,8 @@ class Node():
|
|||
# return evaluate_score(predictions, gt, metric)
|
||||
|
||||
def evaluate_prediction(self, split):
|
||||
pred_path = os.path.join(self.state["work_dir"], self.state["task"], f"{split}-predictions.csv")
|
||||
pred_node_path = os.path.join(self.state["node_dir"], f"Node-{self.id}-{split}-predictions.csv")
|
||||
pred_path = os.path.join(self.state["work_dir"], self.state["task"], f"{split}_predictions.csv")
|
||||
pred_node_path = os.path.join(self.state["node_dir"], f"Node-{self.id}-{split}_predictions.csv")
|
||||
gt_path = os.path.join(self.state["datasets_dir"][f"{split}_target"])
|
||||
preds = pd.read_csv(pred_path)["target"]
|
||||
preds.to_csv(pred_node_path, index=False)
|
||||
|
|
@ -201,12 +201,12 @@ class Node():
|
|||
score_dict = await role.get_score()
|
||||
score_dict = self.evaluate_simulation(score_dict)
|
||||
self.raw_reward = score_dict
|
||||
|
||||
if self.state["low_is_better"]:
|
||||
# normalized the score to be between 0 and 1, and higher is better
|
||||
def normalize_score(score):
|
||||
return 1 / (1 + score)
|
||||
score_dict = {k: normalize_score(v) for k, v in score_dict.items()}
|
||||
self.normalized_reward = score_dict
|
||||
return score_dict
|
||||
|
||||
|
||||
|
|
@ -262,19 +262,23 @@ class MCTS():
|
|||
def best_path(self, root : Node):
|
||||
best_child = root
|
||||
best_score = 0
|
||||
def bfs(node : Node, best_score, best_child : Node):
|
||||
def bfs(node : Node, best_score, best_child : Node, split):
|
||||
assert split in ["test_score", "dev_score"]
|
||||
if node not in self.children:
|
||||
return best_score, best_child
|
||||
for child in self.children[node]:
|
||||
print(child.id, child.raw_value)
|
||||
if child.raw_value > best_score:
|
||||
best_score = child.raw_value
|
||||
score = child.normalized[split]
|
||||
print(child.id, score)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_child = child
|
||||
best_score, best_child = bfs(child, best_score, best_child)
|
||||
return best_score, best_child
|
||||
best_score, best_child = bfs(root, best_score, best_child)
|
||||
mcts_logger.log("MCTS", f"Best Score: {best_score}, Best Node ID: {best_child.id}")
|
||||
return best_child
|
||||
_, best_child = bfs(root, best_score, best_child, "test_score")
|
||||
_, dev_best_child = bfs(root, best_score, best_child, "dev_score")
|
||||
|
||||
return {"dev_best": dev_best_child,
|
||||
"global_best": best_child}
|
||||
|
||||
def get_num_simulations(self):
|
||||
return self.root_node.visited
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ TASK_PROMPT = """\
|
|||
**Attention** Please do not leak the target label in any form during training.
|
||||
|
||||
## Saving Dev and Test Predictions
|
||||
Save the prediction results of the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory BEFORE printig out the results.
|
||||
The file should contain a single `target` column with the predicted values.
|
||||
Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
|
||||
Both files should contain a single `target` column with the predicted values.
|
||||
Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.
|
||||
|
||||
## Output Training Set Performance
|
||||
|
|
@ -129,6 +129,7 @@ def generate_task_requirement(task_name, data_config):
|
|||
user_requirement = TASK_PROMPT.format(user_requirement=user_requirement,
|
||||
train_path=train_path, dev_path=dev_path, test_path=test_path,
|
||||
output_dir=output_dir)
|
||||
print(user_requirement)
|
||||
return user_requirement
|
||||
|
||||
|
||||
|
|
|
|||
3
expo/experimenter/__init__.py
Normal file
3
expo/experimenter/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .experimenter import Experimenter
|
||||
from .mcts import MCTSExperimenter
|
||||
from .aug import AugExperimenter
|
||||
60
expo/experimenter/aug.py
Normal file
60
expo/experimenter/aug.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from experimenter import Experimenter
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.utils import mcts_logger, load_execute_notebook, get_exp_pool_path
|
||||
from expo.insights.InsightGenerate import InsightGenerator
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
|
||||
EXPS_PROMPT = """
|
||||
When doing the tasks, you can refer to the insights below:
|
||||
{experience}
|
||||
|
||||
"""
|
||||
|
||||
|
||||
|
||||
|
||||
class AugExperimenter(Experimenter):
|
||||
result_path : str = "results/aug"
|
||||
|
||||
async def run_experiment(self):
|
||||
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
|
||||
user_requirement = state["requirement"]
|
||||
exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
|
||||
exp_pool = InsightGenerator.load_analysis_pool(exp_pool_path)
|
||||
if self.args.aug_mode == "single":
|
||||
exps = InsightGenerator._random_sample(exp_pool, self.args.num_experiments)
|
||||
exps = [exp["Analysis"] for exp in exps]
|
||||
elif self.args.aug_mode == "set":
|
||||
exp_set = InsightGenerator.sample_instruction_set(exp_pool)
|
||||
exp_set_text = "\n".join([f"{exp['task_id']}: {exp['Analysis']}" for exp in exp_set])
|
||||
exps = [exp_set_text] * self.args.num_experiments
|
||||
else:
|
||||
raise ValueError(f"Invalid mode: {self.args.aug_mode}")
|
||||
|
||||
results = []
|
||||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(node_id=str(i), use_reflection=self.args.use_reflection)
|
||||
di.role_dir = f"{di.role_dir}_{self.args.task}"
|
||||
requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i])
|
||||
print(requirement)
|
||||
await di.run(requirement)
|
||||
score_dict = await di.get_score(low_is_better=False)
|
||||
score_dict = self.evaluate(score_dict, state)
|
||||
results.append({
|
||||
"idx": i,
|
||||
"score_dict": score_dict,
|
||||
"aug_mode": self.args.aug_mode,
|
||||
"insights" : exps[i],
|
||||
"user_requirement": user_requirement,
|
||||
"args": self.args
|
||||
})
|
||||
scores = [score_dict["test_score"] for score_dict in scores]
|
||||
avg_score = sum(scores) / len(scores)
|
||||
best_score = max(scores) if not self.args.low_is_better else min(scores)
|
||||
best_score_idx = scores.index(best_score)
|
||||
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
|
||||
self.save_results(results)
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,18 +1,57 @@
|
|||
from expo.utils import DATA_CONFIG
|
||||
import os
|
||||
import pandas as pd
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
import datetime
|
||||
import json
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
|
||||
|
||||
class Experimenter:
|
||||
result_path : str = "results"
|
||||
data_config = DATA_CONFIG
|
||||
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
self.args = args
|
||||
self.start_time = datetime.datetime.now().strftime("%Y%m%d%H%M")
|
||||
|
||||
async def run_experiment(self):
|
||||
pass
|
||||
|
||||
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
|
||||
user_requirement = state["requirement"]
|
||||
di = ResearchAssistant(node_id="0", use_reflection=self.args.use_reflection)
|
||||
await di.run(user_requirement)
|
||||
|
||||
def save_scores(self):
|
||||
pass
|
||||
|
||||
def save_result(self):
|
||||
score_dict = await di.get_score(low_is_better=False)
|
||||
score_dict = self.evaluate(score_dict, state)
|
||||
results = {
|
||||
"test_score": self.test_score,
|
||||
"num_experiments": self.num_experiments,
|
||||
"insights": self.insights,
|
||||
"avg_score": self.avg_score,
|
||||
}
|
||||
"score_dict": score_dict,
|
||||
"aug_mode": self.args.aug_mode,
|
||||
"user_requirement": user_requirement,
|
||||
"args": self.args
|
||||
}
|
||||
self.save_result(results)
|
||||
|
||||
def evaluate_prediction(self, split, state):
|
||||
pred_path = os.path.join(state["work_dir"], state["task"], f"{split}_predictions.csv")
|
||||
pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
|
||||
gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
|
||||
preds = pd.read_csv(pred_path)["target"]
|
||||
preds.to_csv(pred_node_path, index=False)
|
||||
gt = pd.read_csv(gt_path)["target"]
|
||||
metric = state["dataset_config"]["metric"]
|
||||
return evaluate_score(preds, gt, metric)
|
||||
|
||||
def evaluate(self, score_dict, state):
|
||||
scores = {
|
||||
"dev_score": self.evaluate_prediction("dev", state),
|
||||
"test_score": self.evaluate_prediction("test", state),
|
||||
}
|
||||
score_dict.update(scores)
|
||||
return score_dict
|
||||
|
||||
|
||||
def save_result(self, result):
|
||||
with open(f"{self.result_path}/{self.args.task}_{self.start_time}.json", "w") as f:
|
||||
json.dump(result, f, indent=4)
|
||||
|
|
|
|||
44
expo/experimenter/mcts.py
Normal file
44
expo/experimenter/mcts.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
from expo.experimenter import Experimenter
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.MCTS import MCTS
|
||||
from expo.evaluation.visualize_mcts import get_tree_text
|
||||
|
||||
|
||||
class MCTSExperimenter(Experimenter):
|
||||
result_path : str = "results/mcts"
|
||||
async def run_experiment(self):
|
||||
mcts = MCTS(root_node=None, max_depth=5)
|
||||
best_nodes = await mcts.search(self.args.task, self.data_config,
|
||||
low_is_better=self.args.low_is_better,
|
||||
load_tree=self.args.load_tree,
|
||||
reflection=self.args.reflection,
|
||||
rollout=self.args.rollout,
|
||||
name=self.args.name)
|
||||
best_node = best_nodes["global_best"]
|
||||
dev_best_node = best_nodes["dev_best"]
|
||||
|
||||
text, num_generated_codes = get_tree_text(mcts.root_node)
|
||||
text += f"Generated {num_generated_codes} unique codes.\n"
|
||||
text += f"Best node: {best_node}, score: {best_node.raw_reward}\n"
|
||||
text += f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n"
|
||||
print(text)
|
||||
self.save_tree(text)
|
||||
|
||||
results = {
|
||||
"best_node": best_node,
|
||||
"best_node_score": best_node.raw_reward,
|
||||
"dev_best_node": dev_best_node,
|
||||
"dev_best_node_score": dev_best_node.raw_reward,
|
||||
"num_generated_codes": num_generated_codes,
|
||||
"user_requirement": best_node.state["requirement"],
|
||||
"args": self.args
|
||||
}
|
||||
self.save_result(results)
|
||||
|
||||
|
||||
|
||||
def save_tree(self, tree_text):
|
||||
fpath = f"{self.result_path}/{self.args.task}_tree_{self.args.name}.txt"
|
||||
with open(fpath, "w") as f:
|
||||
f.write(tree_text)
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ import random
|
|||
import json
|
||||
from metagpt.llm import LLM
|
||||
from metagpt.schema import Message
|
||||
from expo.utils import load_data_config, mcts_logger
|
||||
from expo.utils import load_data_config, mcts_logger, clean_json_from_rsp
|
||||
DATA_CONFIG = load_data_config()
|
||||
|
||||
|
||||
|
|
@ -52,17 +52,6 @@ class InsightGenerator:
|
|||
for task_id in sorted(data_dict.keys()):
|
||||
instruction_set.append(random.choice(data_dict[task_id]))
|
||||
return instruction_set
|
||||
|
||||
|
||||
@staticmethod
|
||||
def clean_json_from_rsp(text):
|
||||
pattern = r"```json(.*?)```"
|
||||
matches = re.findall(pattern, text, re.DOTALL)
|
||||
if matches:
|
||||
json_str = "\n".join(matches)
|
||||
return json_str
|
||||
else:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def format_output(rsp):
|
||||
|
|
@ -109,6 +98,6 @@ class InsightGenerator:
|
|||
llm_response = await llm.aask(
|
||||
context, system_msgs=[REFLECTION_SYSTEM_MSG]
|
||||
)
|
||||
rsp = InsightGenerator.clean_json_from_rsp(llm_response)
|
||||
rsp = clean_json_from_rsp(llm_response)
|
||||
new_instruction = json.loads(rsp)["New Instruction"]
|
||||
return new_instruction
|
||||
|
|
@ -8,7 +8,6 @@ from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
|
|||
from metagpt.utils.common import CodeParser
|
||||
from metagpt.utils.common import write_json_file, read_json_file, format_trackback_info
|
||||
from metagpt.const import MESSAGE_ROUTE_TO_ALL, SERDESER_PATH
|
||||
from metagpt.utils.recovery_util import save_history
|
||||
from expo.utils import mcts_logger, save_notebook
|
||||
from pydantic import Field, model_validator
|
||||
from metagpt.actions.di.write_analysis_code import CheckData, WriteAnalysisCode
|
||||
|
|
@ -126,7 +125,6 @@ class ResearchAssistant(DataInterpreter):
|
|||
role_path = os.path.join(stg_path, f"{name}.json")
|
||||
# 将状态保存为 JSON 文件
|
||||
write_json_file(role_path, self.model_dump())
|
||||
save_history(role=self, save_dir=stg_path, name=name)
|
||||
|
||||
|
||||
def remap_tasks(self):
|
||||
|
|
|
|||
|
|
@ -1,7 +1,4 @@
|
|||
from expo.MCTS import MCTS, Node, initialize_di_root_node
|
||||
from expo.utils import load_data_config
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.evaluation.visualize_mcts import get_tree_text
|
||||
from expo.experimenter import MCTSExperimenter, Experimenter, AugExperimenter
|
||||
import asyncio
|
||||
import argparse
|
||||
|
||||
|
|
@ -9,11 +6,10 @@ import argparse
|
|||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--name", type=str, default="")
|
||||
parser.add_argument("--exp_mode", type=str, default="mcts", choices=["mcts", "aug", "base"])
|
||||
get_di_args(parser)
|
||||
get_mcts_args(parser)
|
||||
get_aug_exp_args(parser)
|
||||
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
|
@ -38,7 +34,15 @@ def get_di_args(parser):
|
|||
|
||||
|
||||
async def main(args):
|
||||
pass
|
||||
if args.exp_mode == "mcts":
|
||||
experimenter = MCTSExperimenter(args)
|
||||
elif args.exp_mode == "aug":
|
||||
experimenter = AugExperimenter(args)
|
||||
elif args.exp_mode == "base":
|
||||
experimenter = Experimenter(args)
|
||||
else:
|
||||
raise ValueError(f"Invalid exp_mode: {args.exp_mode}")
|
||||
await experimenter.run_experiment()
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = get_args()
|
||||
|
|
|
|||
|
|
@ -27,17 +27,19 @@ data_config = load_data_config()
|
|||
|
||||
if __name__ == "__main__":
|
||||
args = get_args()
|
||||
requirement = generate_task_requirement(args.task, data_config)
|
||||
print(requirement)
|
||||
# requirement = generate_task_requirement(args.task, data_config)
|
||||
# print(requirement)
|
||||
|
||||
# role, root_node = initialize_di_root_node(requirement, data_config)
|
||||
# asyncio.run(role.run(requirement))
|
||||
|
||||
# asyncio.run(root_node.run_node())
|
||||
mcts = MCTS(root_node=None, max_depth=5)
|
||||
best_node = asyncio.run(mcts.search(args.task, data_config,
|
||||
best_nodes = asyncio.run(mcts.search(args.task, data_config,
|
||||
low_is_better=args.low_is_better, load_tree=args.load_tree,
|
||||
reflection=args.reflection, rollout=args.rollout, name=args.name))
|
||||
best_node = best_nodes["global_best"]
|
||||
dev_best_node = best_nodes["dev_best"]
|
||||
text, num_generated_codes = get_tree_text(mcts.root_node)
|
||||
print(text)
|
||||
print(f"Generated {num_generated_codes} unique codes.")
|
||||
|
|
@ -45,6 +47,7 @@ if __name__ == "__main__":
|
|||
with open(f"results/{args.task}_tree{args.name}.txt", "w") as f:
|
||||
f.write(f"Generated {num_generated_codes} unique codes.\n")
|
||||
f.write(f"Best node: {best_node}, score: {best_node.raw_reward}\n")
|
||||
f.write(f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n")
|
||||
f.write(text)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import yaml
|
||||
from metagpt.roles.role import Role
|
||||
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
|
||||
from metagpt.utils.save_code import save_code_file
|
||||
# from nbclient import NotebookClient
|
||||
from nbformat.notebooknode import NotebookNode
|
||||
import nbformat
|
||||
|
|
@ -86,7 +85,8 @@ def process_cells(nb: NotebookNode) -> NotebookNode:
|
|||
def save_notebook(role: Role, save_dir: str = "", name: str = ""):
|
||||
save_dir = Path(save_dir)
|
||||
nb = process_cells(role.execute_code.nb)
|
||||
save_code_file(name=name, code_context=nb, file_format="ipynb", save_dir=save_dir)
|
||||
file_path = save_dir / f"{name}.ipynb"
|
||||
nbformat.write(nb, file_path)
|
||||
|
||||
async def load_execute_notebook(role):
|
||||
tasks = role.planner.plan.tasks
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ Output a json following the format:
|
|||
```json
|
||||
{{
|
||||
"reflection": str = "Reflection on previous implementation",
|
||||
"improved_impl": str = "Refined code after reflection.",
|
||||
"improved_impl": str = "Refined code after reflection (do not include nested code block here).",
|
||||
}}
|
||||
```
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue