add experimenter

This commit is contained in:
Yizhou Chi 2024-08-30 19:55:40 +08:00
parent d14f07f9b1
commit 32759f031c
14 changed files with 196 additions and 51 deletions

View file

@ -169,8 +169,8 @@ class Node():
# return evaluate_score(predictions, gt, metric)
def evaluate_prediction(self, split):
pred_path = os.path.join(self.state["work_dir"], self.state["task"], f"{split}-predictions.csv")
pred_node_path = os.path.join(self.state["node_dir"], f"Node-{self.id}-{split}-predictions.csv")
pred_path = os.path.join(self.state["work_dir"], self.state["task"], f"{split}_predictions.csv")
pred_node_path = os.path.join(self.state["node_dir"], f"Node-{self.id}-{split}_predictions.csv")
gt_path = os.path.join(self.state["datasets_dir"][f"{split}_target"])
preds = pd.read_csv(pred_path)["target"]
preds.to_csv(pred_node_path, index=False)
@ -201,12 +201,12 @@ class Node():
score_dict = await role.get_score()
score_dict = self.evaluate_simulation(score_dict)
self.raw_reward = score_dict
if self.state["low_is_better"]:
# normalized the score to be between 0 and 1, and higher is better
def normalize_score(score):
return 1 / (1 + score)
score_dict = {k: normalize_score(v) for k, v in score_dict.items()}
self.normalized_reward = score_dict
return score_dict
@ -262,19 +262,23 @@ class MCTS():
def best_path(self, root : Node):
best_child = root
best_score = 0
def bfs(node : Node, best_score, best_child : Node):
def bfs(node : Node, best_score, best_child : Node, split):
assert split in ["test_score", "dev_score"]
if node not in self.children:
return best_score, best_child
for child in self.children[node]:
print(child.id, child.raw_value)
if child.raw_value > best_score:
best_score = child.raw_value
score = child.normalized[split]
print(child.id, score)
if score > best_score:
best_score = score
best_child = child
best_score, best_child = bfs(child, best_score, best_child)
return best_score, best_child
best_score, best_child = bfs(root, best_score, best_child)
mcts_logger.log("MCTS", f"Best Score: {best_score}, Best Node ID: {best_child.id}")
return best_child
_, best_child = bfs(root, best_score, best_child, "test_score")
_, dev_best_child = bfs(root, best_score, best_child, "dev_score")
return {"dev_best": dev_best_child,
"global_best": best_child}
def get_num_simulations(self):
return self.root_node.visited

View file

@ -20,8 +20,8 @@ TASK_PROMPT = """\
**Attention** Please do not leak the target label in any form during training.
## Saving Dev and Test Predictions
Save the prediction results of the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory BEFORE printig out the results.
The file should contain a single `target` column with the predicted values.
Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
Both files should contain a single `target` column with the predicted values.
Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.
## Output Training Set Performance
@ -129,6 +129,7 @@ def generate_task_requirement(task_name, data_config):
user_requirement = TASK_PROMPT.format(user_requirement=user_requirement,
train_path=train_path, dev_path=dev_path, test_path=test_path,
output_dir=output_dir)
print(user_requirement)
return user_requirement

View file

@ -0,0 +1,3 @@
from .experimenter import Experimenter
from .mcts import MCTSExperimenter
from .aug import AugExperimenter

60
expo/experimenter/aug.py Normal file
View file

@ -0,0 +1,60 @@
from experimenter import Experimenter
from expo.MCTS import create_initial_state
from expo.dataset import generate_task_requirement
from expo.utils import mcts_logger, load_execute_notebook, get_exp_pool_path
from expo.insights.InsightGenerate import InsightGenerator
from expo.research_assistant import ResearchAssistant
EXPS_PROMPT = """
When doing the tasks, you can refer to the insights below:
{experience}
"""
class AugExperimenter(Experimenter):
result_path : str = "results/aug"
async def run_experiment(self):
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = state["requirement"]
exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
exp_pool = InsightGenerator.load_analysis_pool(exp_pool_path)
if self.args.aug_mode == "single":
exps = InsightGenerator._random_sample(exp_pool, self.args.num_experiments)
exps = [exp["Analysis"] for exp in exps]
elif self.args.aug_mode == "set":
exp_set = InsightGenerator.sample_instruction_set(exp_pool)
exp_set_text = "\n".join([f"{exp['task_id']}: {exp['Analysis']}" for exp in exp_set])
exps = [exp_set_text] * self.args.num_experiments
else:
raise ValueError(f"Invalid mode: {self.args.aug_mode}")
results = []
for i in range(self.args.num_experiments):
di = ResearchAssistant(node_id=str(i), use_reflection=self.args.use_reflection)
di.role_dir = f"{di.role_dir}_{self.args.task}"
requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i])
print(requirement)
await di.run(requirement)
score_dict = await di.get_score(low_is_better=False)
score_dict = self.evaluate(score_dict, state)
results.append({
"idx": i,
"score_dict": score_dict,
"aug_mode": self.args.aug_mode,
"insights" : exps[i],
"user_requirement": user_requirement,
"args": self.args
})
scores = [score_dict["test_score"] for score_dict in scores]
avg_score = sum(scores) / len(scores)
best_score = max(scores) if not self.args.low_is_better else min(scores)
best_score_idx = scores.index(best_score)
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
self.save_results(results)

View file

@ -1,18 +1,57 @@
from expo.utils import DATA_CONFIG
import os
import pandas as pd
from expo.evaluation.evaluation import evaluate_score
import datetime
import json
from expo.MCTS import create_initial_state
from expo.research_assistant import ResearchAssistant
class Experimenter:
result_path : str = "results"
data_config = DATA_CONFIG
def __init__(self, args, **kwargs):
self.args = args
self.start_time = datetime.datetime.now().strftime("%Y%m%d%H%M")
async def run_experiment(self):
pass
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = state["requirement"]
di = ResearchAssistant(node_id="0", use_reflection=self.args.use_reflection)
await di.run(user_requirement)
def save_scores(self):
pass
def save_result(self):
score_dict = await di.get_score(low_is_better=False)
score_dict = self.evaluate(score_dict, state)
results = {
"test_score": self.test_score,
"num_experiments": self.num_experiments,
"insights": self.insights,
"avg_score": self.avg_score,
}
"score_dict": score_dict,
"aug_mode": self.args.aug_mode,
"user_requirement": user_requirement,
"args": self.args
}
self.save_result(results)
def evaluate_prediction(self, split, state):
pred_path = os.path.join(state["work_dir"], state["task"], f"{split}_predictions.csv")
pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
preds = pd.read_csv(pred_path)["target"]
preds.to_csv(pred_node_path, index=False)
gt = pd.read_csv(gt_path)["target"]
metric = state["dataset_config"]["metric"]
return evaluate_score(preds, gt, metric)
def evaluate(self, score_dict, state):
scores = {
"dev_score": self.evaluate_prediction("dev", state),
"test_score": self.evaluate_prediction("test", state),
}
score_dict.update(scores)
return score_dict
def save_result(self, result):
with open(f"{self.result_path}/{self.args.task}_{self.start_time}.json", "w") as f:
json.dump(result, f, indent=4)

44
expo/experimenter/mcts.py Normal file
View file

@ -0,0 +1,44 @@
from expo.experimenter import Experimenter
from expo.dataset import generate_task_requirement
from expo.MCTS import MCTS
from expo.evaluation.visualize_mcts import get_tree_text
class MCTSExperimenter(Experimenter):
result_path : str = "results/mcts"
async def run_experiment(self):
mcts = MCTS(root_node=None, max_depth=5)
best_nodes = await mcts.search(self.args.task, self.data_config,
low_is_better=self.args.low_is_better,
load_tree=self.args.load_tree,
reflection=self.args.reflection,
rollout=self.args.rollout,
name=self.args.name)
best_node = best_nodes["global_best"]
dev_best_node = best_nodes["dev_best"]
text, num_generated_codes = get_tree_text(mcts.root_node)
text += f"Generated {num_generated_codes} unique codes.\n"
text += f"Best node: {best_node}, score: {best_node.raw_reward}\n"
text += f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n"
print(text)
self.save_tree(text)
results = {
"best_node": best_node,
"best_node_score": best_node.raw_reward,
"dev_best_node": dev_best_node,
"dev_best_node_score": dev_best_node.raw_reward,
"num_generated_codes": num_generated_codes,
"user_requirement": best_node.state["requirement"],
"args": self.args
}
self.save_result(results)
def save_tree(self, tree_text):
fpath = f"{self.result_path}/{self.args.task}_tree_{self.args.name}.txt"
with open(fpath, "w") as f:
f.write(tree_text)

View file

@ -23,7 +23,7 @@ import random
import json
from metagpt.llm import LLM
from metagpt.schema import Message
from expo.utils import load_data_config, mcts_logger
from expo.utils import load_data_config, mcts_logger, clean_json_from_rsp
DATA_CONFIG = load_data_config()
@ -52,17 +52,6 @@ class InsightGenerator:
for task_id in sorted(data_dict.keys()):
instruction_set.append(random.choice(data_dict[task_id]))
return instruction_set
@staticmethod
def clean_json_from_rsp(text):
pattern = r"```json(.*?)```"
matches = re.findall(pattern, text, re.DOTALL)
if matches:
json_str = "\n".join(matches)
return json_str
else:
return ""
@staticmethod
def format_output(rsp):
@ -109,6 +98,6 @@ class InsightGenerator:
llm_response = await llm.aask(
context, system_msgs=[REFLECTION_SYSTEM_MSG]
)
rsp = InsightGenerator.clean_json_from_rsp(llm_response)
rsp = clean_json_from_rsp(llm_response)
new_instruction = json.loads(rsp)["New Instruction"]
return new_instruction

View file

@ -8,7 +8,6 @@ from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
from metagpt.utils.common import CodeParser
from metagpt.utils.common import write_json_file, read_json_file, format_trackback_info
from metagpt.const import MESSAGE_ROUTE_TO_ALL, SERDESER_PATH
from metagpt.utils.recovery_util import save_history
from expo.utils import mcts_logger, save_notebook
from pydantic import Field, model_validator
from metagpt.actions.di.write_analysis_code import CheckData, WriteAnalysisCode
@ -126,7 +125,6 @@ class ResearchAssistant(DataInterpreter):
role_path = os.path.join(stg_path, f"{name}.json")
# 将状态保存为 JSON 文件
write_json_file(role_path, self.model_dump())
save_history(role=self, save_dir=stg_path, name=name)
def remap_tasks(self):

View file

@ -1,7 +1,4 @@
from expo.MCTS import MCTS, Node, initialize_di_root_node
from expo.utils import load_data_config
from expo.dataset import generate_task_requirement
from expo.evaluation.visualize_mcts import get_tree_text
from expo.experimenter import MCTSExperimenter, Experimenter, AugExperimenter
import asyncio
import argparse
@ -9,11 +6,10 @@ import argparse
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument("--name", type=str, default="")
parser.add_argument("--exp_mode", type=str, default="mcts", choices=["mcts", "aug", "base"])
get_di_args(parser)
get_mcts_args(parser)
get_aug_exp_args(parser)
return parser.parse_args()
@ -38,7 +34,15 @@ def get_di_args(parser):
async def main(args):
pass
if args.exp_mode == "mcts":
experimenter = MCTSExperimenter(args)
elif args.exp_mode == "aug":
experimenter = AugExperimenter(args)
elif args.exp_mode == "base":
experimenter = Experimenter(args)
else:
raise ValueError(f"Invalid exp_mode: {args.exp_mode}")
await experimenter.run_experiment()
if __name__ == "__main__":
args = get_args()

View file

@ -27,17 +27,19 @@ data_config = load_data_config()
if __name__ == "__main__":
args = get_args()
requirement = generate_task_requirement(args.task, data_config)
print(requirement)
# requirement = generate_task_requirement(args.task, data_config)
# print(requirement)
# role, root_node = initialize_di_root_node(requirement, data_config)
# asyncio.run(role.run(requirement))
# asyncio.run(root_node.run_node())
mcts = MCTS(root_node=None, max_depth=5)
best_node = asyncio.run(mcts.search(args.task, data_config,
best_nodes = asyncio.run(mcts.search(args.task, data_config,
low_is_better=args.low_is_better, load_tree=args.load_tree,
reflection=args.reflection, rollout=args.rollout, name=args.name))
best_node = best_nodes["global_best"]
dev_best_node = best_nodes["dev_best"]
text, num_generated_codes = get_tree_text(mcts.root_node)
print(text)
print(f"Generated {num_generated_codes} unique codes.")
@ -45,6 +47,7 @@ if __name__ == "__main__":
with open(f"results/{args.task}_tree{args.name}.txt", "w") as f:
f.write(f"Generated {num_generated_codes} unique codes.\n")
f.write(f"Best node: {best_node}, score: {best_node.raw_reward}\n")
f.write(f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n")
f.write(text)

View file

@ -1,7 +1,6 @@
import yaml
from metagpt.roles.role import Role
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.utils.save_code import save_code_file
# from nbclient import NotebookClient
from nbformat.notebooknode import NotebookNode
import nbformat
@ -86,7 +85,8 @@ def process_cells(nb: NotebookNode) -> NotebookNode:
def save_notebook(role: Role, save_dir: str = "", name: str = ""):
save_dir = Path(save_dir)
nb = process_cells(role.execute_code.nb)
save_code_file(name=name, code_context=nb, file_format="ipynb", save_dir=save_dir)
file_path = save_dir / f"{name}.ipynb"
nbformat.write(nb, file_path)
async def load_execute_notebook(role):
tasks = role.planner.plan.tasks

View file

@ -69,7 +69,7 @@ Output a json following the format:
```json
{{
"reflection": str = "Reflection on previous implementation",
"improved_impl": str = "Refined code after reflection.",
"improved_impl": str = "Refined code after reflection (do not include nested code block here).",
}}
```
"""