mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-29 15:59:42 +02:00
add experimenter
This commit is contained in:
parent
d14f07f9b1
commit
32759f031c
14 changed files with 196 additions and 51 deletions
3
expo/experimenter/__init__.py
Normal file
3
expo/experimenter/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .experimenter import Experimenter
|
||||
from .mcts import MCTSExperimenter
|
||||
from .aug import AugExperimenter
|
||||
60
expo/experimenter/aug.py
Normal file
60
expo/experimenter/aug.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from experimenter import Experimenter
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.utils import mcts_logger, load_execute_notebook, get_exp_pool_path
|
||||
from expo.insights.InsightGenerate import InsightGenerator
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
|
||||
# Prompt suffix that is appended to the task requirement; the ``{experience}``
# placeholder is filled with the sampled insight text via ``str.format``.
# The template must contain only the prompt text itself (no diff-viewer
# separator characters), because every character here is sent to the agent.
EXPS_PROMPT = """
When doing the tasks, you can refer to the insights below:
{experience}

"""
|
||||
|
||||
|
||||
|
||||
|
||||
class AugExperimenter(Experimenter):
    """Experimenter that appends sampled insights to the task requirement.

    For each of ``args.num_experiments`` runs a fresh ``ResearchAssistant`` is
    given the base requirement plus ``EXPS_PROMPT`` filled with an insight,
    the run is scored, and all per-run records (preceded by a summary entry)
    are handed to ``save_result``.
    """

    result_path: str = "results/aug"

    async def run_experiment(self):
        """Run the insight-augmented experiments and save their results.

        Raises:
            ValueError: if ``args.aug_mode`` is neither ``"single"`` nor
                ``"set"``.
        """
        state = create_initial_state(
            self.args.task,
            start_task_id=1,
            data_config=self.data_config,
            low_is_better=self.args.low_is_better,
            name="",
        )
        user_requirement = state["requirement"]
        exp_pool_path = get_exp_pool_path(self.args.task, self.data_config, pool_name="ds_analysis_pool")
        exp_pool = InsightGenerator.load_analysis_pool(exp_pool_path)
        exps = self._sample_insights(exp_pool)

        results = []
        for i in range(self.args.num_experiments):
            insight = exps[i]
            di = ResearchAssistant(node_id=str(i), use_reflection=self.args.use_reflection)
            di.role_dir = f"{di.role_dir}_{self.args.task}"
            requirement = user_requirement + EXPS_PROMPT.format(experience=insight)
            print(requirement)
            await di.run(requirement)
            # NOTE(review): low_is_better is hard-coded to False here although
            # args carries a low_is_better flag -- confirm this is intended.
            score_dict = await di.get_score(low_is_better=False)
            score_dict = self.evaluate(score_dict, state)
            results.append({
                "idx": i,
                "score_dict": score_dict,
                "aug_mode": self.args.aug_mode,
                "insights": insight,
                "user_requirement": user_requirement,
                # The record is written with json.dump, which cannot serialize
                # an arbitrary args object; store its attribute dict instead.
                # Assumes args exposes __dict__ (e.g. argparse.Namespace).
                "args": vars(self.args),
            })

        # Collect the test scores from the per-run records gathered above.
        scores = [result["score_dict"]["test_score"] for result in results]
        results.insert(0, self._summarize_scores(scores))
        self.save_result(results)

    def _sample_insights(self, exp_pool):
        """Return the list of insight texts, one entry per planned experiment."""
        mode = self.args.aug_mode
        if mode == "single":
            sampled = InsightGenerator._random_sample(exp_pool, self.args.num_experiments)
            return [exp["Analysis"] for exp in sampled]
        if mode == "set":
            exp_set = InsightGenerator.sample_instruction_set(exp_pool)
            exp_set_text = "\n".join(f"{exp['task_id']}: {exp['Analysis']}" for exp in exp_set)
            # Every run in "set" mode receives the same combined insight text.
            return [exp_set_text] * self.args.num_experiments
        raise ValueError(f"Invalid mode: {mode}")

    def _summarize_scores(self, scores):
        """Build the summary entry (average, best score and its index)."""
        if not scores:
            # No experiment was run; avoid dividing by zero.
            return {"avg_score": None, "best_score": None, "best_score_idx": None}
        best_score = min(scores) if self.args.low_is_better else max(scores)
        return {
            "avg_score": sum(scores) / len(scores),
            "best_score": best_score,
            "best_score_idx": scores.index(best_score),
        }
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,18 +1,57 @@
|
|||
from expo.utils import DATA_CONFIG
|
||||
import os
|
||||
import pandas as pd
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
import datetime
|
||||
import json
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
|
||||
|
||||
class Experimenter:
    """Baseline experiment runner.

    Runs a single ``ResearchAssistant`` on the task named by ``args.task``,
    evaluates its dev/test predictions and writes the outcome as JSON under
    ``result_path``. ``AugExperimenter`` and ``MCTSExperimenter`` subclass it,
    overriding ``result_path`` and ``run_experiment``.
    """

    result_path: str = "results"
    data_config = DATA_CONFIG

    def __init__(self, args, **kwargs):
        """Store the run arguments and stamp the start time.

        Args:
            args: object whose attributes configure the run (``task``,
                ``low_is_better``, ``use_reflection``, ... are read below).
            **kwargs: accepted but not used by this class.
        """
        self.args = args
        # Minute-resolution timestamp used in result and prediction file names.
        self.start_time = datetime.datetime.now().strftime("%Y%m%d%H%M")

    async def run_experiment(self):
        """Run one assistant on the task requirement, score it and save."""
        state = create_initial_state(
            self.args.task,
            start_task_id=1,
            data_config=self.data_config,
            low_is_better=self.args.low_is_better,
            name="",
        )
        user_requirement = state["requirement"]
        di = ResearchAssistant(node_id="0", use_reflection=self.args.use_reflection)
        await di.run(user_requirement)

        # NOTE(review): low_is_better is hard-coded to False here although
        # args carries a low_is_better flag -- confirm this is intended.
        score_dict = await di.get_score(low_is_better=False)
        score_dict = self.evaluate(score_dict, state)
        results = {
            "score_dict": score_dict,
            "aug_mode": self.args.aug_mode,
            "user_requirement": user_requirement,
            # json.dump cannot serialize an arbitrary args object; store its
            # attribute dict. Assumes args exposes __dict__ (e.g. a Namespace).
            "args": vars(self.args),
        }
        self.save_result(results)

    def save_scores(self):
        """Placeholder hook; currently does nothing."""
        pass

    def evaluate_prediction(self, split, state):
        """Score the predictions of one split against its ground truth.

        Reads ``<work_dir>/<task>/<split>_predictions.csv``, keeps a
        timestamped copy of its ``target`` column in ``state["node_dir"]``,
        and compares it with the ``target`` column of the ground-truth file.

        Args:
            split: split name used in the file names (``"dev"`` / ``"test"``
                are what ``evaluate`` passes).
            state: dict providing ``work_dir``, ``task``, ``node_dir``,
                ``datasets_dir`` and ``dataset_config``.

        Returns:
            Whatever ``evaluate_score`` returns for the configured metric.
        """
        pred_path = os.path.join(state["work_dir"], state["task"], f"{split}_predictions.csv")
        pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
        gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
        preds = pd.read_csv(pred_path)["target"]
        # Archive the predictions next to the node so later runs do not
        # overwrite them.
        preds.to_csv(pred_node_path, index=False)
        gt = pd.read_csv(gt_path)["target"]
        metric = state["dataset_config"]["metric"]
        return evaluate_score(preds, gt, metric)

    def evaluate(self, score_dict, state):
        """Add ``dev_score`` and ``test_score`` to ``score_dict`` and return it.

        The dict is updated in place; the same object is returned.
        """
        scores = {
            "dev_score": self.evaluate_prediction("dev", state),
            "test_score": self.evaluate_prediction("test", state),
        }
        score_dict.update(scores)
        return score_dict

    def save_result(self, result):
        """Write ``result`` as indented JSON to ``<result_path>/<task>_<start_time>.json``."""
        # Create the output directory on first use so the write cannot fail
        # merely because "results/..." does not exist yet.
        os.makedirs(self.result_path, exist_ok=True)
        with open(f"{self.result_path}/{self.args.task}_{self.start_time}.json", "w") as f:
            json.dump(result, f, indent=4)
|
||||
|
|
|
|||
44
expo/experimenter/mcts.py
Normal file
44
expo/experimenter/mcts.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
from expo.experimenter import Experimenter
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.MCTS import MCTS
|
||||
from expo.evaluation.visualize_mcts import get_tree_text
|
||||
|
||||
|
||||
class MCTSExperimenter(Experimenter):
    """Experimenter that explores solutions with an ``MCTS`` search.

    Runs the search for ``args.task``, prints and saves a text rendering of
    the resulting tree, and saves a JSON summary of the best nodes.
    """

    result_path: str = "results/mcts"

    async def run_experiment(self):
        """Run the MCTS search, then persist the tree text and a summary."""
        mcts = MCTS(root_node=None, max_depth=5)
        best_nodes = await mcts.search(
            self.args.task,
            self.data_config,
            low_is_better=self.args.low_is_better,
            load_tree=self.args.load_tree,
            reflection=self.args.reflection,
            rollout=self.args.rollout,
            name=self.args.name,
        )
        best_node = best_nodes["global_best"]
        dev_best_node = best_nodes["dev_best"]

        text, num_generated_codes = get_tree_text(mcts.root_node)
        text += f"Generated {num_generated_codes} unique codes.\n"
        text += f"Best node: {best_node}, score: {best_node.raw_reward}\n"
        text += f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n"
        print(text)
        self.save_tree(text)

        results = {
            # The summary is written with json.dump, which cannot serialize
            # arbitrary objects, so nodes are stored in the same string form
            # used in the tree text above.
            # NOTE(review): assumes the node type is not JSON-native -- confirm.
            "best_node": str(best_node),
            "best_node_score": best_node.raw_reward,
            "dev_best_node": str(dev_best_node),
            "dev_best_node_score": dev_best_node.raw_reward,
            "num_generated_codes": num_generated_codes,
            "user_requirement": best_node.state["requirement"],
            # Same reason as above; assumes args exposes __dict__.
            "args": vars(self.args),
        }
        self.save_result(results)

    def save_tree(self, tree_text):
        """Write ``tree_text`` to ``<result_path>/<task>_tree_<name>.txt``."""
        import os  # local import: this module does not import os at file level

        # Make sure the output directory exists before opening the file.
        os.makedirs(self.result_path, exist_ok=True)
        fpath = f"{self.result_path}/{self.args.task}_tree_{self.args.name}.txt"
        with open(fpath, "w") as f:
            f.write(tree_text)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue