mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-02 14:45:17 +02:00
format code
This commit is contained in:
parent
fcd1ba66a6
commit
ab8a1d6824
17 changed files with 433 additions and 396 deletions
|
|
@ -1,4 +0,0 @@
|
|||
from .experimenter import Experimenter
|
||||
from .mcts import MCTSExperimenter
|
||||
from .aug import AugExperimenter
|
||||
from .custom import CustomExperimenter
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
from experimenter import Experimenter
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.utils import mcts_logger, load_execute_notebook, get_exp_pool_path
|
||||
|
||||
from expo.insights.instruction_generator import InstructionGenerator
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
from expo.utils import get_exp_pool_path
|
||||
|
||||
EXPS_PROMPT = """
|
||||
When doing the tasks, you can refer to the insights below:
|
||||
|
|
@ -12,10 +11,8 @@ When doing the tasks, you can refer to the insights below:
|
|||
"""
|
||||
|
||||
|
||||
|
||||
|
||||
class AugExperimenter(Experimenter):
|
||||
result_path : str = "results/aug"
|
||||
result_path: str = "results/aug"
|
||||
|
||||
async def run_experiment(self):
|
||||
# state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
|
||||
|
|
@ -31,7 +28,7 @@ class AugExperimenter(Experimenter):
|
|||
exps = [exp_set_text] * self.args.num_experiments
|
||||
else:
|
||||
raise ValueError(f"Invalid mode: {self.args.aug_mode}")
|
||||
|
||||
|
||||
results = []
|
||||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(node_id=str(i), use_reflection=self.args.reflection)
|
||||
|
|
@ -39,20 +36,19 @@ class AugExperimenter(Experimenter):
|
|||
requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i])
|
||||
print(requirement)
|
||||
score_dict = await self.run_di(di, requirement)
|
||||
results.append({
|
||||
"idx": i,
|
||||
"score_dict": score_dict,
|
||||
"aug_mode": self.args.aug_mode,
|
||||
"insights" : exps[i],
|
||||
"user_requirement": requirement,
|
||||
"args": vars(self.args)
|
||||
})
|
||||
results.append(
|
||||
{
|
||||
"idx": i,
|
||||
"score_dict": score_dict,
|
||||
"aug_mode": self.args.aug_mode,
|
||||
"insights": exps[i],
|
||||
"user_requirement": requirement,
|
||||
"args": vars(self.args),
|
||||
}
|
||||
)
|
||||
scores = [result["score_dict"]["test_score"] for result in results]
|
||||
avg_score = sum(scores) / len(scores)
|
||||
best_score = max(scores) if not self.args.low_is_better else min(scores)
|
||||
best_score_idx = scores.index(best_score)
|
||||
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
|
||||
self.save_result(results)
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,13 +1,15 @@
|
|||
from expo.experimenter.custom import CustomExperimenter
|
||||
from autogluon.tabular import TabularDataset, TabularPredictor
|
||||
|
||||
class AGRunner():
|
||||
from expo.experimenter.custom import CustomExperimenter
|
||||
|
||||
|
||||
class AGRunner:
|
||||
preset = "best_quality"
|
||||
time_limit = 500
|
||||
|
||||
def __init__(self, datasets):
|
||||
self.datasets = datasets
|
||||
|
||||
|
||||
def run(self):
|
||||
train_path = self.datasets["train"]
|
||||
test_wo_target_path = self.datasets["test_wo_target"]
|
||||
|
|
@ -16,17 +18,16 @@ class AGRunner():
|
|||
train_data = TabularDataset(train_path)
|
||||
test_data = TabularDataset(test_wo_target_path)
|
||||
dev_data = TabularDataset(dev_wo_target_path)
|
||||
|
||||
|
||||
predictor = TabularPredictor(label=target_col).fit(train_data, presets=self.preset, time_limit=self.time_limit)
|
||||
test_preds = predictor.predict(test_data)
|
||||
dev_preds = predictor.predict(dev_data)
|
||||
return {"test_preds": test_preds, "dev_preds": dev_preds}
|
||||
|
||||
|
||||
class GluonExperimenter(CustomExperimenter):
|
||||
result_path : str = "results/autogluon"
|
||||
result_path: str = "results/autogluon"
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
super().__init__(args, **kwargs)
|
||||
self.framework = AGRunner(self.datasets)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,21 +1,26 @@
|
|||
from expo.experimenter import Experimenter
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
from expo.experimenter import Experimenter
|
||||
from expo.MCTS import create_initial_state
|
||||
|
||||
|
||||
class CustomExperimenter(Experimenter):
|
||||
result_path : str = "results/custom"
|
||||
|
||||
result_path: str = "results/custom"
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
super().__init__(args, **kwargs)
|
||||
self.framework = kwargs["framework"] # todo
|
||||
self.framework = kwargs["framework"] # todo
|
||||
self.task = kwargs.get("task", self.args.task)
|
||||
self.low_is_better = kwargs.get("low_is_better", self.args.low_is_better)
|
||||
self.name = kwargs.get("name", "")
|
||||
self.result_path = f"results/custom_{self.name}"
|
||||
self.state = create_initial_state(self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name)
|
||||
|
||||
self.state = create_initial_state(
|
||||
self.task, start_task_id=1, data_config=self.data_config, low_is_better=self.low_is_better, name=self.name
|
||||
)
|
||||
|
||||
def run_experiment(self):
|
||||
user_requirement = self.state["requirement"]
|
||||
preds = self.framework.run(user_requirement)
|
||||
|
|
@ -23,13 +28,9 @@ class CustomExperimenter(Experimenter):
|
|||
dev_preds = preds["dev_preds"]
|
||||
score_dict = {
|
||||
"dev_score": self.evaluate_predictions(dev_preds, "dev"),
|
||||
"test_score": self.evaluate_predictions(test_preds, "test")
|
||||
}
|
||||
results = {
|
||||
"score_dict": score_dict,
|
||||
"user_requirement": user_requirement,
|
||||
"args": vars(self.args)
|
||||
"test_score": self.evaluate_predictions(test_preds, "test"),
|
||||
}
|
||||
results = {"score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
|
||||
self.save_result(results)
|
||||
|
||||
def evaluate_pred_files(self, dev_pred_path, test_pred_path):
|
||||
|
|
@ -37,7 +38,7 @@ class CustomExperimenter(Experimenter):
|
|||
test_preds = pd.read_csv(test_pred_path)["target"]
|
||||
score_dict = {
|
||||
"dev_score": self.evaluate_score(dev_preds, "dev"),
|
||||
"test_score": self.evaluate_score(test_preds, "test")
|
||||
"test_score": self.evaluate_score(test_preds, "test"),
|
||||
}
|
||||
return score_dict
|
||||
|
||||
|
|
@ -46,8 +47,7 @@ class CustomExperimenter(Experimenter):
|
|||
gt_path = os.path.join(self.state["datasets_dir"][f"{split}_target"])
|
||||
gt = pd.read_csv(gt_path)["target"]
|
||||
score = evaluate_score(preds, gt, metric)
|
||||
return score
|
||||
|
||||
return score
|
||||
|
||||
def load_datasets(self):
|
||||
train_path = self.state["datasets_dir"]["train"]
|
||||
|
|
@ -57,4 +57,3 @@ class CustomExperimenter(Experimenter):
|
|||
dev = pd.read_csv(dev_path)
|
||||
test = pd.read_csv(test_path)
|
||||
return train, dev, test
|
||||
|
||||
|
|
|
|||
|
|
@ -1,23 +1,29 @@
|
|||
from expo.utils import DATA_CONFIG
|
||||
import os
|
||||
import pandas as pd
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
from expo.utils import DATA_CONFIG
|
||||
|
||||
|
||||
class Experimenter:
|
||||
result_path : str = "results/base"
|
||||
result_path: str = "results/base"
|
||||
data_config = DATA_CONFIG
|
||||
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
self.args = args
|
||||
self.start_time = datetime.datetime.now().strftime("%Y%m%d%H%M")
|
||||
self.state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
|
||||
|
||||
self.state = create_initial_state(
|
||||
self.args.task,
|
||||
start_task_id=1,
|
||||
data_config=self.data_config,
|
||||
low_is_better=self.args.low_is_better,
|
||||
name="",
|
||||
)
|
||||
|
||||
async def run_di(self, di, user_requirement):
|
||||
max_retries = 3
|
||||
|
|
@ -33,14 +39,8 @@ class Experimenter:
|
|||
print(f"Error: {e}")
|
||||
num_runs += 1
|
||||
if not run_finished:
|
||||
score_dict = {
|
||||
"train_score": -1,
|
||||
"dev_score": -1,
|
||||
"test_score": -1,
|
||||
"score": -1
|
||||
}
|
||||
score_dict = {"train_score": -1, "dev_score": -1, "test_score": -1, "score": -1}
|
||||
return score_dict
|
||||
|
||||
|
||||
async def run_experiment(self):
|
||||
state = self.state
|
||||
|
|
@ -50,28 +50,28 @@ class Experimenter:
|
|||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
|
||||
score_dict = await self.run_di(di, user_requirement)
|
||||
results.append({
|
||||
"idx": i,
|
||||
"score_dict": score_dict,
|
||||
"user_requirement": user_requirement,
|
||||
"args": vars(self.args)
|
||||
})
|
||||
self.save_result(results) # save intermediate results
|
||||
results.append(
|
||||
{"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
|
||||
)
|
||||
self.save_result(results) # save intermediate results
|
||||
dev_scores = [result["score_dict"]["dev_score"] for result in results]
|
||||
best_dev_score = max(dev_scores) if not self.args.low_is_better else min(dev_scores)
|
||||
best_score_idx = dev_scores.index(best_dev_score)
|
||||
|
||||
|
||||
test_scores = [result["score_dict"]["test_score"] for result in results]
|
||||
avg_score = sum(test_scores) / len(test_scores)
|
||||
global_best_score = max(test_scores) if not self.args.low_is_better else min(test_scores)
|
||||
|
||||
results.insert(0, {
|
||||
"best_dev_score": best_dev_score,
|
||||
"best_score_idx": best_score_idx,
|
||||
"best_test_score": test_scores[best_score_idx],
|
||||
"avg_test_score": avg_score,
|
||||
"best_score": global_best_score
|
||||
})
|
||||
results.insert(
|
||||
0,
|
||||
{
|
||||
"best_dev_score": best_dev_score,
|
||||
"best_score_idx": best_score_idx,
|
||||
"best_test_score": test_scores[best_score_idx],
|
||||
"avg_test_score": avg_score,
|
||||
"best_score": global_best_score,
|
||||
},
|
||||
)
|
||||
self.save_result(results)
|
||||
|
||||
def evaluate_prediction(self, split, state):
|
||||
|
|
@ -85,7 +85,7 @@ class Experimenter:
|
|||
metric = state["dataset_config"]["metric"]
|
||||
os.remove(pred_path)
|
||||
return evaluate_score(preds, gt, metric)
|
||||
|
||||
|
||||
def evaluate(self, score_dict, state):
|
||||
scores = {
|
||||
"dev_score": self.evaluate_prediction("dev", state),
|
||||
|
|
@ -94,13 +94,12 @@ class Experimenter:
|
|||
score_dict.update(scores)
|
||||
return score_dict
|
||||
|
||||
|
||||
def save_result(self, result):
|
||||
end_time = datetime.datetime.now().strftime("%Y%m%d%H%M")
|
||||
time_info = {
|
||||
"start_time": self.start_time,
|
||||
"end_time": end_time,
|
||||
"duration (minutes)": float(end_time) - float(self.start_time)
|
||||
"duration (minutes)": float(end_time) - float(self.start_time),
|
||||
}
|
||||
result = result.copy()
|
||||
result.insert(0, time_info)
|
||||
|
|
|
|||
|
|
@ -1,22 +1,25 @@
|
|||
from expo.experimenter import Experimenter
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.MCTS import MCTS
|
||||
from expo.evaluation.visualize_mcts import get_tree_text
|
||||
from expo.experimenter import Experimenter
|
||||
from expo.MCTS import MCTS
|
||||
|
||||
|
||||
class MCTSExperimenter(Experimenter):
|
||||
result_path : str = "results/mcts"
|
||||
result_path: str = "results/mcts"
|
||||
|
||||
async def run_experiment(self):
|
||||
mcts = MCTS(root_node=None, max_depth=5)
|
||||
best_nodes = await mcts.search(self.args.task, self.data_config,
|
||||
low_is_better=self.args.low_is_better,
|
||||
load_tree=self.args.load_tree,
|
||||
reflection=self.args.reflection,
|
||||
rollouts=self.args.rollouts,
|
||||
name=self.args.name)
|
||||
best_nodes = await mcts.search(
|
||||
self.args.task,
|
||||
self.data_config,
|
||||
low_is_better=self.args.low_is_better,
|
||||
load_tree=self.args.load_tree,
|
||||
reflection=self.args.reflection,
|
||||
rollouts=self.args.rollouts,
|
||||
name=self.args.name,
|
||||
)
|
||||
best_node = best_nodes["global_best"]
|
||||
dev_best_node = best_nodes["dev_best"]
|
||||
|
||||
|
||||
text, num_generated_codes = get_tree_text(mcts.root_node)
|
||||
text += f"Generated {num_generated_codes} unique codes.\n"
|
||||
text += f"Best node: {best_node}, score: {best_node.raw_reward}\n"
|
||||
|
|
@ -24,22 +27,21 @@ class MCTSExperimenter(Experimenter):
|
|||
print(text)
|
||||
self.save_tree(text)
|
||||
|
||||
results = [{
|
||||
"best_node": best_node.id,
|
||||
"best_node_score": best_node.raw_reward,
|
||||
"dev_best_node": dev_best_node.id,
|
||||
"dev_best_node_score": dev_best_node.raw_reward,
|
||||
"num_generated_codes": num_generated_codes,
|
||||
"user_requirement": best_node.state["requirement"],
|
||||
"tree_text": text,
|
||||
"args": vars(self.args)
|
||||
}]
|
||||
results = [
|
||||
{
|
||||
"best_node": best_node.id,
|
||||
"best_node_score": best_node.raw_reward,
|
||||
"dev_best_node": dev_best_node.id,
|
||||
"dev_best_node_score": dev_best_node.raw_reward,
|
||||
"num_generated_codes": num_generated_codes,
|
||||
"user_requirement": best_node.state["requirement"],
|
||||
"tree_text": text,
|
||||
"args": vars(self.args),
|
||||
}
|
||||
]
|
||||
self.save_result(results)
|
||||
|
||||
|
||||
|
||||
def save_tree(self, tree_text):
|
||||
fpath = f"{self.result_path}/{self.args.task}_tree_{self.args.name}.txt"
|
||||
with open(fpath, "w") as f:
|
||||
f.write(tree_text)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue