mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-21 14:05:17 +02:00
Merge branch 'improve-multimodal' into 'expo'
Improve multimodal See merge request agents/exp_optimizer!22
This commit is contained in:
commit
573e9b6d9e
9 changed files with 77 additions and 36 deletions
31
expo/MCTS.py
31
expo/MCTS.py
|
|
@ -1,3 +1,4 @@
|
|||
import json
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
|
|
@ -240,6 +241,7 @@ class MCTS:
|
|||
max_depth: int = 5
|
||||
c_explore: float = 1.4
|
||||
c_unvisited: float = 0.8
|
||||
node_order: list = []
|
||||
|
||||
def __init__(self, root_node, max_depth, use_fixed_insights):
|
||||
self.root_node = root_node
|
||||
|
|
@ -306,11 +308,32 @@ class MCTS:
|
|||
_, global_best_child = bfs(root, global_best_score, best_child, "test_score")
|
||||
_, dev_best_child = bfs(root, dev_best_score, best_child, "dev_score")
|
||||
|
||||
return {"dev_best": dev_best_child, "global_best": global_best_child}
|
||||
return {"dev_best": dev_best_child, "global_best": global_best_child, "scores": self.get_score_order_dict()}
|
||||
|
||||
def get_num_simulations(self):
    """Return the ``visited`` counter stored on the tree's root node.

    The method name treats this counter as the number of simulations
    run so far; the counter itself is maintained elsewhere.
    """
    root = self.root_node
    return root.visited
|
||||
|
||||
def save_node_order(self, node_id):
    """Record ``node_id`` as the latest entry of the node order and persist it.

    The full order is rewritten to ``node_order.json`` inside the root
    node's ``node_dir`` on every call.

    Args:
        node_id: Identifier of the node to record; it must be JSON-serializable.
    """
    # Rebind rather than append in place: ``node_order`` defaults to a list
    # defined on the class, so an in-place ``append`` would be shared by every
    # instance created in the same process.
    self.node_order = [*self.node_order, node_id]
    order_path = os.path.join(self.root_node.state["node_dir"], "node_order.json")
    with open(order_path, "w", encoding="utf-8") as f:
        json.dump(self.node_order, f)
|
||||
|
||||
def load_node_order(self):
    """Restore ``self.node_order`` from ``node_order.json`` in the root node's ``node_dir``.

    Any error from opening or parsing the file propagates to the caller.
    """
    order_path = os.path.join(self.root_node.state["node_dir"], "node_order.json")
    with open(order_path, "r") as order_file:
        restored = json.load(order_file)
    self.node_order = restored
|
||||
|
||||
def get_score_order_dict(self):
    """Collect dev/test scores of every recorded node, in recorded order.

    For each id in ``self.node_order`` a placeholder ``Node`` is created with
    the root node's state, given that id, and replaced by the result of its
    ``load_node()`` call. The loaded node's normalized and raw rewards are
    then read.

    Returns:
        A dict with the keys ``"dev"``, ``"test"``, ``"dev_raw"`` and
        ``"test_raw"``, each mapping to a list with one entry per recorded id.
    """
    dev, test, dev_raw, test_raw = [], [], [], []
    for recorded_id in self.node_order:
        # The placeholder only carries the id and state needed by load_node().
        placeholder = Node(parent=None, state=self.root_node.state, action=None, value=0)
        placeholder.id = recorded_id
        loaded = placeholder.load_node()
        dev.append(loaded.normalized_reward["dev_score"])
        test.append(loaded.normalized_reward["test_score"])
        dev_raw.append(loaded.raw_reward["dev_score"])
        test_raw.append(loaded.raw_reward["test_score"])
    return {"dev": dev, "test": test, "dev_raw": dev_raw, "test_raw": test_raw}
|
||||
|
||||
async def search(self, state, rollouts, load_tree=False, reflection=False):
|
||||
role, root = initialize_di_root_node(state, reflection=reflection)
|
||||
self.root_node = root
|
||||
|
|
@ -329,8 +352,12 @@ class MCTS:
|
|||
self.backpropagate(root, reward)
|
||||
node, reward = await self.expand_and_simulate(root)
|
||||
# self.backpropagate(node, reward)
|
||||
self.save_node_order(root.id)
|
||||
self.save_node_order(node.id)
|
||||
else:
|
||||
root = self.root_node
|
||||
self.load_node_order()
|
||||
|
||||
for _ in range(rollouts): # number of rollouts
|
||||
mcts_logger.log("MCTS", f"Start the next rollout {_+1}")
|
||||
node = self.select(root)
|
||||
|
|
@ -344,6 +371,7 @@ class MCTS:
|
|||
else:
|
||||
node, reward = await self.expand_and_simulate(node)
|
||||
# self.backpropagate(node, reward)
|
||||
self.save_node_order(node.id)
|
||||
return self.best_path(root)
|
||||
|
||||
async def expand_and_simulate(self, node):
|
||||
|
|
@ -373,6 +401,7 @@ class MCTS:
|
|||
self.root_node = pickle.load(f)
|
||||
self.children[self.root_node] = self.root_node.children
|
||||
load_children_node(self.root_node)
|
||||
|
||||
if self.children:
|
||||
return True
|
||||
return False
|
||||
|
|
|
|||
|
|
@ -20,12 +20,23 @@ USE_AG = """
|
|||
7. Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
|
||||
"""
|
||||
|
||||
TEXT_MODALITY = """
|
||||
7. You could use models from transformers library for this text dataset.
|
||||
8. Use gpu if available for faster training.
|
||||
"""
|
||||
|
||||
IMAGE_MODALITY = """
|
||||
7. You could use models from transformers/torchvision library for this image dataset.
|
||||
8. Use gpu if available for faster training.
|
||||
"""
|
||||
|
||||
STACKING = """
|
||||
7. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
|
||||
8. You could do some quick model prototyping to see which models work best and then use them in the ensemble.
|
||||
"""
|
||||
|
||||
SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING}
|
||||
|
||||
SPECIAL_INSTRUCTIONS = {"ag": USE_AG, "stacking": STACKING, "text": TEXT_MODALITY, "image": IMAGE_MODALITY}
|
||||
|
||||
DI_INSTRUCTION = """
|
||||
## Attention
|
||||
|
|
@ -39,8 +50,8 @@ DI_INSTRUCTION = """
|
|||
## Saving Dev and Test Predictions
|
||||
1. Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
|
||||
- Both files should contain a single column named `target` with the predicted values.
|
||||
2. Make sure the prediction results are in the same format as the target column in the training set.
|
||||
- For instance, if the target column is categorical, the prediction results should be categorical as well.
|
||||
2. Make sure the prediction results are in the same format as the target column in the original training set.
|
||||
- For instance, if the original target column is a list of string, the prediction results should also be strings.
|
||||
|
||||
## Output Performance
|
||||
Print the train and dev set performance in the last step.
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ HFDATSETS = [
|
|||
"name": "oxford-iiit-pet",
|
||||
"dataset_name": "timm/oxford-iiit-pet",
|
||||
"image_col": "image",
|
||||
"target_col": "label_cat_dog",
|
||||
"target_col": "label",
|
||||
"modality": "image",
|
||||
},
|
||||
{
|
||||
|
|
@ -115,7 +115,7 @@ class HFExpDataset(ExpDataset):
|
|||
|
||||
if __name__ == "__main__":
|
||||
dataset_dir = "D:/work/automl/datasets"
|
||||
save_analysis_pool = False
|
||||
save_analysis_pool = True
|
||||
force_update = False
|
||||
datasets_dict = {"datasets": {}}
|
||||
solution_designer = SolutionDesigner()
|
||||
|
|
|
|||
|
|
@ -201,11 +201,11 @@ datasets:
|
|||
\ Do not plot or make any visualizations.\n"
|
||||
oxford-iiit-pet:
|
||||
dataset: oxford-iiit-pet
|
||||
metric: f1
|
||||
target_col: label_cat_dog
|
||||
metric: f1 weighted
|
||||
target_col: label
|
||||
user_requirement: "This is a oxford-iiit-pet dataset. Your goal is to predict\
|
||||
\ the target column `label_cat_dog`.\nPerform data analysis, data preprocessing,\
|
||||
\ feature engineering, and modeling to predict the target. \nReport f1 on the\
|
||||
\ the target column `label`.\nPerform data analysis, data preprocessing,\
|
||||
\ feature engineering, and modeling to predict the target. \nReport f1 weighted on the\
|
||||
\ eval data. Do not plot or make any visualizations.\n"
|
||||
sms_spam:
|
||||
dataset: sms_spam
|
||||
|
|
|
|||
|
|
@ -49,9 +49,5 @@ class AugExperimenter(Experimenter):
|
|||
"args": vars(self.args),
|
||||
}
|
||||
)
|
||||
scores = [result["score_dict"]["test_score"] for result in results]
|
||||
avg_score = sum(scores) / len(scores)
|
||||
best_score = max(scores) if not self.args.low_is_better else min(scores)
|
||||
best_score_idx = scores.index(best_score)
|
||||
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
|
||||
results = self.summarize_results(results)
|
||||
self.save_result(results)
|
||||
|
|
|
|||
|
|
@ -47,18 +47,7 @@ class Experimenter:
|
|||
score_dict = {"train_score": -1, "dev_score": -1, "test_score": -1, "score": -1}
|
||||
return score_dict
|
||||
|
||||
async def run_experiment(self):
|
||||
state = self.state
|
||||
user_requirement = state["requirement"]
|
||||
results = []
|
||||
|
||||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
|
||||
score_dict = await self.run_di(di, user_requirement, run_idx=i)
|
||||
results.append(
|
||||
{"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
|
||||
)
|
||||
self.save_result(results) # save intermediate results
|
||||
def summarize_results(self, results):
|
||||
dev_scores = [result["score_dict"]["dev_score"] for result in results]
|
||||
best_dev_score = (
|
||||
max(dev_scores)
|
||||
|
|
@ -85,6 +74,22 @@ class Experimenter:
|
|||
"global_best_test_score": global_best_score,
|
||||
},
|
||||
)
|
||||
return results
|
||||
|
||||
async def run_experiment(self):
    """Run ``args.num_experiments`` independent experiments and save their results.

    Each run creates a fresh ``ResearchAssistant`` and awaits ``run_di`` for
    its score dict. The accumulated result list is saved after every run, then
    passed through ``summarize_results`` and saved once more.
    """
    requirement = self.state["requirement"]
    collected = []

    for run_idx in range(self.args.num_experiments):
        assistant = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
        scores = await self.run_di(assistant, requirement, run_idx=run_idx)
        record = {
            "idx": run_idx,
            "score_dict": scores,
            "user_requirement": requirement,
            "args": vars(self.args),
        }
        collected.append(record)
        self.save_result(collected)  # save intermediate results

    collected = self.summarize_results(collected)
    self.save_result(collected)
|
||||
|
||||
def evaluate_prediction(self, split, state):
|
||||
|
|
|
|||
|
|
@ -29,17 +29,13 @@ class MCTSExperimenter(Experimenter):
|
|||
)
|
||||
best_node = best_nodes["global_best"]
|
||||
dev_best_node = best_nodes["dev_best"]
|
||||
|
||||
self.copy_notebook(best_node, "best")
|
||||
self.copy_notebook(dev_best_node, "dev_best")
|
||||
score_dict = best_nodes["scores"]
|
||||
|
||||
text, num_generated_codes = get_tree_text(mcts.root_node)
|
||||
text += f"Generated {num_generated_codes} unique codes.\n"
|
||||
text += f"Best node: {best_node.id}, score: {best_node.raw_reward}\n"
|
||||
text += f"Dev best node: {dev_best_node.id}, score: {dev_best_node.raw_reward}\n"
|
||||
print(text)
|
||||
self.save_tree(text)
|
||||
|
||||
results = [
|
||||
{
|
||||
"best_node": best_node.id,
|
||||
|
|
@ -50,9 +46,13 @@ class MCTSExperimenter(Experimenter):
|
|||
"user_requirement": best_node.state["requirement"],
|
||||
"tree_text": text,
|
||||
"args": vars(self.args),
|
||||
"scores": score_dict,
|
||||
}
|
||||
]
|
||||
self.save_result(results)
|
||||
self.copy_notebook(best_node, "best")
|
||||
self.copy_notebook(dev_best_node, "dev_best")
|
||||
self.save_tree(text)
|
||||
|
||||
def copy_notebook(self, node, name):
|
||||
node_dir = node.get_node_dir()
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@ import asyncio
|
|||
|
||||
from expo.experimenter.aug import AugExperimenter
|
||||
from expo.experimenter.autogluon import GluonExperimenter
|
||||
from expo.experimenter.autosklearn import AutoSklearnExperimenter
|
||||
from expo.experimenter.custom import CustomExperimenter
|
||||
from expo.experimenter.experimenter import Experimenter
|
||||
from expo.experimenter.mcts import MCTSExperimenter
|
||||
from expo.experimenter.autosklearn import AutoSklearnExperimenter
|
||||
|
||||
|
||||
def get_args():
|
||||
|
|
@ -44,7 +44,7 @@ def get_di_args(parser):
|
|||
parser.add_argument("--reflection", dest="reflection", action="store_true")
|
||||
parser.add_argument("--no_reflection", dest="reflection", action="store_false")
|
||||
parser.add_argument("--num_experiments", type=int, default=1)
|
||||
parser.add_argument("--special_instruction", type=str, default=None, choices=["ag", "stacking"])
|
||||
parser.add_argument("--special_instruction", type=str, default=None, choices=["ag", "stacking", "text", "image"])
|
||||
parser.set_defaults(reflection=True)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -35,8 +35,8 @@ The current task is about feature engineering. when performing it, please adhere
|
|||
MODEL_TRAIN_PROMPT = """
|
||||
The current task is about training a model, please ensure high performance:
|
||||
- For tabular datasets - you have access to XGBoost, CatBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
|
||||
- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
|
||||
- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
|
||||
- For image datasets - you have access to Swin Transformer, ViT, ResNet, EfficientNet, etc.
|
||||
- For text datasets - you have access to Electra, DeBERTa, GPT-2, BERT, etc.
|
||||
- Avoid the use of SVM because of its high training time.
|
||||
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
|
||||
- If non-numeric columns exist, perform label encode together with all steps.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue