mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-15 11:02:36 +02:00
allow datasets to be prepared by users
This commit is contained in:
parent
1d4a845120
commit
07800be441
2 changed files with 31 additions and 12 deletions
|
|
@ -1,3 +1,4 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
|
@ -18,22 +19,22 @@ Report {metric} on the eval data. Do not plot or make any visualizations.
|
|||
"""
|
||||
|
||||
USE_AG = """
|
||||
7. Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
|
||||
- Please use autogluon for model training with presets='medium_quality', time_limit=None, give dev dataset to tuning_data, and use right eval_metric.
|
||||
"""
|
||||
|
||||
TEXT_MODALITY = """
|
||||
7. You could use models from transformers library for this text dataset.
|
||||
8. Use gpu if available for faster training.
|
||||
- You could use models from transformers library for this text dataset.
|
||||
- Use gpu if available for faster training.
|
||||
"""
|
||||
|
||||
IMAGE_MODALITY = """
|
||||
7. You could use models from transformers/torchvision library for this image dataset.
|
||||
8. Use gpu if available for faster training.
|
||||
- You could use models from transformers/torchvision library for this image dataset.
|
||||
- Use gpu if available for faster training.
|
||||
"""
|
||||
|
||||
STACKING = """
|
||||
7. To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
|
||||
8. You could do some quick model prototyping to see which models work best and then use them in the ensemble.
|
||||
- To avoid overfitting, train a weighted ensemble model such as StackingClassifier or StackingRegressor.
|
||||
- You could do some quick model prototyping to see which models work best and then use them in the ensemble.
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -361,10 +362,22 @@ async def process_dataset(dataset, solution_designer: SolutionDesigner, save_ana
|
|||
datasets_dict["datasets"][dataset.name] = dataset_dict
|
||||
|
||||
|
||||
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options used by the dataset preparation scripts.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing zero-argument calls behave exactly as before.

    Returns:
        An ``argparse.Namespace`` with two boolean attributes:
        ``force_update`` (False unless ``--force_update`` is given) and
        ``save_analysis_pool`` (True unless ``--no_save_analysis_pool``
        is given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--force_update", action="store_true", help="Force update datasets")
    # The on/off flags below write to the same destination; together with
    # set_defaults(True) this makes saving the analysis pool opt-out.
    parser.add_argument("--save_analysis_pool", action="store_true", help="Save analysis pool")
    parser.add_argument(
        "--no_save_analysis_pool", dest="save_analysis_pool", action="store_false", help="Do not save analysis pool"
    )
    parser.set_defaults(save_analysis_pool=True)
    # Passing argv through lets callers and tests supply arguments directly
    # instead of mutating sys.argv.
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
datasets_dir = DATA_CONFIG["datasets_dir"]
|
||||
force_update = False
|
||||
save_analysis_pool = True
|
||||
args = parse_args()
|
||||
force_update = args.force_update
|
||||
save_analysis_pool = args.save_analysis_pool
|
||||
datasets_dict = {"datasets": {}}
|
||||
solution_designer = SolutionDesigner()
|
||||
for dataset_id in OPENML_DATASET_IDS:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,12 @@ import pandas as pd
|
|||
from datasets import load_dataset
|
||||
from PIL import Image
|
||||
|
||||
from expo.data.dataset import ExpDataset, process_dataset, save_datasets_dict_to_yaml
|
||||
from expo.data.dataset import (
|
||||
ExpDataset,
|
||||
parse_args,
|
||||
process_dataset,
|
||||
save_datasets_dict_to_yaml,
|
||||
)
|
||||
from expo.insights.solution_designer import SolutionDesigner
|
||||
from expo.utils import DATA_CONFIG
|
||||
|
||||
|
|
@ -116,8 +121,9 @@ class HFExpDataset(ExpDataset):
|
|||
|
||||
if __name__ == "__main__":
|
||||
dataset_dir = DATA_CONFIG["datasets_dir"]
|
||||
save_analysis_pool = True
|
||||
force_update = False
|
||||
args = parse_args()
|
||||
force_update = args.force_update
|
||||
save_analysis_pool = args.save_analysis_pool
|
||||
datasets_dict = {"datasets": {}}
|
||||
solution_designer = SolutionDesigner()
|
||||
for dataset_meta in HFDATSETS:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue