mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-25 00:36:55 +02:00
Merge pull request #1545 from cyzus/sela-readme
indentation on readme, renaming
This commit is contained in:
commit
df51f45965
16 changed files with 284 additions and 273 deletions
|
|
@ -1,29 +1,26 @@
|
|||
# SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
|
||||
|
||||
|
||||
|
||||
## 1. Data Preparation
|
||||
|
||||
- Download Datasets:https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
|
||||
- Download and prepare datasets from scratch:
|
||||
```
|
||||
cd data
|
||||
python dataset.py --save_analysis_pool
|
||||
python hf_data.py --save_analysis_pool
|
||||
```
|
||||
You can either download the datasets from the link or prepare the datasets from scratch.
|
||||
- **Download Datasets:** [Dataset Link](https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink)
|
||||
- **Download and prepare datasets from scratch:**
|
||||
```bash
|
||||
cd data
|
||||
python dataset.py --save_analysis_pool
|
||||
python hf_data.py --save_analysis_pool
|
||||
```
|
||||
|
||||
## 2. Configs
|
||||
## 2. Configurations
|
||||
|
||||
### Data Config
|
||||
|
||||
`datasets.yaml` Provide base prompts, metrics, target columns for respective datasets
|
||||
|
||||
- Modify `datasets_dir` to the root directory of all the datasets in `data.yaml`
|
||||
|
||||
- **`datasets.yaml`:** Provides base prompts, metrics, and target columns for the respective datasets.
|
||||
- **`data.yaml`:** Modify `datasets_dir` to the base directory of all prepared datasets.
|
||||
|
||||
### LLM Config
|
||||
|
||||
```
|
||||
```yaml
|
||||
llm:
|
||||
api_type: 'openai'
|
||||
model: deepseek-coder
|
||||
|
|
@ -32,237 +29,57 @@ ### LLM Config
|
|||
temperature: 0.5
|
||||
```
|
||||
|
||||
### Budget
|
||||
Experiment rollouts k = 5, 10, 20
|
||||
|
||||
|
||||
### Prompt Usage
|
||||
|
||||
- Use the function `generate_task_requirement` in `dataset.py` to get task requirement.
|
||||
- If the method is non-DI-based, set `is_di=False`.
|
||||
- Use `utils.DATA_CONFIG` as `data_config`
|
||||
|
||||
|
||||
## 3. SELA
|
||||
|
||||
### Run SELA
|
||||
|
||||
#### Setup
|
||||
In the root directory,
|
||||
|
||||
```
|
||||
```bash
|
||||
pip install -e .
|
||||
|
||||
cd expo
|
||||
cd metagpt/ext/sela
|
||||
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
#### Run
|
||||
#### Running Experiments
|
||||
|
||||
- Examples
|
||||
```
|
||||
python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
|
||||
python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better
|
||||
```
|
||||
|
||||
|
||||
- `--rollouts` - The number of rollouts
|
||||
|
||||
- `--use_fixed_insights` - In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json`
|
||||
|
||||
- `--low_is_better` - If the dataset has reg metric, remember to use `--low_is_better`
|
||||
|
||||
- `--from_scratch` - Do not use pre-processed insight pool, generate new insight pool based on dataset before running MCTS, facilitating subsequent tuning to propose search space prompts
|
||||
|
||||
- `--role_timeout` - The timeout for the role
|
||||
- This feature limits the duration of a single simulation, making the experiment duration more controllable (for example, if you do ten rollouts and set role_timeout to 1,000, the experiment will stop at the latest after 10,000s)
|
||||
|
||||
|
||||
- `--max_depth` - The maximum depth of MCTS, default is 4 (nodes at this depth directly return the previous simulation result without further expansion)
|
||||
|
||||
- `--load_tree` - If MCTS was interrupted due to certain reasons but had already run multiple rollouts, you can use `--load_tree`.
|
||||
- For example:
|
||||
```
|
||||
- **Examples:**
|
||||
```bash
|
||||
python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
|
||||
```
|
||||
- If this was interrupted after running three rollouts, you can use `--load_tree`:
|
||||
```
|
||||
python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree
|
||||
python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
#### Ablation Study
|
||||
- **`--rollouts`:** The number of rollouts.
|
||||
- **`--use_fixed_insights`:** Include fixed insights saved in `expo/insights/fixed_insights.json`.
|
||||
- **`--low_is_better`:** Use this if the dataset has a regression metric.
|
||||
- **`--from_scratch`:** Generate a new insight pool based on the dataset before running MCTS.
|
||||
- **`--role_timeout`:** Limits the duration of a single simulation (e.g., `10 rollouts with timeout 1,000` = max 10,000s).
|
||||
- **`--max_depth`:** Set the maximum depth of MCTS (default is 4).
|
||||
- **`--load_tree`:** Load an existing MCTS tree if the previous experiment was interrupted.
|
||||
- Example:
|
||||
```bash
|
||||
python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
|
||||
```
|
||||
- To resume:
|
||||
```bash
|
||||
python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree
|
||||
```
|
||||
|
||||
**DI RandomSearch**
|
||||
### Ablation Study
|
||||
|
||||
- Single insight
|
||||
`python run_experiment.py --exp_mode rs --task titanic --rs_mode single`
|
||||
**RandomSearch**
|
||||
|
||||
- Set insight
|
||||
`python run_experiment.py --exp_mode rs --task titanic --rs_mode set`
|
||||
- **Use a single insight:**
|
||||
```bash
|
||||
python run_experiment.py --exp_mode rs --task titanic --rs_mode single
|
||||
```
|
||||
|
||||
|
||||
## 4. Evaluation
|
||||
|
||||
Each baseline needs to produce `dev_predictions.csv` and `test_predictions.csv`. Each CSV file only needs a `target` column.
|
||||
|
||||
- Use the function `evaluate_score` to evaluate.
|
||||
|
||||
#### MLE-Bench
|
||||
**Note: mle-bench requires python 3.11 or higher**
|
||||
```
|
||||
git clone https://github.com/openai/mle-bench.git
|
||||
cd mle-bench
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
```
|
||||
mlebench prepare -c <competition-id> --data-dir <dataset-dir-save-path>
|
||||
```
|
||||
|
||||
Enter the following command to run the experiment:
|
||||
```
|
||||
python run_experiment.py --exp_mode mcts --custom_dataset_dir <dataset-dir-save-path/prepared/public> --rollouts 10 --from_scratch --role_timeout 3600
|
||||
```
|
||||
|
||||
|
||||
## 5. Baselines
|
||||
|
||||
### AIDE
|
||||
|
||||
#### Setup
|
||||
The version of AIDE we use is dated September 30, 2024
|
||||
```
|
||||
git clone https://github.com/WecoAI/aideml.git
|
||||
git checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc
|
||||
```
|
||||
|
||||
Modify `aideml/aide/utils/config.yaml` - change `k_fold_validation`, `code model`, and `feedback model` as follows:
|
||||
|
||||
```yaml
|
||||
# agent hyperparams
|
||||
agent:
|
||||
# how many improvement iterations to run
|
||||
steps: 10
|
||||
# whether to instruct the agent to use CV (set to 1 to disable)
|
||||
k_fold_validation: 1
|
||||
# LLM settings for coding
|
||||
code:
|
||||
model: deepseek-coder
|
||||
temp: 0.5
|
||||
|
||||
# LLM settings for evaluating program output / tracebacks
|
||||
feedback:
|
||||
model: deepseek-coder
|
||||
temp: 0.5
|
||||
|
||||
# hyperparameters for the tree search
|
||||
search:
|
||||
max_debug_depth: 3
|
||||
debug_prob: 0.5
|
||||
num_drafts: 5
|
||||
```
|
||||
|
||||
Since DeepSeek is compatible with OpenAI's API, set `base_url` to your own URL and `api_key` to your own API key:
|
||||
|
||||
```
|
||||
export OPENAI_API_KEY="your api key"
|
||||
export OPENAI_BASE_URL="your own url"
|
||||
```
|
||||
|
||||
Modify `aideml/aide/backend/__init__.py`'s line 30 and below:
|
||||
|
||||
```python
|
||||
model_kwargs = model_kwargs | {
|
||||
"model": model,
|
||||
"temperature": temperature,
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
if "claude-" in model:
|
||||
query_func = backend_anthropic.query
|
||||
else:
|
||||
query_func = backend_openai.query
|
||||
```
|
||||
|
||||
Since deepseekV2.5 no longer supports system message using function call, modify `aideml/aide/agent.py`'s line 312:
|
||||
|
||||
```python
|
||||
response = cast(
|
||||
dict,
|
||||
query(
|
||||
system_message=None,
|
||||
user_message=prompt,
|
||||
func_spec=review_func_spec,
|
||||
model=self.acfg.feedback.model,
|
||||
temperature=self.acfg.feedback.temp,
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
Modify and install:
|
||||
|
||||
```
|
||||
cd aideml
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
#### Run
|
||||
|
||||
Run the following script to get the running results, a `log` folder and a `workspace` folder will be generated in the current directory
|
||||
The `log` folder will contain the experimental configuration and the generated scheme, and the `workspace` folder will save the final results generated by aide
|
||||
|
||||
```
|
||||
python experimenter/aide.py
|
||||
```
|
||||
|
||||
### Autogluon
|
||||
#### Setup
|
||||
```
|
||||
pip install -U pip
|
||||
pip install -U setuptools wheel
|
||||
pip install autogluon==1.1.1
|
||||
```
|
||||
|
||||
For Tabular data:
|
||||
```
|
||||
python run_experiment.py --exp_mode autogluon --task {task_name}
|
||||
```
|
||||
For Multimodal data:
|
||||
```
|
||||
python run_experiment.py --exp_mode autogluon --task {task_name} --is_multimodal
|
||||
```
|
||||
Replace {task_name} with the specific task you want to run.
|
||||
|
||||
|
||||
### AutoSklearn
|
||||
#### System requirements
|
||||
auto-sklearn has the following system requirements:
|
||||
|
||||
- Linux operating system (for example Ubuntu)
|
||||
|
||||
- Python (>=3.7)
|
||||
|
||||
- C++ compiler (with C++11 supports)
|
||||
|
||||
In case you try to install Auto-sklearn on a system where no wheel files for the pyrfr package are provided (see here for available wheels) you also need:
|
||||
|
||||
- SWIG [(get SWIG here).](https://www.swig.org/survey.html)
|
||||
|
||||
For an explanation of missing Microsoft Windows and macOS support please check the Section [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility).
|
||||
|
||||
#### Setup
|
||||
```
|
||||
pip install auto-sklearn==0.15.0
|
||||
```
|
||||
|
||||
#### Run
|
||||
```
|
||||
python run_experiment.py --exp_mode autosklearn --task titanic
|
||||
```
|
||||
|
||||
### Base DI
|
||||
For setup, check 4.
|
||||
- `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
|
||||
- Specifically instruct DI to use AutoGluon: `--special_instruction ag`
|
||||
- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking`
|
||||
- **Use a set of insights:**
|
||||
```bash
|
||||
python run_experiment.py --exp_mode rs --task titanic --rs_mode set
|
||||
```
|
||||
|
|
@ -1,3 +1,3 @@
|
|||
datasets_dir: "path/to/datasets" # path to the datasets directory
|
||||
work_dir: ../../workspace # path to the workspace directory
|
||||
work_dir: ../../../workspace # path to the workspace directory
|
||||
role_dir: storage/SELA # path to the role directory
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import os
|
||||
|
||||
from metagpt.ext.sela.data.dataset import SPECIAL_INSTRUCTIONS
|
||||
from metagpt.ext.sela.experimenter.mle_bench.instructions import (
|
||||
from metagpt.ext.sela.runner.mle_bench.instructions import (
|
||||
ADDITIONAL_NOTES,
|
||||
INSTRUCTIONS,
|
||||
INSTRUCTIONS_OBFUSCATED,
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ def async_timeout():
|
|||
return decorator
|
||||
|
||||
|
||||
class ResearchAssistant(DataInterpreter):
|
||||
class Experimenter(DataInterpreter):
|
||||
node_id: str = "0"
|
||||
start_task_id: int = 1
|
||||
state_saved: bool = False
|
||||
|
|
@ -78,7 +78,7 @@ class ResearchAssistant(DataInterpreter):
|
|||
self.planner.plan.task_map[str(self.start_task_id)].instruction = new_instruction
|
||||
self.remap_tasks()
|
||||
|
||||
def update_til_start_task(self, role: ResearchAssistant, backward: bool = True):
|
||||
def update_til_start_task(self, role: Experimenter, backward: bool = True):
|
||||
if backward:
|
||||
# make sure the previous task instructions are matched
|
||||
assert (
|
||||
|
|
@ -2,12 +2,12 @@ import argparse
|
|||
import asyncio
|
||||
|
||||
from metagpt.ext.sela.data.custom_task import get_mle_is_lower_better, get_mle_task_id
|
||||
from metagpt.ext.sela.experimenter.autogluon import GluonExperimenter
|
||||
from metagpt.ext.sela.experimenter.autosklearn import AutoSklearnExperimenter
|
||||
from metagpt.ext.sela.experimenter.custom import CustomExperimenter
|
||||
from metagpt.ext.sela.experimenter.experimenter import Experimenter
|
||||
from metagpt.ext.sela.experimenter.mcts import MCTSExperimenter
|
||||
from metagpt.ext.sela.experimenter.random_search import RandomSearchExperimenter
|
||||
from metagpt.ext.sela.runner.autogluon import GluonRunner
|
||||
from metagpt.ext.sela.runner.autosklearn import AutoSklearnRunner
|
||||
from metagpt.ext.sela.runner.custom import CustomRunner
|
||||
from metagpt.ext.sela.runner.mcts import MCTSRunner
|
||||
from metagpt.ext.sela.runner.random_search import RandomSearchRunner
|
||||
from metagpt.ext.sela.runner.runner import Runner
|
||||
|
||||
|
||||
def get_args(cmd=True):
|
||||
|
|
@ -74,24 +74,24 @@ def get_di_args(parser):
|
|||
|
||||
async def main(args):
|
||||
if args.exp_mode == "mcts":
|
||||
experimenter = MCTSExperimenter(args)
|
||||
runner = MCTSRunner(args)
|
||||
elif args.exp_mode == "greedy":
|
||||
experimenter = MCTSExperimenter(args, tree_mode="greedy")
|
||||
runner = MCTSRunner(args, tree_mode="greedy")
|
||||
elif args.exp_mode == "random":
|
||||
experimenter = MCTSExperimenter(args, tree_mode="random")
|
||||
runner = MCTSRunner(args, tree_mode="random")
|
||||
elif args.exp_mode == "rs":
|
||||
experimenter = RandomSearchExperimenter(args)
|
||||
runner = RandomSearchRunner(args)
|
||||
elif args.exp_mode == "base":
|
||||
experimenter = Experimenter(args)
|
||||
runner = Runner(args)
|
||||
elif args.exp_mode == "autogluon":
|
||||
experimenter = GluonExperimenter(args)
|
||||
runner = GluonRunner(args)
|
||||
elif args.exp_mode == "custom":
|
||||
experimenter = CustomExperimenter(args)
|
||||
runner = CustomRunner(args)
|
||||
elif args.exp_mode == "autosklearn":
|
||||
experimenter = AutoSklearnExperimenter(args)
|
||||
runner = AutoSklearnRunner(args)
|
||||
else:
|
||||
raise ValueError(f"Invalid exp_mode: {args.exp_mode}")
|
||||
await experimenter.run_experiment()
|
||||
await runner.run_experiment()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
198
metagpt/ext/sela/runner/README.md
Normal file
198
metagpt/ext/sela/runner/README.md
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
# SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
|
||||
|
||||
This document provides instructions for running baseline models. To start with, ensure that you prepare the datasets as instructed in `sela/README.md`.
|
||||
|
||||
## Baselines
|
||||
|
||||
### 1. AIDE
|
||||
|
||||
#### Setup
|
||||
|
||||
We use the AIDE version from September 30, 2024. Clone the repository and check out the specified commit:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/WecoAI/aideml.git
|
||||
git checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc
|
||||
```
|
||||
|
||||
|
||||
Modify `aideml/aide/utils/config.yaml` to set the following parameters:
|
||||
|
||||
```yaml
|
||||
# agent hyperparams
|
||||
agent:
|
||||
steps: 10 # Number of improvement iterations
|
||||
k_fold_validation: 1 # Set to 1 to disable cross-validation
|
||||
code:
|
||||
model: deepseek-coder
|
||||
temp: 0.5
|
||||
feedback:
|
||||
model: deepseek-coder
|
||||
temp: 0.5
|
||||
search:
|
||||
max_debug_depth: 3
|
||||
debug_prob: 0.5
|
||||
num_drafts: 5
|
||||
```
|
||||
|
||||
Since DeepSeek is compatible with OpenAI's API, set the OpenAI API key and base URL environment variables to your own DeepSeek API key and endpoint URL:
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="your api key"
|
||||
export OPENAI_BASE_URL="your own url"
|
||||
```
|
||||
|
||||
Modify `aideml/aide/backend/__init__.py` (line 30 and below):
|
||||
|
||||
```python
|
||||
model_kwargs = model_kwargs | {
|
||||
"model": model,
|
||||
"temperature": temperature,
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
if "claude-" in model:
|
||||
query_func = backend_anthropic.query
|
||||
else:
|
||||
query_func = backend_openai.query
|
||||
```
|
||||
|
||||
Since DeepSeek V2.5 no longer supports system messages when using function calls, modify `aideml/aide/agent.py` (line 312) to pass `system_message=None`:
|
||||
|
||||
```python
|
||||
response = cast(
|
||||
dict,
|
||||
query(
|
||||
system_message=None,
|
||||
user_message=prompt,
|
||||
func_spec=review_func_spec,
|
||||
model=self.acfg.feedback.model,
|
||||
temperature=self.acfg.feedback.temp,
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
Finally, install AIDE:
|
||||
|
||||
```bash
|
||||
cd aideml
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
#### Run
|
||||
|
||||
Execute the following script to generate results. A `log` folder (containing experimental configurations) and a `workspace` folder (storing final results) will be created:
|
||||
|
||||
```bash
|
||||
python runner/aide.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Autogluon
|
||||
|
||||
#### Setup
|
||||
|
||||
Install Autogluon:
|
||||
|
||||
```bash
|
||||
pip install -U pip
|
||||
pip install -U setuptools wheel
|
||||
pip install autogluon==1.1.1
|
||||
```
|
||||
|
||||
#### Run
|
||||
|
||||
For Tabular data:
|
||||
|
||||
```bash
|
||||
python run_experiment.py --exp_mode autogluon --task {task_name}
|
||||
```
|
||||
|
||||
For Multimodal data:
|
||||
|
||||
```bash
|
||||
python run_experiment.py --exp_mode autogluon --task {task_name} --is_multimodal
|
||||
```
|
||||
|
||||
Replace `{task_name}` with the specific task you want to run.
|
||||
|
||||
---
|
||||
|
||||
### 3. AutoSklearn
|
||||
|
||||
**Note:**
|
||||
AutoSklearn requires:
|
||||
- Linux operating system (e.g., Ubuntu)
|
||||
- Python (>=3.7)
|
||||
- C++ compiler (with C++11 support)
|
||||
|
||||
If installing on a system without wheel files for the `pyrfr` package, you also need:
|
||||
|
||||
- [SWIG](https://www.swig.org/survey.html)
|
||||
|
||||
Refer to the [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility) section for further details.
|
||||
|
||||
#### Setup
|
||||
|
||||
Install AutoSklearn:
|
||||
|
||||
```bash
|
||||
pip install auto-sklearn==0.15.0
|
||||
```
|
||||
|
||||
#### Run
|
||||
|
||||
Execute the following command for the Titanic task:
|
||||
|
||||
```bash
|
||||
python run_experiment.py --exp_mode autosklearn --task titanic
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Base Data Interpreter
|
||||
|
||||
Run the following command for the Titanic task:
|
||||
|
||||
```bash
|
||||
python run_experiment.py --exp_mode base --task titanic --num_experiments 10
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. Custom Baselines
|
||||
|
||||
To run additional baselines:
|
||||
|
||||
- Each baseline must produce `dev_predictions.csv` and `test_predictions.csv` with a `target` column.
|
||||
- Use the `evaluate_score` function for evaluation.
|
||||
|
||||
---
|
||||
|
||||
## MLE-Bench
|
||||
|
||||
**Note:** MLE-Bench requires Python 3.11 or higher.
|
||||
|
||||
#### Setup
|
||||
|
||||
Clone the repository and install:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/openai/mle-bench.git
|
||||
cd mle-bench
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
Prepare the data:
|
||||
|
||||
```bash
|
||||
mlebench prepare -c <competition-id> --data-dir <dataset-dir-save-path>
|
||||
```
|
||||
|
||||
#### Run the MLE-Bench Experiment
|
||||
|
||||
Run the following command to execute the experiment:
|
||||
|
||||
```bash
|
||||
python run_experiment.py --exp_mode mcts --custom_dataset_dir <dataset-dir-save-path/prepared/public> --rollouts 10 --from_scratch --role_timeout 3600
|
||||
```
|
||||
|
|
@ -3,7 +3,7 @@ from datetime import datetime
|
|||
|
||||
import pandas as pd
|
||||
|
||||
from metagpt.ext.sela.experimenter.custom import CustomExperimenter
|
||||
from metagpt.ext.sela.runner.custom import CustomRunner
|
||||
|
||||
|
||||
class AGRunner:
|
||||
|
|
@ -102,7 +102,7 @@ class AGRunner:
|
|||
return train_data, dev_data, dev_wo_target_data, test_data
|
||||
|
||||
|
||||
class GluonExperimenter(CustomExperimenter):
|
||||
class GluonRunner(CustomRunner):
|
||||
result_path: str = "results/autogluon"
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
|
|
@ -4,7 +4,7 @@ from functools import partial
|
|||
import pandas as pd
|
||||
|
||||
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
|
||||
from metagpt.ext.sela.experimenter.custom import CustomExperimenter
|
||||
from metagpt.ext.sela.runner.custom import CustomRunner
|
||||
|
||||
|
||||
def custom_scorer(y_true, y_pred, metric_name):
|
||||
|
|
@ -69,7 +69,7 @@ class ASRunner:
|
|||
return {"test_preds": test_preds, "dev_preds": dev_preds}
|
||||
|
||||
|
||||
class AutoSklearnExperimenter(CustomExperimenter):
|
||||
class AutoSklearnRunner(CustomRunner):
|
||||
result_path: str = "results/autosklearn"
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
|
|
@ -3,11 +3,11 @@ import os
|
|||
import pandas as pd
|
||||
|
||||
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
|
||||
from metagpt.ext.sela.experimenter.experimenter import Experimenter
|
||||
from metagpt.ext.sela.runner.runner import Runner
|
||||
from metagpt.ext.sela.search.tree_search import create_initial_state
|
||||
|
||||
|
||||
class CustomExperimenter(Experimenter):
|
||||
class CustomRunner(Runner):
|
||||
result_path: str = "results/custom"
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
|
|
@ -5,11 +5,11 @@ from metagpt.ext.sela.evaluation.evaluation import (
|
|||
node_evaluate_score_sela,
|
||||
)
|
||||
from metagpt.ext.sela.evaluation.visualize_mcts import get_tree_text
|
||||
from metagpt.ext.sela.experimenter.experimenter import Experimenter
|
||||
from metagpt.ext.sela.runner.runner import Runner
|
||||
from metagpt.ext.sela.search.search_algorithm import MCTS, Greedy, Random
|
||||
|
||||
|
||||
class MCTSExperimenter(Experimenter):
|
||||
class MCTSRunner(Runner):
|
||||
result_path: str = "results/mcts"
|
||||
|
||||
def __init__(self, args, tree_mode=None, **kwargs):
|
||||
|
|
@ -8,7 +8,7 @@ You will be given a machine learning task. You must solve the task by training a
|
|||
|
||||
BENCHMARK INSTRUCTIONS
|
||||
------
|
||||
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`.
|
||||
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `runner/mle_bench/instructions.py`.
|
||||
|
||||
- **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving.
|
||||
- **Dataset**: The dataset files for the competition are available in `{dataset_dir}`.
|
||||
|
|
@ -28,7 +28,7 @@ You will be given a machine learning task. You must solve the task by training a
|
|||
|
||||
BENCHMARK INSTRUCTIONS
|
||||
------
|
||||
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`.
|
||||
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `runner/mle_bench/instructions.py`.
|
||||
|
||||
- **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the task you are currently solving.
|
||||
- **Dataset**: The dataset files for the task are available in `{dataset_dir}/`.
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
from metagpt.ext.sela.experimenter.experimenter import Experimenter
|
||||
from metagpt.ext.sela.experimenter import Experimenter
|
||||
from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
|
||||
from metagpt.ext.sela.research_assistant import ResearchAssistant
|
||||
from metagpt.ext.sela.runner.runner import Runner
|
||||
from metagpt.ext.sela.utils import get_exp_pool_path
|
||||
|
||||
EXPS_PROMPT = """
|
||||
|
|
@ -10,7 +10,7 @@ When doing the tasks, you can refer to the insights below:
|
|||
"""
|
||||
|
||||
|
||||
class RandomSearchExperimenter(Experimenter):
|
||||
class RandomSearchRunner(Runner):
|
||||
result_path: str = "results/random_search"
|
||||
|
||||
async def run_experiment(self):
|
||||
|
|
@ -34,9 +34,7 @@ class RandomSearchExperimenter(Experimenter):
|
|||
|
||||
results = []
|
||||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(
|
||||
node_id=str(i), use_reflection=self.args.reflection, role_timeout=self.args.role_timeout
|
||||
)
|
||||
di = Experimenter(node_id=str(i), use_reflection=self.args.reflection, role_timeout=self.args.role_timeout)
|
||||
di.role_dir = f"{di.role_dir}_{self.args.task}"
|
||||
requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i])
|
||||
print(requirement)
|
||||
|
|
@ -6,12 +6,12 @@ import numpy as np
|
|||
import pandas as pd
|
||||
|
||||
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
|
||||
from metagpt.ext.sela.research_assistant import ResearchAssistant
|
||||
from metagpt.ext.sela.experimenter import Experimenter
|
||||
from metagpt.ext.sela.search.tree_search import create_initial_state
|
||||
from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook
|
||||
|
||||
|
||||
class Experimenter:
|
||||
class Runner:
|
||||
result_path: str = "results/base"
|
||||
data_config = DATA_CONFIG
|
||||
start_task_id = 1
|
||||
|
|
@ -83,9 +83,7 @@ class Experimenter:
|
|||
results = []
|
||||
|
||||
for i in range(self.args.num_experiments):
|
||||
di = ResearchAssistant(
|
||||
node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout
|
||||
)
|
||||
di = Experimenter(node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout)
|
||||
score_dict = await self.run_di(di, user_requirement, run_idx=i)
|
||||
results.append(
|
||||
{"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
|
||||
|
|
@ -15,8 +15,8 @@ from metagpt.ext.sela.data.dataset import (
|
|||
get_split_dataset_path,
|
||||
)
|
||||
from metagpt.ext.sela.evaluation.evaluation import evaluate_score
|
||||
from metagpt.ext.sela.experimenter import Experimenter, TimeoutException
|
||||
from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
|
||||
from metagpt.ext.sela.research_assistant import ResearchAssistant, TimeoutException
|
||||
from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger
|
||||
from metagpt.tools.tool_recommend import ToolRecommender
|
||||
from metagpt.utils.common import read_json_file
|
||||
|
|
@ -44,9 +44,9 @@ def initialize_di_root_node(state: dict, reflection: bool = True):
|
|||
reflection (bool, optional): Whether to use reflection. Defaults to True.
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the ResearchAssistant role and the root Node.
|
||||
tuple: A tuple containing the Experimenter role and the root Node.
|
||||
"""
|
||||
role = ResearchAssistant(
|
||||
role = Experimenter(
|
||||
node_id="0",
|
||||
start_task_id=state["start_task_id"],
|
||||
use_reflection=reflection,
|
||||
|
|
@ -204,14 +204,14 @@ class Node:
|
|||
role_dict["tool_recommender"] = ToolRecommender()
|
||||
elif isinstance(role_dict.get("tool_recommender", {}).get("tools"), dict):
|
||||
role_dict["tool_recommender"]["tools"] = list(role_dict["tool_recommender"]["tools"].keys())
|
||||
role = ResearchAssistant(**role_dict)
|
||||
role = Experimenter(**role_dict)
|
||||
if self.parent is not None: # TODO: Check this
|
||||
parent_role = self.parent.load_role()
|
||||
role.update_til_start_task(parent_role, backward=False)
|
||||
role.remap_tasks()
|
||||
return role
|
||||
|
||||
def save_new_role(self, role: ResearchAssistant):
|
||||
def save_new_role(self, role: Experimenter):
|
||||
role.node_id = self.id
|
||||
role.start_task_id = self.state["start_task_id"]
|
||||
role.state_saved = False
|
||||
|
|
@ -268,7 +268,7 @@ class Node:
|
|||
self.get_and_move_predictions("test")
|
||||
return score_dict
|
||||
|
||||
async def run_node(self, role: ResearchAssistant = None):
|
||||
async def run_node(self, role: Experimenter = None):
|
||||
if self.is_terminal() and role is not None:
|
||||
if role.state_saved:
|
||||
return self.raw_reward
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue