From 1aac79c3b379c47b0b80ad8efa216d417b3494a2 Mon Sep 17 00:00:00 2001
From: Cyzus Chi <chieftain@berkeley.edu>
Date: Mon, 28 Oct 2024 21:05:59 +0800
Subject: [PATCH 1/4] indentation on readme

---
 metagpt/ext/sela/README.md | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/metagpt/ext/sela/README.md b/metagpt/ext/sela/README.md
index c8df4eeba..3fa03ee08 100644
--- a/metagpt/ext/sela/README.md
+++ b/metagpt/ext/sela/README.md
@@ -179,25 +179,25 @@   # hyperparameters for the tree search
         "temperature": temperature,
         "max_tokens": max_tokens,
     }
-    if "claude-" in model:
-        query_func = backend_anthropic.query
-    else:
-        query_func = backend_openai.query
+if "claude-" in model:
+  query_func = backend_anthropic.query
+else:
+  query_func = backend_openai.query
 ```
 
 Since deepseekV2.5 no longer supports system message using function call, modify `aideml/aide/agent.py`'s line 312:
 
 ```python
 response = cast(
-            dict,
-            query(
-                system_message=None,
-                user_message=prompt,
-                func_spec=review_func_spec,
-                model=self.acfg.feedback.model,
-                temperature=self.acfg.feedback.temp,
-            ),
-        )
+    dict,
+    query(
+        system_message=None,
+        user_message=prompt,
+        func_spec=review_func_spec,
+        model=self.acfg.feedback.model,
+        temperature=self.acfg.feedback.temp,
+    ),
+)
 ```
 
 Modify and install:

From e0cbbf82f437a525412a730436acd6923ca4e75d Mon Sep 17 00:00:00 2001
From: Cyzus Chi <chieftain@berkeley.edu>
Date: Mon, 28 Oct 2024 21:42:46 +0800
Subject: [PATCH 2/4] rename experimenter package to runner and research_assistant module to experimenter

---
 metagpt/ext/sela/README.md                    |  2 +-
 metagpt/ext/sela/data/custom_task.py          |  2 +-
 ...{research_assistant.py => experimenter.py} |  0
 metagpt/ext/sela/run_experiment.py            | 30 +++++++++----------
 .../sela/{experimenter => runner}/__init__.py |  0
 .../ext/sela/{experimenter => runner}/aide.py |  0
 .../{experimenter => runner}/autogluon.py     |  4 +--
 .../{experimenter => runner}/autosklearn.py   |  4 +--
 .../sela/{experimenter => runner}/custom.py   |  4 +--
 .../ext/sela/{experimenter => runner}/mcts.py |  4 +--
 .../mle_bench/instructions.py                 |  4 +--
 .../{experimenter => runner}/random_search.py |  6 ++--
 .../experimenter.py => runner/runner.py}      |  4 +--
 metagpt/ext/sela/search/tree_search.py        |  2 +-
 14 files changed, 33 insertions(+), 33 deletions(-)
 rename metagpt/ext/sela/{research_assistant.py => experimenter.py} (100%)
 rename metagpt/ext/sela/{experimenter => runner}/__init__.py (100%)
 rename metagpt/ext/sela/{experimenter => runner}/aide.py (100%)
 rename metagpt/ext/sela/{experimenter => runner}/autogluon.py (98%)
 rename metagpt/ext/sela/{experimenter => runner}/autosklearn.py (96%)
 rename metagpt/ext/sela/{experimenter => runner}/custom.py (95%)
 rename metagpt/ext/sela/{experimenter => runner}/mcts.py (96%)
 rename metagpt/ext/sela/{experimenter => runner}/mle_bench/instructions.py (98%)
 rename metagpt/ext/sela/{experimenter => runner}/random_search.py (92%)
 rename metagpt/ext/sela/{experimenter/experimenter.py => runner/runner.py} (98%)

diff --git a/metagpt/ext/sela/README.md b/metagpt/ext/sela/README.md
index 3fa03ee08..829306e36 100644
--- a/metagpt/ext/sela/README.md
+++ b/metagpt/ext/sela/README.md
@@ -213,7 +213,7 @@ #### Run
 The `log` folder will contain the experimental configuration and the generated scheme, and the `workspace` folder will save the final results generated by aide
 
 ```
-python experimenter/aide.py
+python runner/aide.py
 ```
 
 ### Autogluon
diff --git a/metagpt/ext/sela/data/custom_task.py b/metagpt/ext/sela/data/custom_task.py
index 3371d5b1c..08a7cbabb 100644
--- a/metagpt/ext/sela/data/custom_task.py
+++ b/metagpt/ext/sela/data/custom_task.py
@@ -1,7 +1,7 @@
 import os
 
 from metagpt.ext.sela.data.dataset import SPECIAL_INSTRUCTIONS
-from metagpt.ext.sela.experimenter.mle_bench.instructions import (
+from metagpt.ext.sela.runner.mle_bench.instructions import (
     ADDITIONAL_NOTES,
     INSTRUCTIONS,
     INSTRUCTIONS_OBFUSCATED,
diff --git a/metagpt/ext/sela/research_assistant.py b/metagpt/ext/sela/experimenter.py
similarity index 100%
rename from metagpt/ext/sela/research_assistant.py
rename to metagpt/ext/sela/experimenter.py
diff --git a/metagpt/ext/sela/run_experiment.py b/metagpt/ext/sela/run_experiment.py
index 4cced19c3..32130a6fb 100644
--- a/metagpt/ext/sela/run_experiment.py
+++ b/metagpt/ext/sela/run_experiment.py
@@ -2,12 +2,12 @@ import argparse
 import asyncio
 
 from metagpt.ext.sela.data.custom_task import get_mle_is_lower_better, get_mle_task_id
-from metagpt.ext.sela.experimenter.autogluon import GluonExperimenter
-from metagpt.ext.sela.experimenter.autosklearn import AutoSklearnExperimenter
-from metagpt.ext.sela.experimenter.custom import CustomExperimenter
-from metagpt.ext.sela.experimenter.experimenter import Experimenter
-from metagpt.ext.sela.experimenter.mcts import MCTSExperimenter
-from metagpt.ext.sela.experimenter.random_search import RandomSearchExperimenter
+from metagpt.ext.sela.runner.autogluon import GluonRunner
+from metagpt.ext.sela.runner.autosklearn import AutoSklearnRunner
+from metagpt.ext.sela.runner.custom import CustomRunner
+from metagpt.ext.sela.runner.mcts import MCTSRunner
+from metagpt.ext.sela.runner.random_search import RandomSearchRunner
+from metagpt.ext.sela.runner.runner import Runner
 
 
 def get_args(cmd=True):
@@ -74,24 +74,24 @@ def get_di_args(parser):
 
 async def main(args):
     if args.exp_mode == "mcts":
-        experimenter = MCTSExperimenter(args)
+        runner = MCTSRunner(args)
     elif args.exp_mode == "greedy":
-        experimenter = MCTSExperimenter(args, tree_mode="greedy")
+        runner = MCTSRunner(args, tree_mode="greedy")
     elif args.exp_mode == "random":
-        experimenter = MCTSExperimenter(args, tree_mode="random")
+        runner = MCTSRunner(args, tree_mode="random")
     elif args.exp_mode == "rs":
-        experimenter = RandomSearchExperimenter(args)
+        runner = RandomSearchRunner(args)
     elif args.exp_mode == "base":
-        experimenter = Experimenter(args)
+        runner = Runner(args)
     elif args.exp_mode == "autogluon":
-        experimenter = GluonExperimenter(args)
+        runner = GluonRunner(args)
     elif args.exp_mode == "custom":
-        experimenter = CustomExperimenter(args)
+        runner = CustomRunner(args)
     elif args.exp_mode == "autosklearn":
-        experimenter = AutoSklearnExperimenter(args)
+        runner = AutoSklearnRunner(args)
     else:
         raise ValueError(f"Invalid exp_mode: {args.exp_mode}")
-    await experimenter.run_experiment()
+    await runner.run_experiment()
 
 
 if __name__ == "__main__":
diff --git a/metagpt/ext/sela/experimenter/__init__.py b/metagpt/ext/sela/runner/__init__.py
similarity index 100%
rename from metagpt/ext/sela/experimenter/__init__.py
rename to metagpt/ext/sela/runner/__init__.py
diff --git a/metagpt/ext/sela/experimenter/aide.py b/metagpt/ext/sela/runner/aide.py
similarity index 100%
rename from metagpt/ext/sela/experimenter/aide.py
rename to metagpt/ext/sela/runner/aide.py
diff --git a/metagpt/ext/sela/experimenter/autogluon.py b/metagpt/ext/sela/runner/autogluon.py
similarity index 98%
rename from metagpt/ext/sela/experimenter/autogluon.py
rename to metagpt/ext/sela/runner/autogluon.py
index f547ce4ba..48737da04 100644
--- a/metagpt/ext/sela/experimenter/autogluon.py
+++ b/metagpt/ext/sela/runner/autogluon.py
@@ -3,7 +3,7 @@ from datetime import datetime
 
 import pandas as pd
 
-from metagpt.ext.sela.experimenter.custom import CustomExperimenter
+from metagpt.ext.sela.runner.custom import CustomRunner
 
 
 class AGRunner:
@@ -102,7 +102,7 @@ class AGRunner:
         return train_data, dev_data, dev_wo_target_data, test_data
 
 
-class GluonExperimenter(CustomExperimenter):
+class GluonRunner(CustomRunner):
     result_path: str = "results/autogluon"
 
     def __init__(self, args, **kwargs):
diff --git a/metagpt/ext/sela/experimenter/autosklearn.py b/metagpt/ext/sela/runner/autosklearn.py
similarity index 96%
rename from metagpt/ext/sela/experimenter/autosklearn.py
rename to metagpt/ext/sela/runner/autosklearn.py
index f6ff267e7..7d0eb364e 100644
--- a/metagpt/ext/sela/experimenter/autosklearn.py
+++ b/metagpt/ext/sela/runner/autosklearn.py
@@ -4,7 +4,7 @@ from functools import partial
 import pandas as pd
 
 from metagpt.ext.sela.evaluation.evaluation import evaluate_score
-from metagpt.ext.sela.experimenter.custom import CustomExperimenter
+from metagpt.ext.sela.runner.custom import CustomRunner
 
 
 def custom_scorer(y_true, y_pred, metric_name):
@@ -69,7 +69,7 @@ class ASRunner:
         return {"test_preds": test_preds, "dev_preds": dev_preds}
 
 
-class AutoSklearnExperimenter(CustomExperimenter):
+class AutoSklearnRunner(CustomRunner):
     result_path: str = "results/autosklearn"
 
     def __init__(self, args, **kwargs):
diff --git a/metagpt/ext/sela/experimenter/custom.py b/metagpt/ext/sela/runner/custom.py
similarity index 95%
rename from metagpt/ext/sela/experimenter/custom.py
rename to metagpt/ext/sela/runner/custom.py
index 70df1a78e..e9a8ee276 100644
--- a/metagpt/ext/sela/experimenter/custom.py
+++ b/metagpt/ext/sela/runner/custom.py
@@ -3,11 +3,11 @@ import os
 import pandas as pd
 
 from metagpt.ext.sela.evaluation.evaluation import evaluate_score
-from metagpt.ext.sela.experimenter.experimenter import Experimenter
+from metagpt.ext.sela.runner.runner import Runner
 from metagpt.ext.sela.search.tree_search import create_initial_state
 
 
-class CustomExperimenter(Experimenter):
+class CustomRunner(Runner):
     result_path: str = "results/custom"
 
     def __init__(self, args, **kwargs):
diff --git a/metagpt/ext/sela/experimenter/mcts.py b/metagpt/ext/sela/runner/mcts.py
similarity index 96%
rename from metagpt/ext/sela/experimenter/mcts.py
rename to metagpt/ext/sela/runner/mcts.py
index 9fd66121d..8b6c14100 100644
--- a/metagpt/ext/sela/experimenter/mcts.py
+++ b/metagpt/ext/sela/runner/mcts.py
@@ -5,11 +5,11 @@ from metagpt.ext.sela.evaluation.evaluation import (
     node_evaluate_score_sela,
 )
 from metagpt.ext.sela.evaluation.visualize_mcts import get_tree_text
-from metagpt.ext.sela.experimenter.experimenter import Experimenter
+from metagpt.ext.sela.runner.runner import Runner
 from metagpt.ext.sela.search.search_algorithm import MCTS, Greedy, Random
 
 
-class MCTSExperimenter(Experimenter):
+class MCTSRunner(Runner):
     result_path: str = "results/mcts"
 
     def __init__(self, args, tree_mode=None, **kwargs):
diff --git a/metagpt/ext/sela/experimenter/mle_bench/instructions.py b/metagpt/ext/sela/runner/mle_bench/instructions.py
similarity index 98%
rename from metagpt/ext/sela/experimenter/mle_bench/instructions.py
rename to metagpt/ext/sela/runner/mle_bench/instructions.py
index 5c9c7b70f..136726bcf 100644
--- a/metagpt/ext/sela/experimenter/mle_bench/instructions.py
+++ b/metagpt/ext/sela/runner/mle_bench/instructions.py
@@ -8,7 +8,7 @@ You will be given a machine learning task. You must solve the task by training a
 
 BENCHMARK INSTRUCTIONS
 ------
-First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`.
+First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `runner/mle_bench/instructions.py`.
 
 - **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving.
 - **Dataset**: The dataset files for the competition are available in `{dataset_dir}`.
@@ -28,7 +28,7 @@ You will be given a machine learning task. You must solve the task by training a
 
 BENCHMARK INSTRUCTIONS
 ------
-First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `experimenter/mle_bench/instructions.py`.
+First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `runner/mle_bench/instructions.py`.
 
 - **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the task you are currently solving.
 - **Dataset**: The dataset files for the task are available in `{dataset_dir}/`.
diff --git a/metagpt/ext/sela/experimenter/random_search.py b/metagpt/ext/sela/runner/random_search.py
similarity index 92%
rename from metagpt/ext/sela/experimenter/random_search.py
rename to metagpt/ext/sela/runner/random_search.py
index 5617ee601..8ce42f0ff 100644
--- a/metagpt/ext/sela/experimenter/random_search.py
+++ b/metagpt/ext/sela/runner/random_search.py
@@ -1,6 +1,6 @@
-from metagpt.ext.sela.experimenter.experimenter import Experimenter
+from metagpt.ext.sela.experimenter import ResearchAssistant
 from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
-from metagpt.ext.sela.research_assistant import ResearchAssistant
+from metagpt.ext.sela.runner.runner import Runner
 from metagpt.ext.sela.utils import get_exp_pool_path
 
 EXPS_PROMPT = """
@@ -10,7 +10,7 @@ When doing the tasks, you can refer to the insights below:
 """
 
 
-class RandomSearchExperimenter(Experimenter):
+class RandomSearchRunner(Runner):
     result_path: str = "results/random_search"
 
     async def run_experiment(self):
diff --git a/metagpt/ext/sela/experimenter/experimenter.py b/metagpt/ext/sela/runner/runner.py
similarity index 98%
rename from metagpt/ext/sela/experimenter/experimenter.py
rename to metagpt/ext/sela/runner/runner.py
index 3df46b74b..7ab83c6c3 100644
--- a/metagpt/ext/sela/experimenter/experimenter.py
+++ b/metagpt/ext/sela/runner/runner.py
@@ -6,12 +6,12 @@ import numpy as np
 import pandas as pd
 
 from metagpt.ext.sela.evaluation.evaluation import evaluate_score
-from metagpt.ext.sela.research_assistant import ResearchAssistant
+from metagpt.ext.sela.experimenter import ResearchAssistant
 from metagpt.ext.sela.search.tree_search import create_initial_state
 from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook
 
 
-class Experimenter:
+class Runner:
     result_path: str = "results/base"
     data_config = DATA_CONFIG
     start_task_id = 1
diff --git a/metagpt/ext/sela/search/tree_search.py b/metagpt/ext/sela/search/tree_search.py
index cde8dc82a..684426fe6 100644
--- a/metagpt/ext/sela/search/tree_search.py
+++ b/metagpt/ext/sela/search/tree_search.py
@@ -15,8 +15,8 @@ from metagpt.ext.sela.data.dataset import (
     get_split_dataset_path,
 )
 from metagpt.ext.sela.evaluation.evaluation import evaluate_score
+from metagpt.ext.sela.experimenter import ResearchAssistant, TimeoutException
 from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
-from metagpt.ext.sela.research_assistant import ResearchAssistant, TimeoutException
 from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger
 from metagpt.tools.tool_recommend import ToolRecommender
 from metagpt.utils.common import read_json_file

From 25299e1f127b9fa3baa04ac057122375a5bde6ee Mon Sep 17 00:00:00 2001
From: Cyzus Chi <chieftain@berkeley.edu>
Date: Tue, 29 Oct 2024 14:24:38 +0800
Subject: [PATCH 3/4] change research assistant to experimenter

---
 metagpt/ext/sela/data.yaml               |  2 +-
 metagpt/ext/sela/experimenter.py         |  4 ++--
 metagpt/ext/sela/runner/random_search.py |  6 ++----
 metagpt/ext/sela/runner/runner.py        |  6 ++----
 metagpt/ext/sela/search/tree_search.py   | 12 ++++++------
 5 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/metagpt/ext/sela/data.yaml b/metagpt/ext/sela/data.yaml
index 5f4a290ea..7da5dbb3c 100644
--- a/metagpt/ext/sela/data.yaml
+++ b/metagpt/ext/sela/data.yaml
@@ -1,3 +1,3 @@
 datasets_dir: "path/to/datasets" # path to the datasets directory
-work_dir: ../../workspace # path to the workspace directory
+work_dir: ../../../workspace # path to the workspace directory
 role_dir: storage/SELA # path to the role directory
\ No newline at end of file
diff --git a/metagpt/ext/sela/experimenter.py b/metagpt/ext/sela/experimenter.py
index 2c698c1d2..b05ea2fc3 100644
--- a/metagpt/ext/sela/experimenter.py
+++ b/metagpt/ext/sela/experimenter.py
@@ -60,7 +60,7 @@ def async_timeout():
     return decorator
 
 
-class ResearchAssistant(DataInterpreter):
+class Experimenter(DataInterpreter):
     node_id: str = "0"
     start_task_id: int = 1
     state_saved: bool = False
@@ -78,7 +78,7 @@ class ResearchAssistant(DataInterpreter):
             self.planner.plan.task_map[str(self.start_task_id)].instruction = new_instruction
             self.remap_tasks()
 
-    def update_til_start_task(self, role: ResearchAssistant, backward: bool = True):
+    def update_til_start_task(self, role: Experimenter, backward: bool = True):
         if backward:
             # make sure the previous task instructions are matched
             assert (
diff --git a/metagpt/ext/sela/runner/random_search.py b/metagpt/ext/sela/runner/random_search.py
index 8ce42f0ff..b1f43ac0c 100644
--- a/metagpt/ext/sela/runner/random_search.py
+++ b/metagpt/ext/sela/runner/random_search.py
@@ -1,4 +1,4 @@
-from metagpt.ext.sela.experimenter import ResearchAssistant
+from metagpt.ext.sela.experimenter import Experimenter
 from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
 from metagpt.ext.sela.runner.runner import Runner
 from metagpt.ext.sela.utils import get_exp_pool_path
@@ -34,9 +34,7 @@ class RandomSearchRunner(Runner):
 
         results = []
         for i in range(self.args.num_experiments):
-            di = ResearchAssistant(
-                node_id=str(i), use_reflection=self.args.reflection, role_timeout=self.args.role_timeout
-            )
+            di = Experimenter(node_id=str(i), use_reflection=self.args.reflection, role_timeout=self.args.role_timeout)
             di.role_dir = f"{di.role_dir}_{self.args.task}"
             requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i])
             print(requirement)
diff --git a/metagpt/ext/sela/runner/runner.py b/metagpt/ext/sela/runner/runner.py
index 7ab83c6c3..4b5504e09 100644
--- a/metagpt/ext/sela/runner/runner.py
+++ b/metagpt/ext/sela/runner/runner.py
@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 
 from metagpt.ext.sela.evaluation.evaluation import evaluate_score
-from metagpt.ext.sela.experimenter import ResearchAssistant
+from metagpt.ext.sela.experimenter import Experimenter
 from metagpt.ext.sela.search.tree_search import create_initial_state
 from metagpt.ext.sela.utils import DATA_CONFIG, save_notebook
 
@@ -83,9 +83,7 @@ class Runner:
         results = []
 
         for i in range(self.args.num_experiments):
-            di = ResearchAssistant(
-                node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout
-            )
+            di = Experimenter(node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout)
             score_dict = await self.run_di(di, user_requirement, run_idx=i)
             results.append(
                 {"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
diff --git a/metagpt/ext/sela/search/tree_search.py b/metagpt/ext/sela/search/tree_search.py
index 684426fe6..eac26c86c 100644
--- a/metagpt/ext/sela/search/tree_search.py
+++ b/metagpt/ext/sela/search/tree_search.py
@@ -15,7 +15,7 @@ from metagpt.ext.sela.data.dataset import (
     get_split_dataset_path,
 )
 from metagpt.ext.sela.evaluation.evaluation import evaluate_score
-from metagpt.ext.sela.experimenter import ResearchAssistant, TimeoutException
+from metagpt.ext.sela.experimenter import Experimenter, TimeoutException
 from metagpt.ext.sela.insights.instruction_generator import InstructionGenerator
 from metagpt.ext.sela.utils import get_exp_pool_path, load_execute_notebook, mcts_logger
 from metagpt.tools.tool_recommend import ToolRecommender
@@ -44,9 +44,9 @@ def initialize_di_root_node(state: dict, reflection: bool = True):
         reflection (bool, optional): Whether to use reflection. Defaults to True.
 
     Returns:
-        tuple: A tuple containing the ResearchAssistant role and the root Node.
+        tuple: A tuple containing the Experimenter role and the root Node.
     """
-    role = ResearchAssistant(
+    role = Experimenter(
         node_id="0",
         start_task_id=state["start_task_id"],
         use_reflection=reflection,
@@ -204,14 +204,14 @@ class Node:
             role_dict["tool_recommender"] = ToolRecommender()
         elif isinstance(role_dict.get("tool_recommender", {}).get("tools"), dict):
             role_dict["tool_recommender"]["tools"] = list(role_dict["tool_recommender"]["tools"].keys())
-        role = ResearchAssistant(**role_dict)
+        role = Experimenter(**role_dict)
         if self.parent is not None:  # TODO: Check this
             parent_role = self.parent.load_role()
             role.update_til_start_task(parent_role, backward=False)
         role.remap_tasks()
         return role
 
-    def save_new_role(self, role: ResearchAssistant):
+    def save_new_role(self, role: Experimenter):
         role.node_id = self.id
         role.start_task_id = self.state["start_task_id"]
         role.state_saved = False
@@ -268,7 +268,7 @@ class Node:
             self.get_and_move_predictions("test")
         return score_dict
 
-    async def run_node(self, role: ResearchAssistant = None):
+    async def run_node(self, role: Experimenter = None):
         if self.is_terminal() and role is not None:
             if role.state_saved:
                 return self.raw_reward

From 37698b3f636d7d2ffdd8f8f754084b99b81b2158 Mon Sep 17 00:00:00 2001
From: Cyzus Chi <chieftain@berkeley.edu>
Date: Tue, 29 Oct 2024 14:55:39 +0800
Subject: [PATCH 4/4] update readme - put baseline readme in /runner

---
 metagpt/ext/sela/README.md        | 271 +++++-------------------------
 metagpt/ext/sela/runner/README.md | 198 ++++++++++++++++++++++
 2 files changed, 242 insertions(+), 227 deletions(-)
 create mode 100644 metagpt/ext/sela/runner/README.md

diff --git a/metagpt/ext/sela/README.md b/metagpt/ext/sela/README.md
index 829306e36..a942fdb7d 100644
--- a/metagpt/ext/sela/README.md
+++ b/metagpt/ext/sela/README.md
@@ -1,29 +1,26 @@
 # SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
 
-
-
 ## 1. Data Preparation
 
-- Download Datasets：https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink
-- Download and prepare datasets from scratch:
-```
-cd data
-python dataset.py --save_analysis_pool
-python hf_data.py --save_analysis_pool
-```
+You can either download the datasets from the link or prepare the datasets from scratch.
+- **Download Datasets:** [Dataset Link](https://deepwisdom.feishu.cn/drive/folder/RVyofv9cvlvtxKdddt2cyn3BnTc?from=from_copylink)
+- **Download and prepare datasets from scratch:**
+    ```bash
+    cd data
+    python dataset.py --save_analysis_pool
+    python hf_data.py --save_analysis_pool
+    ```
 
-## 2. Configs
+## 2. Configurations
 
 ### Data Config
 
-`datasets.yaml` Provide base prompts, metrics, target columns for respective datasets
-
-- Modify `datasets_dir` to the root directory of all the datasets in `data.yaml`
-
+- **`datasets.yaml`:** Provide base prompts, metrics, and target columns for respective datasets.
+- **`data.yaml`:** Modify `datasets_dir` to the base directory of all prepared datasets.
 
 ### LLM Config
 
-```
+```yaml
 llm:
   api_type: 'openai'
   model: deepseek-coder
@@ -32,237 +29,57 @@ ### LLM Config
   temperature: 0.5
 ```
 
-### Budget
-Experiment rollouts k = 5, 10, 20
-
-
-### Prompt Usage
-
-- Use the function `generate_task_requirement` in `dataset.py` to get task requirement.
-  - If the method is non-DI-based, set `is_di=False`.
-  - Use `utils.DATA_CONFIG` as `data_config`
-
 
 ## 3. SELA
 
 ### Run SELA
 
 #### Setup
-In the root directory, 
 
-```
+```bash
 pip install -e .
 
-cd expo
+cd metagpt/ext/sela
 
 pip install -r requirements.txt
 ```
 
-#### Run
+#### Running Experiments
 
-- Examples
-  ```
-  python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
-  python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better
-  ```
-
-
-- `--rollouts` - The number of rollouts
-
-- `--use_fixed_insights` - In addition to the generated insights, include the fixed insights saved in `expo/insights/fixed_insights.json`
-  
-- `--low_is_better` - If the dataset has reg metric, remember to use `--low_is_better`
-
-- `--from_scratch` - Do not use pre-processed insight pool, generate new insight pool based on dataset before running MCTS, facilitating subsequent tuning to propose search space prompts
-
-- `--role_timeout` - The timeout for the role
-  - This feature limits the duration of a single simulation, making the experiment duration more controllable (for example, if you do ten rollouts and set role_timeout to 1,000, the experiment will stop at the latest after 10,000s)
-
-
-- `--max_depth` - The maximum depth of MCTS, default is 4 (nodes at this depth directly return the previous simulation result without further expansion)
-
-- `--load_tree` - If MCTS was interrupted due to certain reasons but had already run multiple rollouts, you can use `--load_tree`.
-  - For example:
-    ```
+- **Examples:**
+    ```bash
     python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
-    ```
-  - If this was interrupted after running three rollouts, you can use `--load_tree`:
-    ```
-    python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree
+    python run_experiment.py --exp_mode mcts --task house-prices --rollouts 10 --low_is_better
     ```
 
+#### Parameters
 
-#### Ablation Study
+- **`--rollouts`:** The number of rollouts.
+- **`--use_fixed_insights`:** Include fixed insights saved in `metagpt/ext/sela/insights/fixed_insights.json`.
+- **`--low_is_better`:** Use this if the dataset has a regression metric.
+- **`--from_scratch`:** Generate a new insight pool based on the dataset before running MCTS.
+- **`--role_timeout`:** Limits the duration of a single simulation, which bounds the total experiment time (e.g., 10 rollouts with `--role_timeout 1000` stop after 10,000s at the latest).
+- **`--max_depth`:** Set the maximum depth of MCTS (default is 4).
+- **`--load_tree`:** Load an existing MCTS tree if the previous experiment was interrupted.
+    - Example:
+      ```bash
+      python run_experiment.py --exp_mode mcts --task titanic --rollouts 10
+      ```
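+    - To resume after the run above was interrupted (e.g., after 3 of its 10 rollouts), run the remaining rollouts with `--load_tree`: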
+      ```bash
+      python run_experiment.py --exp_mode mcts --task titanic --rollouts 7 --load_tree
+      ```
 
-**DI RandomSearch**
+### Ablation Study
 
-- Single insight
-`python run_experiment.py --exp_mode rs --task titanic --rs_mode single`
+**RandomSearch**
 
-- Set insight
-`python run_experiment.py --exp_mode rs --task titanic --rs_mode set`
+- **Use a single insight:**
+    ```bash
+    python run_experiment.py --exp_mode rs --task titanic --rs_mode single
+    ```
 
-
-## 4. Evaluation
-
-Each baseline needs to produce `dev_predictions.csv`和`test_predictions.csv`. Each csv file only needs a `target` column.
-
-- Use the function `evaluate_score` to evaluate.
-
-#### MLE-Bench
-**Note: mle-bench requires python 3.11 or higher**
-```
-git clone https://github.com/openai/mle-bench.git
-cd mle-bench
-pip install -e .
-```
-
-```
-mlebench prepare -c <competition-id> --data-dir <dataset-dir-save-path>
-```
-
-Enter the following command to run the experiment:
-```
-python run_experiment.py --exp_mode mcts --custom_dataset_dir <dataset-dir-save-path/prepared/public> --rollouts 10 --from_scratch --role_timeout 3600
-```
-
-
-## 5. Baselines
-
-### AIDE
-
-#### Setup
-The version of AIDE we use is dated September 30, 2024
-```
-git clone https://github.com/WecoAI/aideml.git
-git checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc
-```
-
-Modify `aideml/aide/utils/config.yaml` - change `k_fold_validation`, `code model`, and `feedback model` as follows:
-
-```yaml
-# agent hyperparams
-agent:
-  # how many improvement iterations to run
-  steps: 10
-  # whether to instruct the agent to use CV (set to 1 to disable)
-  k_fold_validation: 1
-  # LLM settings for coding
-  code:
-    model: deepseek-coder
-    temp: 0.5
-
-  # LLM settings for evaluating program output / tracebacks
-  feedback:
-    model: deepseek-coder
-    temp: 0.5
-
-  # hyperparameters for the tree search
-  search:
-    max_debug_depth: 3
-    debug_prob: 0.5
-    num_drafts: 5
-```
-
-Since Deepseek is compatible to OpenAI's API, change `base_url` into `your own url`，`api_key` into `your api key`
-
-```
-export OPENAI_API_KEY="your api key"
-export OPENAI_BASE_URL="your own url"
-```
-
-Modify `aideml/aide/backend/__init__.py`'s line 30 and below:
-
-```python
-model_kwargs = model_kwargs | {
-        "model": model,
-        "temperature": temperature,
-        "max_tokens": max_tokens,
-    }
-if "claude-" in model:
-  query_func = backend_anthropic.query
-else:
-  query_func = backend_openai.query
-```
-
-Since deepseekV2.5 no longer supports system message using function call, modify `aideml/aide/agent.py`'s line 312:
-
-```python
-response = cast(
-    dict,
-    query(
-        system_message=None,
-        user_message=prompt,
-        func_spec=review_func_spec,
-        model=self.acfg.feedback.model,
-        temperature=self.acfg.feedback.temp,
-    ),
-)
-```
-
-Modify and install:
-
-```
-cd aideml
-pip install -e .
-```
-
-#### Run
-
-Run the following script to get the running results, a `log` folder and a `workspace` folder will be generated in the current directory
-The `log` folder will contain the experimental configuration and the generated scheme, and the `workspace` folder will save the final results generated by aide
-
-```
-python runner/aide.py
-```
-
-### Autogluon
-#### Setup
-```
-pip install -U pip
-pip install -U setuptools wheel
-pip install autogluon==1.1.1
-```
-
-For Tabular data:
-```
-python run_expriment.py --exp_mode autogluon --task {task_name}
-```
-For Multimodal data:
-```
-python run_expriment.py --exp_mode autogluon --task {task_name} --is_multimodal
-```
-Replace {task_name} with the specific task you want to run.
-
-
-### AutoSklearn
-#### System requirements
-auto-sklearn has the following system requirements:
-
-- Linux operating system (for example Ubuntu)
-
-- Python (>=3.7)
-
-- C++ compiler (with C++11 supports)
-
-In case you try to install Auto-sklearn on a system where no wheel files for the pyrfr package are provided (see here for available wheels) you also need:
-
-- SWIG [(get SWIG here).](https://www.swig.org/survey.html)
-
-For an explanation of missing Microsoft Windows and macOS support please check the Section [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility).
-
-#### Setup
-```
-pip install auto-sklearn==0.15.0
-```
-
-#### Run
-```
-python run_experiment.py --exp_mode autosklearn --task titanic
-```
-
-### Base DI 
-For setup, check 4.
-- `python run_experiment.py --exp_mode base --task titanic --num_experiments 10`
-- Specifically instruct DI to use AutoGluon: `--special_instruction ag`
-- Specifically instruct DI to use the stacking ensemble method: `--special_instruction stacking`
\ No newline at end of file
+- **Use a set of insights:**
+    ```bash
+    python run_experiment.py --exp_mode rs --task titanic --rs_mode set
+    ```
\ No newline at end of file
diff --git a/metagpt/ext/sela/runner/README.md b/metagpt/ext/sela/runner/README.md
new file mode 100644
index 000000000..7c031f1ee
--- /dev/null
+++ b/metagpt/ext/sela/runner/README.md
@@ -0,0 +1,198 @@
+# SELA: Tree-Search Enhanced LLM Agents for Automated Machine Learning
+
+This document provides instructions for running the baseline models. Before you begin, prepare the datasets as described in `sela/README.md`.
+
+## Baselines
+
+### 1. AIDE
+
+#### Setup
+
+We use the AIDE version from September 30, 2024. Clone the repository and check out the specified commit:
+
+```bash
+git clone https://github.com/WecoAI/aideml.git
+git -C aideml checkout 77953247ea0a5dc1bd502dd10939dd6d7fdcc5cc
+```
+
+
+Modify `aideml/aide/utils/config.yaml` to set the following parameters:
+
+```yaml
+# agent hyperparams
+agent:
+  steps: 10  # Number of improvement iterations
+  k_fold_validation: 1  # Set to 1 to disable cross-validation
+  code:
+    model: deepseek-coder
+    temp: 0.5
+  feedback:
+    model: deepseek-coder
+    temp: 0.5
+  search:
+    max_debug_depth: 3
+    debug_prob: 0.5
+    num_drafts: 5
+```
+
+Update your OpenAI API credentials in the environment:
+
+```bash
+export OPENAI_API_KEY="your api key"
+export OPENAI_BASE_URL="your own url"
+```
+
+Modify `aideml/aide/backend/__init__.py` (line 30 and below):
+
+```python
+model_kwargs = model_kwargs | {
+    "model": model,
+    "temperature": temperature,
+    "max_tokens": max_tokens,
+}
+if "claude-" in model:
+    query_func = backend_anthropic.query
+else:
+    query_func = backend_openai.query
+```
+
+Since DeepSeek V2.5 no longer accepts a system message together with function calling, set `system_message=None` in `aideml/aide/agent.py` (line 312):
+
+```python
+response = cast(
+    dict,
+    query(
+        system_message=None,
+        user_message=prompt,
+        func_spec=review_func_spec,
+        model=self.acfg.feedback.model,
+        temperature=self.acfg.feedback.temp,
+    ),
+)
+```
+
+Finally, install AIDE:
+
+```bash
+cd aideml
+pip install -e .
+```
+
+#### Run
+
+Execute the following script to generate results. A `log` folder (containing experimental configurations) and a `workspace` folder (storing final results) will be created:
+
+```bash
+python runner/aide.py
+```
+
+---
+
+### 2. AutoGluon
+
+#### Setup
+
+Install AutoGluon:
+
+```bash
+pip install -U pip
+pip install -U setuptools wheel
+pip install autogluon==1.1.1
+```
+
+#### Run
+
+For Tabular data:
+
+```bash
+python run_experiment.py --exp_mode autogluon --task {task_name}
+```
+
+For Multimodal data:
+
+```bash
+python run_experiment.py --exp_mode autogluon --task {task_name} --is_multimodal
+```
+
+Replace `{task_name}` with the specific task you want to run.
+
+---
+
+### 3. AutoSklearn
+
+**Note:**
+AutoSklearn requires:
+- Linux operating system (e.g., Ubuntu)
+- Python (>=3.7)
+- C++ compiler (with C++11 support)
+
+If installing on a system without wheel files for the `pyrfr` package, you also need:
+
+- [SWIG](https://www.swig.org/survey.html)
+
+Refer to the [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility) section for further details.
+
+#### Setup
+
+Install AutoSklearn:
+
+```bash
+pip install auto-sklearn==0.15.0
+```
+
+#### Run
+
+Execute the following command for the Titanic task:
+
+```bash
+python run_experiment.py --exp_mode autosklearn --task titanic
+```
+
+---
+
+### 4. Base Data Interpreter
+
+Run the following command for the Titanic task:
+
+```bash
+python run_experiment.py --exp_mode base --task titanic --num_experiments 10
+```
+
+---
+
+### 5. Custom Baselines
+
+To run additional baselines:
+
+- Each baseline must produce `dev_predictions.csv` and `test_predictions.csv` with a `target` column.
+- Use the `evaluate_score` function for evaluation.
+
+---
+
+## MLE-Bench
+
+**Note:** MLE-Bench requires Python 3.11 or higher.
+
+### Setup
+
+Clone the repository and install:
+
+```bash
+git clone https://github.com/openai/mle-bench.git
+cd mle-bench
+pip install -e .
+```
+
+Prepare the data:
+
+```bash
+mlebench prepare -c <competition-id> --data-dir <dataset-dir-save-path>
+```
+
+### Run the MLE-Bench Experiment
+
+Run the following command to execute the experiment:
+
+```bash
+python run_experiment.py --exp_mode mcts --custom_dataset_dir <dataset-dir-save-path/prepared/public> --rollouts 10 --from_scratch --role_timeout 3600
+```
\ No newline at end of file