diff --git a/expo/MCTS.py b/expo/MCTS.py
index 9026e09b4..af50ff7a0 100644
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -4,9 +4,9 @@ import os
 import pandas as pd
 from expo.research_assistant import ResearchAssistant
 from expo.insights.InsightGenerate import InsightGenerator
-from expo.dataset import get_split_dataset_path
+from expo.dataset import get_split_dataset_path, generate_task_requirement
 from expo.evaluation.evaluation import evaluate_score
-from expo.utils import mcts_logger, load_execute_notebook, generate_task_requirement, get_exp_pool_path
+from expo.utils import mcts_logger, load_execute_notebook, get_exp_pool_path
 
 from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
 from metagpt.utils.common import write_json_file, read_json_file, format_trackback_info
diff --git a/expo/data.yaml b/expo/data.yaml
index d921e1ebf..df26e29e8 100644
--- a/expo/data.yaml
+++ b/expo/data.yaml
@@ -152,6 +152,6 @@ datasets:
       \ eval data. Do not plot or make any visualizations.\n"
 
 
-work_dir: D:/work/MG-open/MetaGPT/workspace # path to the workspace directory
+work_dir: ../workspace # path to the workspace directory; a relative path resolves against the current working directory
 role_dir: storage/team/environment/roles/ResearchAssistant_David
 # analysis_pool_dir: D:/work/MG-open/MetaGPT/examples/MCTS_test/analysis_pool_sample.json
\ No newline at end of file
diff --git a/expo/dataset.py b/expo/dataset.py
index 4bce6e9fe..a507d0b7e 100644
--- a/expo/dataset.py
+++ b/expo/dataset.py
@@ -5,7 +5,7 @@ import os
 import json
 import yaml
 import pandas as pd
-from examples.MCTS_test.insights.solution_designer import SolutionDesigner
+from expo.insights.solution_designer import SolutionDesigner
 import asyncio
 
 BASE_USER_REQUIREMENT = """\
@@ -14,6 +14,35 @@ Perform data analysis, data preprocessing, feature engineering, and modeling to
 Report {metric} on the eval data. Do not plot or make any visualizations.
 """
 
+TASK_PROMPT = """\
+# User requirement
+{user_requirement}
+**Attention** Please do not leak the target label in any form during training.
+
+## Saving Dev and Test Predictions
+Save the prediction results of the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory BEFORE printing out the results.
+The file should contain a single `target` column with the predicted values.
+Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.
+
+## Output Training Set Performance
+Make sure the performance of the model is printed in python in the last step even if it has been printed in the previous steps. The value should be a float number.
+Print the training set performance in the last step. Write in this format:
+```python
+...
+print("Train score:", train_score)
+```
+
+# Data dir
+training: {train_path}
+dev: {dev_path}
+testing: {test_path}
+
+# Output dir
+{output_dir}
+
+"""  # placeholders are filled by generate_task_requirement() via str.format
+
+
 SEED = 100
 TRAIN_TEST_SPLIT = 0.8
 TRAIN_DEV_SPLIT = 0.75
@@ -89,6 +118,20 @@ def create_dataset_dict(dataset):
     }
     return dataset_dict
 
+def generate_task_requirement(task_name, data_config):
+    """Render TASK_PROMPT for `task_name` using paths looked up via `data_config`."""
+    requirement = get_user_requirement(task_name, data_config)
+    paths = get_split_dataset_path(task_name, data_config)
+    prompt_fields = {
+        "user_requirement": requirement,
+        "train_path": paths["train"],
+        "dev_path": paths["dev_wo_target"],
+        "test_path": paths["test_wo_target"],
+        "output_dir": f"{data_config['work_dir']}/{task_name}",
+    }
+    return TASK_PROMPT.format(**prompt_fields)
+
+
 class ExpDataset:
     description : str = None
     metadata : dict = None
diff --git a/expo/insights/InsightGenerate.py b/expo/insights/InsightGenerate.py
index de58b7e4e..55ab64e30 100644
--- a/expo/insights/InsightGenerate.py
+++ b/expo/insights/InsightGenerate.py
@@ -23,7 +23,7 @@ import random
 import json
 from metagpt.llm import LLM
 from metagpt.schema import Message
-from examples.MCTS_test.utils import load_data_config, mcts_logger
+from expo.utils import load_data_config, mcts_logger
 DATA_CONFIG = load_data_config()
 
 
diff --git a/expo/insights/solution_designer.py b/expo/insights/solution_designer.py
index 0986c392a..e2bf57ae3 100644
--- a/expo/insights/solution_designer.py
+++ b/expo/insights/solution_designer.py
@@ -3,7 +3,7 @@ import random
 import json
 from metagpt.llm import LLM
 from metagpt.schema import Message
-from examples.MCTS_test.utils import clean_json_from_rsp, load_data_config
+from expo.utils import clean_json_from_rsp, load_data_config
 
 
 DATA_CONFIG = load_data_config()
diff --git a/expo/research_assistant.py b/expo/research_assistant.py
index fbd74f7db..7b844cf5e 100644
--- a/expo/research_assistant.py
+++ b/expo/research_assistant.py
@@ -10,6 +10,9 @@ from metagpt.utils.common import write_json_file, read_json_file, format_trackba
 from metagpt.const import MESSAGE_ROUTE_TO_ALL, SERDESER_PATH
 from metagpt.utils.recovery_util import save_history
 from expo.utils import mcts_logger, save_notebook
+from pydantic import Field, model_validator
+from metagpt.actions.di.write_analysis_code import CheckData, WriteAnalysisCode
+
 import re
 import os
 
@@ -84,6 +87,17 @@ class ResearchAssistant(DataInterpreter):
         json_block = CodeParser.parse_code(block=None, text=rsp)
         score_dict = json.loads(json_block)
         return score_dict
+
+    @model_validator(mode="after")
+    def set_plan_and_tool(self) -> "ResearchAssistant":
+        """Keep a plan whose goal is already set; otherwise run the parent's plan/tool setup."""
+        if self.planner.plan.goal != '':
+            self.set_actions([WriteAnalysisCode])
+            self._set_state(0)
+            print("Plan already exists, skipping initialization.")
+            return self
+        print("Initializing plan and tool...")
+        return super().set_plan_and_tool()
 
     async def _act_on_task(self, current_task: Task) -> TaskResult:
         """Useful in 'plan_and_act' mode. Wrap the output in a TaskResult for review and confirmation."""
diff --git a/expo/run_exp_augmentation.py b/expo/run_exp_augmentation.py
index 492a424d4..f4d22093f 100644
--- a/expo/run_exp_augmentation.py
+++ b/expo/run_exp_augmentation.py
@@ -1,10 +1,11 @@
 import os
-from metagpt.roles.di.research_assistant import ResearchAssistant
+from expo.research_assistant import ResearchAssistant
 import asyncio
-from examples.MCTS_test.utils import DATA_CONFIG, generate_task_requirement, get_exp_pool_path
-from examples.MCTS_test.insights.InsightGenerate import InsightGenerator
-from examples.MCTS_test.MCTS import create_initial_state
-from examples.MCTS_test.evaluation.evaluation import evaluate_score
+from expo.utils import DATA_CONFIG, get_exp_pool_path
+from expo.dataset import generate_task_requirement
+from expo.insights.InsightGenerate import InsightGenerator
+from expo.MCTS import create_initial_state
+from expo.evaluation.evaluation import evaluate_score
 import json
 import argparse
 import pandas as pd
diff --git a/expo/run_experiment.py b/expo/run_experiment.py
index e75897f5a..0c7468ac9 100644
--- a/expo/run_experiment.py
+++ b/expo/run_experiment.py
@@ -1,6 +1,7 @@
-from examples.MCTS_test.MCTS import MCTS, Node, initialize_di_root_node
-from examples.MCTS_test.utils import load_data_config, generate_task_requirement
-from examples.MCTS_test.visualize_mcts import get_tree_text
+from expo.MCTS import MCTS, Node, initialize_di_root_node
+from expo.utils import load_data_config
+from expo.dataset import generate_task_requirement
+from expo.evaluation.visualize_mcts import get_tree_text
 import asyncio
 import argparse
 
diff --git a/expo/run_mcts.py b/expo/run_mcts.py
index 0c0c486db..6d2c421ec 100644
--- a/expo/run_mcts.py
+++ b/expo/run_mcts.py
@@ -1,5 +1,7 @@
 from expo.MCTS import MCTS, Node, initialize_di_root_node
-from expo.utils import load_data_config, generate_task_requirement
+from expo.utils import load_data_config
+from expo.dataset import generate_task_requirement
+
 from expo.evaluation.visualize_mcts import get_tree_text
 import asyncio
 import argparse
diff --git a/expo/utils.py b/expo/utils.py
index ac4a64697..423889f29 100644
--- a/expo/utils.py
+++ b/expo/utils.py
@@ -1,5 +1,4 @@
 import yaml
-from examples.MCTS_test.dataset import get_user_requirement, get_split_dataset_path
 from metagpt.roles.role import Role
 from metagpt.actions.di.execute_nb_code import ExecuteNbCode
 from metagpt.utils.save_code import save_code_file
@@ -13,34 +12,6 @@ import sys
 import os
 import re
 
-TASK_PROMPT = """\
-# User requirement
-{user_requirement}
-**Attention** Please do not leak the target label in any form during training.
-
-## Saving Dev and Test Predictions
-Save the prediction results of the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory BEFORE printig out the results. 
-The file should contain a single `target` column with the predicted values.
-Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.
-
-## Output Training Set Performance
-Make sure the performance of the model is printed in python in the last step even if it has been printed in the previous steps. The value should be a float number.
-Print the training set performance in the last step. Write in this format:
-```python
-...
-print("Train score:", train_score)
-```
-
-# Data dir
-training: {train_path}
-dev: {dev_path}
-testing: {test_path}
-
-# Output dir
-{output_dir}
-
-"""
-
 def load_data_config(file_path="data.yaml"):
     with open(file_path, 'r') as stream:
         data_config = yaml.safe_load(stream)
@@ -78,18 +49,6 @@ def get_exp_pool_path(task_name, data_config, pool_name="analysis_pool"):
     exp_pool_path = os.path.join(data_path, f"{pool_name}.json")
     return exp_pool_path
 
-def generate_task_requirement(task_name, data_config):
-    user_requirement = get_user_requirement(task_name, data_config)
-    split_dataset_path = get_split_dataset_path(task_name, data_config)
-    train_path = split_dataset_path["train"]
-    dev_path = split_dataset_path["dev_wo_target"]
-    test_path = split_dataset_path["test_wo_target"]
-    work_dir = data_config["work_dir"]
-    output_dir = f"{work_dir}/{task_name}"
-    user_requirement = TASK_PROMPT.format(user_requirement=user_requirement, 
-                                          train_path=train_path, dev_path=dev_path, test_path=test_path,
-                                          output_dir=output_dir)
-    return user_requirement
 
 def change_plan(role, plan):
     print(f"Change next plan to: {plan}")