diff --git a/docs/resources/AFLOW-experiment.jpg b/docs/resources/aflow/AFLOW-experiment.jpg
similarity index 100%
rename from docs/resources/AFLOW-experiment.jpg
rename to docs/resources/aflow/AFLOW-experiment.jpg
diff --git a/docs/resources/AFLOW-method.jpg b/docs/resources/aflow/AFLOW-method.jpg
similarity index 100%
rename from docs/resources/AFLOW-method.jpg
rename to docs/resources/aflow/AFLOW-method.jpg
diff --git a/docs/resources/AFLOW-performance.jpg b/docs/resources/aflow/AFLOW-performance.jpg
similarity index 100%
rename from docs/resources/AFLOW-performance.jpg
rename to docs/resources/aflow/AFLOW-performance.jpg
diff --git a/examples/aflow/README.md b/examples/aflow/README.md
index 9567e8cf4..616daeaf0 100644
--- a/examples/aflow/README.md
+++ b/examples/aflow/README.md
@@ -5,7 +5,7 @@ # AFlow: Automating Agentic Workflow Generation
 [Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
 
 <p align="center">
-<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFLOW" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
+<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFlow" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
 </p>
 
 ## Framework Components
@@ -17,7 +17,7 @@ ## Framework Components
 - **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.
 
 <p align="center">
-<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Performance Of AFLOW" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
+<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Framework of AFlow" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
 </p>
 
 ## Datasets
@@ -26,7 +26,7 @@ ### Experimental Datasets
 We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`
 
 <p align="center">
-<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFLOW" title="Comparison bewteen AFlow and other methods <sub>1</sub>" width="80%"></a>
+<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Comparison between AFlow and other methods" title="Comparison between AFlow and other methods <sub>1</sub>" width="80%"></a>
 </p>
 
 ### Custom Datasets
@@ -68,7 +68,7 @@    # Or with custom parameters
    ```
 
 ## Reproduce the Results in the Paper
-1. We provide the raw data obtained from our experiments ([download link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv)), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
+1. We provide the raw data obtained from our experiments in this [link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
 2. You can directly reproduce our experimental results by running the scripts in `examples/aflow/experiments`.
 
 
diff --git a/examples/aflow/experiments/optimize_drop.py b/examples/aflow/experiments/optimize_drop.py
index 801c5222b..73abfe961 100644
--- a/examples/aflow/experiments/optimize_drop.py
+++ b/examples/aflow/experiments/optimize_drop.py
@@ -3,50 +3,64 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.
 
+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# Crucial Parameters
-dataset: DatasetType = "DROP"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "qa"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.
 
-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():
+    """Parse the command-line options of the DROP optimization script.
+
+    Returns:
+        argparse.Namespace with dataset, sample, question_type, optimized_path,
+        initial_round, max_rounds, check_convergence and validation_rounds.
+    """
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for DROP")
+    parser.add_argument("--dataset", type=str, default="DROP", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    # type=bool would turn every non-empty string (even "False") into True,
+    # so only explicit falsy spellings switch early stopping off.
+    parser.add_argument(
+        "--check_convergence",
+        type=lambda value: value.lower() not in ("", "0", "false", "no", "off"),
+        default=True,
+        help="Whether to enable early stop",
+    )
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()
 
-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    "AnswerGenerate",  # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)
 
 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "AnswerGenerate",
+        "ScEnsemble",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
     optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
diff --git a/examples/aflow/experiments/optimize_gsm8k.py b/examples/aflow/experiments/optimize_gsm8k.py
index e34fdb66d..17eafb664 100644
--- a/examples/aflow/experiments/optimize_gsm8k.py
+++ b/examples/aflow/experiments/optimize_gsm8k.py
@@ -3,50 +3,64 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.
 
+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# Crucial Parameters
-dataset: DatasetType = "GSM8K"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "math"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.
 
-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():
+    """Parse the command-line options of the GSM8K optimization script.
+
+    Returns:
+        argparse.Namespace with dataset, sample, question_type, optimized_path,
+        initial_round, max_rounds, check_convergence and validation_rounds.
+    """
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for GSM8K")
+    parser.add_argument("--dataset", type=str, default="GSM8K", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="math", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    # type=bool would turn every non-empty string (even "False") into True,
+    # so only explicit falsy spellings switch early stopping off.
+    parser.add_argument(
+        "--check_convergence",
+        type=lambda value: value.lower() not in ("", "0", "false", "no", "off"),
+        default=True,
+        help="Whether to enable early stop",
+    )
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()
 
-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)
 
 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "ScEnsemble",
+        "Programmer",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
     optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
diff --git a/examples/aflow/experiments/optimize_hotpotqa.py b/examples/aflow/experiments/optimize_hotpotqa.py
index 92d26ddd5..4ea2076a2 100644
--- a/examples/aflow/experiments/optimize_hotpotqa.py
+++ b/examples/aflow/experiments/optimize_hotpotqa.py
@@ -3,50 +3,64 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.
 
+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# Crucial Parameters
-dataset: DatasetType = "HotpotQA"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "qa"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.
 
-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():
+    """Parse the command-line options of the HotpotQA optimization script.
+
+    Returns:
+        argparse.Namespace with dataset, sample, question_type, optimized_path,
+        initial_round, max_rounds, check_convergence and validation_rounds.
+    """
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for HotpotQA")
+    parser.add_argument("--dataset", type=str, default="HotpotQA", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    # type=bool would turn every non-empty string (even "False") into True,
+    # so only explicit falsy spellings switch early stopping off.
+    parser.add_argument(
+        "--check_convergence",
+        type=lambda value: value.lower() not in ("", "0", "false", "no", "off"),
+        default=True,
+        help="Whether to enable early stop",
+    )
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()
 
-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    "AnswerGenerate",  # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)
 
 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "AnswerGenerate",
+        "ScEnsemble",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
     optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
diff --git a/examples/aflow/experiments/optimize_humaneval.py b/examples/aflow/experiments/optimize_humaneval.py
index 6027e9ec8..20597d395 100644
--- a/examples/aflow/experiments/optimize_humaneval.py
+++ b/examples/aflow/experiments/optimize_humaneval.py
@@ -3,50 +3,65 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.
 
+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# Crucial Parameters
-dataset: DatasetType = "HumanEval"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "code"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.
 
-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():
+    """Parse the command-line options of the HumanEval optimization script.
+
+    Returns:
+        argparse.Namespace with dataset, sample, question_type, optimized_path,
+        initial_round, max_rounds, check_convergence and validation_rounds.
+    """
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for HumanEval")
+    parser.add_argument("--dataset", type=str, default="HumanEval", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="code", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    # type=bool would turn every non-empty string (even "False") into True,
+    # so only explicit falsy spellings switch early stopping off.
+    parser.add_argument(
+        "--check_convergence",
+        type=lambda value: value.lower() not in ("", "0", "false", "no", "off"),
+        default=True,
+        help="Whether to enable early stop",
+    )
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()
 
-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    "CustomCodeGenerate",  # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    "Test",  # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)
 
 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "CustomCodeGenerate",
+        "ScEnsemble",
+        "Test",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
     optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
diff --git a/examples/aflow/experiments/optimize_math.py b/examples/aflow/experiments/optimize_math.py
index 5d951c168..40cc2b0d1 100644
--- a/examples/aflow/experiments/optimize_math.py
+++ b/examples/aflow/experiments/optimize_math.py
@@ -3,50 +3,64 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.
 
+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# Crucial Parameters
-dataset: DatasetType = "MATH"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "math"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.
 
-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():
+    """Parse the command-line options of the MATH optimization script.
+
+    Returns:
+        argparse.Namespace with dataset, sample, question_type, optimized_path,
+        initial_round, max_rounds, check_convergence and validation_rounds.
+    """
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for MATH")
+    parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="math", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    # type=bool would turn every non-empty string (even "False") into True,
+    # so only explicit falsy spellings switch early stopping off.
+    parser.add_argument(
+        "--check_convergence",
+        type=lambda value: value.lower() not in ("", "0", "false", "no", "off"),
+        default=True,
+        help="Whether to enable early stop",
+    )
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()
 
-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)
 
 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "ScEnsemble",
+        "Programmer",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
     optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
diff --git a/examples/aflow/experiments/optimize_mbpp.py b/examples/aflow/experiments/optimize_mbpp.py
index 00c008bbf..a27be8147 100644
--- a/examples/aflow/experiments/optimize_mbpp.py
+++ b/examples/aflow/experiments/optimize_mbpp.py
@@ -3,50 +3,65 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.
 
+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# Crucial Parameters
-dataset: DatasetType = "MBPP"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "code"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.
 
-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():
+    """Parse the command-line options of the MBPP optimization script.
+
+    Returns:
+        argparse.Namespace with dataset, sample, question_type, optimized_path,
+        initial_round, max_rounds, check_convergence and validation_rounds.
+    """
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for MBPP")
+    parser.add_argument("--dataset", type=str, default="MBPP", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="code", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    # type=bool would turn every non-empty string (even "False") into True,
+    # so only explicit falsy spellings switch early stopping off.
+    parser.add_argument(
+        "--check_convergence",
+        type=lambda value: value.lower() not in ("", "0", "false", "no", "off"),
+        default=True,
+        help="Whether to enable early stop",
+    )
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()
 
-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    "CustomCodeGenerate",  # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    "Test",  # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)
 
 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "CustomCodeGenerate",
+        "ScEnsemble",
+        "Test",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
     optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
diff --git a/examples/aflow/optimize.py b/examples/aflow/optimize.py
index 65b194344..e24facb3a 100644
--- a/examples/aflow/optimize.py
+++ b/examples/aflow/optimize.py
@@ -9,17 +9,17 @@ from metagpt.configs.models_config import ModelsConfig
 from metagpt.ext.aflow.data.download_data import download
 from metagpt.ext.aflow.scripts.optimizer import Optimizer
 
-# DatasetType, QuestionType, and OptimizerType definitions
-# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
-# QuestionType = Literal["math", "code", "qa"]
-# OptimizerType = Literal["Graph", "Test"]
-
 
 def parse_args():
     parser = argparse.ArgumentParser(description="AFlow Optimizer")
-    parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        default="MATH",
+        help="Dataset type, including HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP",
+    )
     parser.add_argument("--sample", type=int, default=4, help="Sample count")
-    parser.add_argument("--question_type", type=str, default="math", help="Question type")
+    parser.add_argument("--question_type", type=str, default="math", help="Question type, including math, code, qa")
     parser.add_argument(
         "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
     )
diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py
index ab190b736..a974b35d1 100644
--- a/metagpt/actions/action_node.py
+++ b/metagpt/actions/action_node.py
@@ -510,8 +510,9 @@ class ActionNode:
         return {field_name: field.annotation for field_name, field in model_class.model_fields.items()}
 
     def xml_compile(self, context):
-        # TODO 再来一版
-
+        """
+        Compile the prompt to make it easier for the model to understand the format.
+        """
         field_names = self.get_field_names()
         # Construct the example using the field names
         examples = []
diff --git a/metagpt/ext/aflow/benchmark/utils.py b/metagpt/ext/aflow/benchmark/utils.py
index 60cbe5580..846101bc0 100644
--- a/metagpt/ext/aflow/benchmark/utils.py
+++ b/metagpt/ext/aflow/benchmark/utils.py
@@ -11,12 +11,12 @@ import os
 
 import numpy as np
 
-from metagpt.utils.common import write_json_file
+from metagpt.utils.common import read_json_file, write_json_file
 
 
 def generate_random_indices(n, n_samples, test=False):
     """
-    生成随机索引
+    Generate random indices
     """
 
     def _set_seed(seed=42):
@@ -52,20 +52,16 @@ def log_mismatch(problem, expected_output, prediction, predicted_number, path):
 
     log_file = os.path.join(path, "log.json")
 
-    # 检查log文件是否已经存在
+    # Check if the log file already exists
     if os.path.exists(log_file):
-        # 如果存在，加载现有的日志数据
-        with open(log_file, "r", encoding="utf-8") as f:
-            try:
-                data = json.load(f)
-            except json.JSONDecodeError:
-                data = []
+        # If it exists, load the existing log data
+        data = read_json_file(log_file)
     else:
-        # 如果不存在，创建一个新的日志列表
+        # If it does not exist, create a new log list
         data = []
 
-    # 添加新的日志记录
+    # Add the new log entry
     data.append(log_data)
 
-    # 将数据写回到log.json文件
+    # Write the data back to log.json file
     write_json_file(log_file, data, encoding="utf-8", indent=4)
diff --git a/metagpt/ext/aflow/scripts/optimizer.py b/metagpt/ext/aflow/scripts/optimizer.py
index 8dadc1d1a..0ac4827e7 100644
--- a/metagpt/ext/aflow/scripts/optimizer.py
+++ b/metagpt/ext/aflow/scripts/optimizer.py
@@ -10,6 +10,7 @@ from typing import List, Literal
 from pydantic import BaseModel, Field
 
 from metagpt.actions.action_node import ActionNode
+from metagpt.ext.aflow.scripts.evaluator import DatasetType
 from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
 from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
 from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
@@ -18,7 +19,6 @@ from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
 from metagpt.logs import logger
 from metagpt.provider.llm_provider_registry import create_llm_instance
 
-DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
 QuestionType = Literal["math", "code", "qa"]
 OptimizerType = Literal["Graph", "Test"]
 
diff --git a/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py b/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
index 246a94798..0e275f496 100644
--- a/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
@@ -76,8 +76,8 @@ class ConvergenceUtils:
         if len(self.avg_scores) < top_k + 1:
             return False, None, None
         convergence_count = 0  # Convergence counter
-        previous_Y = None  # Y value of the previous round (average of top_k scores)
-        sigma_Y_previous = None  # Standard error of Y value from previous round
+        previous_y = None  # Y value of the previous round (average of top_k scores)
+        sigma_y_previous = None  # Standard error of Y value from previous round
         for i in range(len(self.avg_scores)):
             # Dynamically select top_k from current round and all previous rounds
             top_k_indices = np.argsort(self.avg_scores[: i + 1])[::-1][
@@ -87,18 +87,18 @@ class ConvergenceUtils:
             top_k_stds = [
                 self.stds[j] for j in top_k_indices
             ]  # Get list of standard deviations corresponding to top k scores
-            # Calculate mean of top k scores for current round, i.e., Y_current
-            Y_current = np.mean(top_k_scores)
-            # Calculate standard error of Y_current (sigma_Y_current), representing score dispersion
-            sigma_Y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
+            # Calculate mean of top k scores for current round, i.e., y_current
+            y_current = np.mean(top_k_scores)
+            # Calculate standard error of y_current (sigma_y_current), representing score dispersion
+            sigma_y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
             # If not the first round, calculate change in Y (Delta_Y) and corresponding standard error
-            if previous_Y is not None:
+            if previous_y is not None:
                 # Calculate Y difference between current round and previous round
-                Delta_Y = Y_current - previous_Y
+                delta_y = y_current - previous_y
                 # Calculate standard error of Y difference (sigma_Delta_Y)
-                sigma_Delta_Y = np.sqrt(sigma_Y_current**2 + sigma_Y_previous**2)
+                sigma_delta_y = np.sqrt(sigma_y_current**2 + sigma_y_previous**2)
                 # Check if Y change is within acceptable confidence interval, i.e., convergence condition
-                if abs(Delta_Y) <= z * sigma_Delta_Y:
+                if abs(delta_y) <= z * sigma_delta_y:
                     convergence_count += 1
                     # If consecutive converged rounds reach set value, return convergence information
                     if convergence_count >= consecutive_rounds:
@@ -107,8 +107,8 @@ class ConvergenceUtils:
                     # If change is large, reset convergence counter
                     convergence_count = 0
             # Update Y value and standard error for previous round
-            previous_Y = Y_current
-            sigma_Y_previous = sigma_Y_current
+            previous_y = y_current
+            sigma_y_previous = sigma_y_current
         # If convergence condition not met, return not converged
         return False, None, None
 
diff --git a/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py b/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
index f55ff8740..2df161ed8 100644
--- a/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
@@ -7,6 +7,7 @@ import numpy as np
 import pandas as pd
 
 from metagpt.logs import logger
+from metagpt.utils.common import read_json_file, write_json_file
 
 
 class DataUtils:
@@ -17,11 +18,7 @@ class DataUtils:
     def load_results(self, path: str) -> list:
         result_path = os.path.join(path, "results.json")
         if os.path.exists(result_path):
-            with open(result_path, "r") as json_file:
-                try:
-                    return json.load(json_file)
-                except json.JSONDecodeError:
-                    return []
+            return read_json_file(result_path, encoding="utf-8")  # NOTE(review): bad-JSON -> [] fallback dropped
         return []
 
     def get_top_rounds(self, sample: int, path=None, mode="Graph"):
@@ -97,8 +94,7 @@ class DataUtils:
         if not os.path.exists(log_dir):
             return ""  # 如果文件不存在，返回空字符串
         logger.info(log_dir)
-        with open(log_dir, "r", encoding="utf-8") as f:
-            data = json.load(f)
+        data = read_json_file(log_dir, encoding="utf-8")
 
         if isinstance(data, dict):
             data = [data]
@@ -125,8 +121,7 @@ class DataUtils:
         return {"round": round, "score": score, "avg_cost": avg_cost, "total_cost": total_cost, "time": now}
 
     def save_results(self, json_file_path: str, data: list):
-        with open(json_file_path, "w") as json_file:
-            json.dump(data, json_file, default=str, indent=4)
+        write_json_file(json_file_path, data, encoding="utf-8", indent=4)  # NOTE(review): default=str dropped; verify
 
     def _load_scores(self, path=None, mode="Graph"):
         if mode == "Graph":
@@ -137,8 +132,7 @@ class DataUtils:
         result_file = os.path.join(rounds_dir, "results.json")
         self.top_scores = []
 
-        with open(result_file, "r", encoding="utf-8") as file:
-            data = json.load(file)
+        data = read_json_file(result_file, encoding="utf-8")
         df = pd.DataFrame(data)
 
         scores_per_round = df.groupby("round")["score"].mean().to_dict()
diff --git a/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py b/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
index cffd8b522..43f9eb1d5 100644
--- a/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
@@ -3,6 +3,7 @@ import os
 from collections import defaultdict
 
 from metagpt.logs import logger
+from metagpt.utils.common import read_json_file, write_json_file
 
 
 class ExperienceUtils:
@@ -24,23 +25,22 @@ class ExperienceUtils:
                     round_number = int(round_dir.split("_")[1])
                     json_file_path = os.path.join(round_path, "experience.json")
                     if os.path.exists(json_file_path):
-                        with open(json_file_path, "r", encoding="utf-8") as json_file:
-                            data = json.load(json_file)
-                            father_node = data["father node"]
+                        data = read_json_file(json_file_path, encoding="utf-8")
+                        father_node = data["father node"]
 
-                            if experience_data[father_node]["score"] is None:
-                                experience_data[father_node]["score"] = data["before"]
+                        if experience_data[father_node]["score"] is None:
+                            experience_data[father_node]["score"] = data["before"]
 
-                            if data["succeed"]:
-                                experience_data[father_node]["success"][round_number] = {
-                                    "modification": data["modification"],
-                                    "score": data["after"],
-                                }
-                            else:
-                                experience_data[father_node]["failure"][round_number] = {
-                                    "modification": data["modification"],
-                                    "score": data["after"],
-                                }
+                        if data["succeed"]:
+                            experience_data[father_node]["success"][round_number] = {
+                                "modification": data["modification"],
+                                "score": data["after"],
+                            }
+                        else:
+                            experience_data[father_node]["failure"][round_number] = {
+                                "modification": data["modification"],
+                                "score": data["after"],
+                            }
                 except Exception as e:
                     logger.info(f"Error processing {round_dir}: {str(e)}")
 
@@ -93,5 +93,4 @@ class ExperienceUtils:
         experience["after"] = avg_score
         experience["succeed"] = bool(avg_score > experience["before"])
 
-        with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
-            json.dump(experience, file, ensure_ascii=False, indent=4)
+        write_json_file(os.path.join(directory, "experience.json"), experience, encoding="utf-8", indent=4)
diff --git a/metagpt/ext/aflow/scripts/utils.py b/metagpt/ext/aflow/scripts/utils.py
index bc97f0818..c2fdd0cb7 100644
--- a/metagpt/ext/aflow/scripts/utils.py
+++ b/metagpt/ext/aflow/scripts/utils.py
@@ -6,11 +6,17 @@
 
 import json
 import re
+from enum import Enum
 from typing import Any, List, Tuple
 
 
-def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
-    if dataset == "HumanEval":
+class CodeDataset(str, Enum):  # str mixin: members compare equal to their plain-string values
+    HUMAN_EVAL = "HumanEval"  # selects the HumanEval public test file below
+    MBPP = "MBPP"  # selects the MBPP public test file below
+
+
+def extract_test_cases_from_jsonl(entry_point: str, dataset: CodeDataset = CodeDataset.HUMAN_EVAL):
+    if dataset == CodeDataset.HUMAN_EVAL:
         file_path = "metagpt/ext/aflow/data/humaneval_public_test.jsonl"
         # Retain the original hardcoded test cases
         hardcoded_cases = {
@@ -25,7 +31,7 @@ def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
             "sum_squares": "",
             "starts_one_ends": "",
         }
-    elif dataset == "MBPP":
+    elif dataset == CodeDataset.MBPP:
         file_path = "metagpt/ext/aflow/data/mbpp_public_test.jsonl"
         hardcoded_cases = {
             "remove_odd": "",
diff --git a/metagpt/ext/aflow/scripts/workflow.py b/metagpt/ext/aflow/scripts/workflow.py
index d0f883071..47b54021b 100644
--- a/metagpt/ext/aflow/scripts/workflow.py
+++ b/metagpt/ext/aflow/scripts/workflow.py
@@ -3,13 +3,11 @@
 # @Author  : didi
 # @Desc    : Basic Graph Class
 
-from typing import Literal
 
+from metagpt.ext.aflow.scripts.evaluator import DatasetType
 from metagpt.provider.llm_provider_registry import create_llm_instance
 from metagpt.utils.cost_manager import CostManager
 
-DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
-
 
 class Workflow:
     def __init__(