Update to address the latest review comments.

2026-06-08 15:05:17 +02:00 · 2024-10-29 16:04:13 +08:00 · 2024-10-29 16:04:13 +08:00 · d01051abc6
commit d01051abc6
parent f0a3a3f739
19 changed files with 314 additions and 312 deletions
--- a/docs/resources/aflow/AFLOW-experiment.jpg
+++ b/docs/resources/aflow/AFLOW-experiment.jpg
--- a/docs/resources/aflow/AFLOW-method.jpg
+++ b/docs/resources/aflow/AFLOW-method.jpg
--- a/docs/resources/aflow/AFLOW-performance.jpg
+++ b/docs/resources/aflow/AFLOW-performance.jpg
--- a/examples/aflow/README.md
+++ b/examples/aflow/README.md
@ -5,7 +5,7 @@ # AFlow: Automating Agentic Workflow Generation
 [Read our paper on arXiv](https://arxiv.org/abs/2410.10762)

 <p align="center">
-<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFLOW" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
+<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFlow" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
 </p>

 ## Framework Components
@ -17,7 +17,7 @@ ## Framework Components
 - **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.

 <p align="center">
-<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Performance Of AFLOW" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
+<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Framework of AFlow" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
 </p>

 ## Datasets
@ -26,7 +26,7 @@ ### Experimental Datasets
 We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`

 <p align="center">
-<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFLOW" title="Comparison bewteen AFlow and other methods <sub>1</sub>" width="80%"></a>
+<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFlow" title="Performance Of AFlow <sub>1</sub>" width="80%"></a>
 </p>

 ### Custom Datasets
@ -68,7 +68,7 @@    # Or with custom parameters
   ```

 ## Reproduce the Results in the Paper
-1. We provide the raw data obtained from our experiments ([download link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv)), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
+1. We provide the raw data obtained from our experiments in this [link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
 2. You can directly reproduce our experimental results by running the scripts in `examples/aflow/experiments`.


--- a/examples/aflow/experiments/optimize_drop.py
+++ b/examples/aflow/experiments/optimize_drop.py
@ -3,50 +3,51 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# Crucial Parameters
-dataset: DatasetType = "DROP"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "qa"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.

-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():  # CLI overrides for the experiment defaults; returns an argparse.Namespace
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for DROP")
+    parser.add_argument("--dataset", type=str, default="DROP", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("1", "true", "yes"), default=True, help="Whether to enable early stop")
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()

-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    "AnswerGenerate",  # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)

 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "AnswerGenerate",
+        "ScEnsemble",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
    optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
--- a/examples/aflow/experiments/optimize_gsm8k.py
+++ b/examples/aflow/experiments/optimize_gsm8k.py
@ -3,50 +3,51 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# Crucial Parameters
-dataset: DatasetType = "GSM8K"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "math"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.

-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():  # CLI overrides for the experiment defaults; returns an argparse.Namespace
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for GSM8K")
+    parser.add_argument("--dataset", type=str, default="GSM8K", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="math", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("1", "true", "yes"), default=True, help="Whether to enable early stop")
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()

-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)

 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "ScEnsemble",
+        "Programmer",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
    optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
--- a/examples/aflow/experiments/optimize_hotpotqa.py
+++ b/examples/aflow/experiments/optimize_hotpotqa.py
@ -3,50 +3,51 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# Crucial Parameters
-dataset: DatasetType = "HotpotQA"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "qa"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.

-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():  # CLI overrides for the experiment defaults; returns an argparse.Namespace
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for HotpotQA")
+    parser.add_argument("--dataset", type=str, default="HotpotQA", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("1", "true", "yes"), default=True, help="Whether to enable early stop")
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()

-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    "AnswerGenerate",  # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)

 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "AnswerGenerate",
+        "ScEnsemble",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
    optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
--- a/examples/aflow/experiments/optimize_humaneval.py
+++ b/examples/aflow/experiments/optimize_humaneval.py
@ -3,50 +3,52 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# Crucial Parameters
-dataset: DatasetType = "HumanEval"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "code"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.

-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():  # CLI overrides for the experiment defaults; returns an argparse.Namespace
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for HumanEval")
+    parser.add_argument("--dataset", type=str, default="HumanEval", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="code", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("1", "true", "yes"), default=True, help="Whether to enable early stop")
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()

-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    "CustomCodeGenerate",  # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    "Test",  # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)

 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "CustomCodeGenerate",
+        "ScEnsemble",
+        "Test",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
    optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
--- a/examples/aflow/experiments/optimize_math.py
+++ b/examples/aflow/experiments/optimize_math.py
@ -3,50 +3,51 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# Crucial Parameters
-dataset: DatasetType = "MATH"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "math"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.

-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():  # CLI overrides for the experiment defaults; returns an argparse.Namespace
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for MATH")
+    parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="math", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("1", "true", "yes"), default=True, help="Whether to enable early stop")
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()

-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    # "CustomCodeGenerate",         # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    # "Test",                       # It's for code
-    "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)

 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "ScEnsemble",
+        "Programmer",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
    optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
--- a/examples/aflow/experiments/optimize_mbpp.py
+++ b/examples/aflow/experiments/optimize_mbpp.py
@ -3,50 +3,52 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

+import argparse
+
 from metagpt.configs.models_config import ModelsConfig
-from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
+from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# Crucial Parameters
-dataset: DatasetType = "MBPP"  # Ensure the type is consistent with DatasetType
-sample: int = 4  # Sample Count, which means how many workflows will be resampled from generated workflows
-question_type: QuestionType = "code"  # Ensure the type is consistent with QuestionType
-optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized Result Save Path
-initial_round: int = 1  # Corrected the case from Initial_round to initial_round
-max_rounds: int = 20  # The max iteration of AFLOW.
-check_convergence: bool = True  # Whether Early Stop
-validation_rounds: int = 5  # The validation rounds of AFLOW.

-# Config llm model, you can modify `config/config2.yaml` to use more llms.
-mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
-claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+def parse_args():  # CLI overrides for the experiment defaults; returns an argparse.Namespace
+    parser = argparse.ArgumentParser(description="AFlow Optimizer for MBPP")
+    parser.add_argument("--dataset", type=str, default="MBPP", help="Dataset type")
+    parser.add_argument("--sample", type=int, default=4, help="Sample count")
+    parser.add_argument("--question_type", type=str, default="code", help="Question type")
+    parser.add_argument(
+        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
+    )
+    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
+    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
+    parser.add_argument("--check_convergence", type=lambda s: s.lower() in ("1", "true", "yes"), default=True, help="Whether to enable early stop")
+    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
+    return parser.parse_args()

-# Config operators.
-operators = [
-    "Custom",  # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
-    # "AnswerGenerate",              # It's for qa
-    "CustomCodeGenerate",  # It's for code
-    "ScEnsemble",  # It's for code, math and qa
-    "Test",  # It's for code
-    # "Programmer",  # It's for math
-]
-
-# Create an optimizer instance
-optimizer = Optimizer(
-    dataset=dataset,  # Config dataset
-    question_type=question_type,  # Config Question Type
-    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
-    exec_llm_config=mini_llm_config,  # Config Execution LLM
-    check_convergence=check_convergence,  # Whether Early Stop
-    operators=operators,  # Config Operators you want to use
-    optimized_path=optimized_path,  # Config Optimized workflow's file path
-    sample=sample,  # Only Top(sample) rounds will be selected.
-    initial_round=initial_round,  # Optimize from initial round
-    max_rounds=max_rounds,  # The max iteration of AFLOW.
-    validation_rounds=validation_rounds,  # The validation rounds of AFLOW.
-)

 if __name__ == "__main__":
-    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    args = parse_args()
+
+    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+    operators = [
+        "Custom",
+        "CustomCodeGenerate",
+        "ScEnsemble",
+        "Test",
+    ]
+
+    optimizer = Optimizer(
+        dataset=args.dataset,
+        question_type=args.question_type,
+        opt_llm_config=claude_llm_config,
+        exec_llm_config=mini_llm_config,
+        check_convergence=args.check_convergence,
+        operators=operators,
+        optimized_path=args.optimized_path,
+        sample=args.sample,
+        initial_round=args.initial_round,
+        max_rounds=args.max_rounds,
+        validation_rounds=args.validation_rounds,
+    )
+
    optimizer.optimize("Graph")
-    # Test workflow via setting the optimizer's mode to 'Test'
-    # optimizer.optimize("Test")
--- a/examples/aflow/optimize.py
+++ b/examples/aflow/optimize.py
@ -9,17 +9,17 @@ from metagpt.configs.models_config import ModelsConfig
 from metagpt.ext.aflow.data.download_data import download
 from metagpt.ext.aflow.scripts.optimizer import Optimizer

-# DatasetType, QuestionType, and OptimizerType definitions
-# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
-# QuestionType = Literal["math", "code", "qa"]
-# OptimizerType = Literal["Graph", "Test"]
-

 def parse_args():
    parser = argparse.ArgumentParser(description="AFlow Optimizer")
-    parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        default="MATH",
+        help="Dataset type, including HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP",
+    )
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
-    parser.add_argument("--question_type", type=str, default="math", help="Question type")
+    parser.add_argument("--question_type", type=str, default="math", help="Question type, including math, code, qa")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
--- a/metagpt/actions/action_node.py
+++ b/metagpt/actions/action_node.py
@ -510,8 +510,9 @@ class ActionNode:
        return {field_name: field.annotation for field_name, field in model_class.model_fields.items()}

    def xml_compile(self, context):
-        # TODO 再来一版
-
+        """
+        Compile the prompt to make it easier for the model to understand the format.
+        """
        field_names = self.get_field_names()
        # Construct the example using the field names
        examples = []
--- a/metagpt/ext/aflow/benchmark/utils.py
+++ b/metagpt/ext/aflow/benchmark/utils.py
@ -11,12 +11,12 @@ import os

 import numpy as np

-from metagpt.utils.common import write_json_file
+from metagpt.utils.common import read_json_file, write_json_file


 def generate_random_indices(n, n_samples, test=False):
    """
-    生成随机索引
+    Generate random indices
    """

    def _set_seed(seed=42):
@ -52,20 +52,16 @@ def log_mismatch(problem, expected_output, prediction, predicted_number, path):

    log_file = os.path.join(path, "log.json")

-    # 检查log文件是否已经存在
+    # Check if the log file already exists
    if os.path.exists(log_file):
-        # 如果存在，加载现有的日志数据
-        with open(log_file, "r", encoding="utf-8") as f:
-            try:
-                data = json.load(f)
-            except json.JSONDecodeError:
-                data = []
+        # If it exists, load the existing log data
+        data = read_json_file(log_file)
    else:
-        # 如果不存在，创建一个新的日志列表
+        # If it does not exist, create a new log list
        data = []

-    # 添加新的日志记录
+    # Add the new log entry
    data.append(log_data)

-    # 将数据写回到log.json文件
+    # Write the data back to log.json file
    write_json_file(log_file, data, encoding="utf-8", indent=4)
--- a/metagpt/ext/aflow/scripts/optimizer.py
+++ b/metagpt/ext/aflow/scripts/optimizer.py
@ -10,6 +10,7 @@ from typing import List, Literal
 from pydantic import BaseModel, Field

 from metagpt.actions.action_node import ActionNode
+from metagpt.ext.aflow.scripts.evaluator import DatasetType
 from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
 from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
 from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
@ -18,7 +19,6 @@ from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
 from metagpt.logs import logger
 from metagpt.provider.llm_provider_registry import create_llm_instance

-DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
 QuestionType = Literal["math", "code", "qa"]
 OptimizerType = Literal["Graph", "Test"]

--- a/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
@ -76,8 +76,8 @@ class ConvergenceUtils:
        if len(self.avg_scores) < top_k + 1:
            return False, None, None
        convergence_count = 0  # Convergence counter
-        previous_Y = None  # Y value of the previous round (average of top_k scores)
-        sigma_Y_previous = None  # Standard error of Y value from previous round
+        previous_y = None  # Y value of the previous round (average of top_k scores)
+        sigma_y_previous = None  # Standard error of Y value from previous round
        for i in range(len(self.avg_scores)):
            # Dynamically select top_k from current round and all previous rounds
            top_k_indices = np.argsort(self.avg_scores[: i + 1])[::-1][
@ -87,18 +87,18 @@ class ConvergenceUtils:
            top_k_stds = [
                self.stds[j] for j in top_k_indices
            ]  # Get list of standard deviations corresponding to top k scores
-            # Calculate mean of top k scores for current round, i.e., Y_current
-            Y_current = np.mean(top_k_scores)
-            # Calculate standard error of Y_current (sigma_Y_current), representing score dispersion
-            sigma_Y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
+            # Calculate mean of top k scores for current round, i.e., y_current
+            y_current = np.mean(top_k_scores)
+            # Calculate standard error of y_current (sigma_y_current), representing score dispersion
+            sigma_y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
            # If not the first round, calculate change in Y (Delta_Y) and corresponding standard error
-            if previous_Y is not None:
+            if previous_y is not None:
                # Calculate Y difference between current round and previous round
-                Delta_Y = Y_current - previous_Y
+                delta_y = y_current - previous_y
                # Calculate standard error of Y difference (sigma_Delta_Y)
-                sigma_Delta_Y = np.sqrt(sigma_Y_current**2 + sigma_Y_previous**2)
+                sigma_delta_y = np.sqrt(sigma_y_current**2 + sigma_y_previous**2)
                # Check if Y change is within acceptable confidence interval, i.e., convergence condition
-                if abs(Delta_Y) <= z * sigma_Delta_Y:
+                if abs(delta_y) <= z * sigma_delta_y:
                    convergence_count += 1
                    # If consecutive converged rounds reach set value, return convergence information
                    if convergence_count >= consecutive_rounds:
@ -107,8 +107,8 @@ class ConvergenceUtils:
                    # If change is large, reset convergence counter
                    convergence_count = 0
            # Update Y value and standard error for previous round
-            previous_Y = Y_current
-            sigma_Y_previous = sigma_Y_current
+            previous_y = y_current
+            sigma_y_previous = sigma_y_current
        # If convergence condition not met, return not converged
        return False, None, None

--- a/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
@ -7,6 +7,7 @@ import numpy as np
 import pandas as pd

 from metagpt.logs import logger
+from metagpt.utils.common import read_json_file, write_json_file


 class DataUtils:
@ -17,11 +18,7 @@ class DataUtils:
    def load_results(self, path: str) -> list:
        result_path = os.path.join(path, "results.json")
        if os.path.exists(result_path):
-            with open(result_path, "r") as json_file:
-                try:
-                    return json.load(json_file)
-                except json.JSONDecodeError:
-                    return []
+            return read_json_file(result_path, encoding="utf-8")
        return []

    def get_top_rounds(self, sample: int, path=None, mode="Graph"):
@ -97,8 +94,7 @@ class DataUtils:
        if not os.path.exists(log_dir):
            return ""  # 如果文件不存在，返回空字符串
        logger.info(log_dir)
-        with open(log_dir, "r", encoding="utf-8") as f:
-            data = json.load(f)
+        data = read_json_file(log_dir, encoding="utf-8")

        if isinstance(data, dict):
            data = [data]
@ -125,8 +121,7 @@ class DataUtils:
        return {"round": round, "score": score, "avg_cost": avg_cost, "total_cost": total_cost, "time": now}

    def save_results(self, json_file_path: str, data: list):
-        with open(json_file_path, "w") as json_file:
-            json.dump(data, json_file, default=str, indent=4)
+        write_json_file(json_file_path, data, encoding="utf-8", indent=4)

    def _load_scores(self, path=None, mode="Graph"):
        if mode == "Graph":
@ -137,8 +132,7 @@ class DataUtils:
        result_file = os.path.join(rounds_dir, "results.json")
        self.top_scores = []

-        with open(result_file, "r", encoding="utf-8") as file:
-            data = json.load(file)
+        data = read_json_file(result_file, encoding="utf-8")
        df = pd.DataFrame(data)

        scores_per_round = df.groupby("round")["score"].mean().to_dict()
--- a/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
@ -3,6 +3,7 @@ import os
 from collections import defaultdict

 from metagpt.logs import logger
+from metagpt.utils.common import read_json_file, write_json_file


 class ExperienceUtils:
@ -24,23 +25,22 @@ class ExperienceUtils:
                    round_number = int(round_dir.split("_")[1])
                    json_file_path = os.path.join(round_path, "experience.json")
                    if os.path.exists(json_file_path):
-                        with open(json_file_path, "r", encoding="utf-8") as json_file:
-                            data = json.load(json_file)
-                            father_node = data["father node"]
+                        data = read_json_file(json_file_path, encoding="utf-8")
+                        father_node = data["father node"]

-                            if experience_data[father_node]["score"] is None:
-                                experience_data[father_node]["score"] = data["before"]
+                        if experience_data[father_node]["score"] is None:
+                            experience_data[father_node]["score"] = data["before"]

-                            if data["succeed"]:
-                                experience_data[father_node]["success"][round_number] = {
-                                    "modification": data["modification"],
-                                    "score": data["after"],
-                                }
-                            else:
-                                experience_data[father_node]["failure"][round_number] = {
-                                    "modification": data["modification"],
-                                    "score": data["after"],
-                                }
+                        if data["succeed"]:
+                            experience_data[father_node]["success"][round_number] = {
+                                "modification": data["modification"],
+                                "score": data["after"],
+                            }
+                        else:
+                            experience_data[father_node]["failure"][round_number] = {
+                                "modification": data["modification"],
+                                "score": data["after"],
+                            }
                except Exception as e:
                    logger.info(f"Error processing {round_dir}: {str(e)}")

@ -93,5 +93,4 @@ class ExperienceUtils:
        experience["after"] = avg_score
        experience["succeed"] = bool(avg_score > experience["before"])

-        with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
-            json.dump(experience, file, ensure_ascii=False, indent=4)
+        write_json_file(os.path.join(directory, "experience.json"), experience, encoding="utf-8", indent=4)
--- a/metagpt/ext/aflow/scripts/utils.py
+++ b/metagpt/ext/aflow/scripts/utils.py
@ -6,11 +6,17 @@

 import json
 import re
+from enum import Enum
 from typing import Any, List, Tuple


-def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
-    if dataset == "HumanEval":
+class CodeDataset(Enum):
+    HUMAN_EVAL = "HumanEval"
+    MBPP = "MBPP"
+
+
+def extract_test_cases_from_jsonl(entry_point: str, dataset: CodeDataset = CodeDataset.HUMAN_EVAL):
+    if dataset == CodeDataset.HUMAN_EVAL:
        file_path = "metagpt/ext/aflow/data/humaneval_public_test.jsonl"
        # Retain the original hardcoded test cases
        hardcoded_cases = {
@ -25,7 +31,7 @@ def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
            "sum_squares": "",
            "starts_one_ends": "",
        }
-    elif dataset == "MBPP":
+    elif dataset == CodeDataset.MBPP:
        file_path = "metagpt/ext/aflow/data/mbpp_public_test.jsonl"
        hardcoded_cases = {
            "remove_odd": "",
--- a/metagpt/ext/aflow/scripts/workflow.py
+++ b/metagpt/ext/aflow/scripts/workflow.py
@ -3,13 +3,11 @@
 # @Author  : didi
 # @Desc    : Basic Graph Class

-from typing import Literal

+from metagpt.ext.aflow.scripts.evaluator import DatasetType
 from metagpt.provider.llm_provider_registry import create_llm_instance
 from metagpt.utils.cost_manager import CostManager

-DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
-

 class Workflow:
    def __init__(