Address Jinyu's review comments and add reproduction code in the experiment folder

This commit is contained in:
didi 2024-10-24 18:52:37 +08:00
parent d2f90dbda0
commit 39ae68225b
11 changed files with 399 additions and 8 deletions

View file

@ -4,6 +4,10 @@ # AFlow: Automating Agentic Workflow Generation
[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
<p align="center">
<a href=""><img src="../../docs/resources/AFLOW-performance.png" alt="Performance Of AFLOW" title="Performance of AFlow<sub>1</sub>"></a>
</p>
## Framework Components
- **Node**: Basic unit of LLM invocation. See `metagpt/actions/action_node.py` for a flexible interface to control LLM, temperature, format, and prompt.
@ -12,11 +16,19 @@ ## Framework Components
- **Optimizer**: Uses LLMs within a Monte Carlo Tree Search variant to explore and refine workflows. Iteratively selects, expands, evaluates, and updates workflows based on performance. See `metagpt/ext/aflow/scripts/optimizer.py` for details.
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.
<p align="center">
<a href=""><img src="../../docs/resources/AFLOW-method.png" alt="Framework Of AFLOW" title="Framework of AFlow<sub>1</sub>"></a>
</p>
## Datasets
### Experimental Datasets
We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`
<p align="center">
<a href=""><img src="../../docs/resources/AFLOW-experiment.png" alt="Comparison Of AFLOW" title="Comparison between AFlow and other methods<sub>1</sub>"></a>
</p>
### Custom Datasets
For custom tasks, you can reference the code in the metagpt/ext/aflow/benchmark folder. Inherit the `BaseBenchmark` class and implement `evaluate_problem`, `calculate_score`, and `get_result_columns` to add your custom dataset benchmark. Then, add your benchmark name in `metagpt/ext/aflow/scripts/evaluator.py` and `metagpt/ext/aflow/scripts/optimizer.py` to find effective workflows for your custom dataset.

View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# @Date    : 8/23/2024 20:00 PM
# @Author  : didi
# @Desc    : Entrance of AFlow — reproduces the DROP (qa) experiment.

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Type aliases declared in metagpt/ext/aflow/scripts/optimizer.py:
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# On first use, download the datasets and initial rounds; to look at previous
# results as well, download the results too.
# download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "DROP"  # Must be one of the DatasetType literals above
sample: int = 4  # How many workflows are resampled from the generated workflows
question_type: QuestionType = "qa"  # Must be the QuestionType matching the dataset
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Directory where optimized workflows are saved
initial_round: int = 1  # Round to start (or resume) optimization from
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Stop early once performance converges
validation_rounds: int = 5  # Number of validation rounds of AFLOW

# Configure LLMs; you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")  # executes workflows
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")  # optimizes workflows

# Operators the optimizer may use; enable the ones matching the question type.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes
    "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    # "Programmer",  # For math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Benchmark dataset to optimize for
    question_type=question_type,  # Task category (math / code / qa)
    opt_llm_config=claude_llm_config,  # LLM that rewrites/optimizes workflows
    exec_llm_config=mini_llm_config,  # LLM that executes workflows during evaluation
    check_convergence=check_convergence,  # Whether to stop early on convergence
    operators=operators,  # Operators the optimizer may compose
    optimized_path=optimized_path,  # Where optimized workflows are written
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize starting from this round
    max_rounds=max_rounds,  # Maximum number of optimization iterations
    validation_rounds=validation_rounds,  # Validation rounds of AFLOW
)

if __name__ == "__main__":
    # Optimize workflows via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflows via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# @Date    : 8/23/2024 20:00 PM
# @Author  : didi
# @Desc    : Entrance of AFlow — reproduces the GSM8K (math) experiment.

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Type aliases declared in metagpt/ext/aflow/scripts/optimizer.py:
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# On first use, download the datasets and initial rounds; to look at previous
# results as well, download the results too.
# download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "GSM8K"  # Must be one of the DatasetType literals above
sample: int = 4  # How many workflows are resampled from the generated workflows
question_type: QuestionType = "math"  # Must be the QuestionType matching the dataset
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Directory where optimized workflows are saved
initial_round: int = 1  # Round to start (or resume) optimization from
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Stop early once performance converges
validation_rounds: int = 5  # Number of validation rounds of AFLOW

# Configure LLMs; you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")  # executes workflows
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")  # optimizes workflows

# Operators the optimizer may use; enable the ones matching the question type.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes
    # "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    "Programmer",  # For math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Benchmark dataset to optimize for
    question_type=question_type,  # Task category (math / code / qa)
    opt_llm_config=claude_llm_config,  # LLM that rewrites/optimizes workflows
    exec_llm_config=mini_llm_config,  # LLM that executes workflows during evaluation
    check_convergence=check_convergence,  # Whether to stop early on convergence
    operators=operators,  # Operators the optimizer may compose
    optimized_path=optimized_path,  # Where optimized workflows are written
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize starting from this round
    max_rounds=max_rounds,  # Maximum number of optimization iterations
    validation_rounds=validation_rounds,  # Validation rounds of AFLOW
)

if __name__ == "__main__":
    # Optimize workflows via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflows via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# @Date    : 8/23/2024 20:00 PM
# @Author  : didi
# @Desc    : Entrance of AFlow — reproduces the HotpotQA (qa) experiment.

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Type aliases declared in metagpt/ext/aflow/scripts/optimizer.py:
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# On first use, download the datasets and initial rounds; to look at previous
# results as well, download the results too.
# download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "HotpotQA"  # Must be one of the DatasetType literals above
sample: int = 4  # How many workflows are resampled from the generated workflows
question_type: QuestionType = "qa"  # Must be the QuestionType matching the dataset
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Directory where optimized workflows are saved
initial_round: int = 1  # Round to start (or resume) optimization from
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Stop early once performance converges
validation_rounds: int = 5  # Number of validation rounds of AFLOW

# Configure LLMs; you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")  # executes workflows
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")  # optimizes workflows

# Operators the optimizer may use; enable the ones matching the question type.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes
    "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    # "Programmer",  # For math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Benchmark dataset to optimize for
    question_type=question_type,  # Task category (math / code / qa)
    opt_llm_config=claude_llm_config,  # LLM that rewrites/optimizes workflows
    exec_llm_config=mini_llm_config,  # LLM that executes workflows during evaluation
    check_convergence=check_convergence,  # Whether to stop early on convergence
    operators=operators,  # Operators the optimizer may compose
    optimized_path=optimized_path,  # Where optimized workflows are written
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize starting from this round
    max_rounds=max_rounds,  # Maximum number of optimization iterations
    validation_rounds=validation_rounds,  # Validation rounds of AFLOW
)

if __name__ == "__main__":
    # Optimize workflows via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflows via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# @Date    : 8/23/2024 20:00 PM
# @Author  : didi
# @Desc    : Entrance of AFlow — reproduces the HumanEval (code) experiment.

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Type aliases declared in metagpt/ext/aflow/scripts/optimizer.py:
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# On first use, download the datasets and initial rounds; to look at previous
# results as well, download the results too.
# download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "HumanEval"  # Must be one of the DatasetType literals above
sample: int = 4  # How many workflows are resampled from the generated workflows
question_type: QuestionType = "code"  # Must be the QuestionType matching the dataset
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Directory where optimized workflows are saved
initial_round: int = 1  # Round to start (or resume) optimization from
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Stop early once performance converges
validation_rounds: int = 5  # Number of validation rounds of AFLOW

# Configure LLMs; you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")  # executes workflows
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")  # optimizes workflows

# Operators the optimizer may use; enable the ones matching the question type.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes
    # "AnswerGenerate",  # For qa
    "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    "Test",  # For code
    # "Programmer",  # For math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Benchmark dataset to optimize for
    question_type=question_type,  # Task category (math / code / qa)
    opt_llm_config=claude_llm_config,  # LLM that rewrites/optimizes workflows
    exec_llm_config=mini_llm_config,  # LLM that executes workflows during evaluation
    check_convergence=check_convergence,  # Whether to stop early on convergence
    operators=operators,  # Operators the optimizer may compose
    optimized_path=optimized_path,  # Where optimized workflows are written
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize starting from this round
    max_rounds=max_rounds,  # Maximum number of optimization iterations
    validation_rounds=validation_rounds,  # Validation rounds of AFLOW
)

if __name__ == "__main__":
    # Optimize workflows via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflows via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# @Date    : 8/23/2024 20:00 PM
# @Author  : didi
# @Desc    : Entrance of AFlow — reproduces the MATH (math) experiment.

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Type aliases declared in metagpt/ext/aflow/scripts/optimizer.py:
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# On first use, download the datasets and initial rounds; to look at previous
# results as well, download the results too.
# download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "MATH"  # Must be one of the DatasetType literals above
sample: int = 4  # How many workflows are resampled from the generated workflows
question_type: QuestionType = "math"  # Must be the QuestionType matching the dataset
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Directory where optimized workflows are saved
initial_round: int = 1  # Round to start (or resume) optimization from
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Stop early once performance converges
validation_rounds: int = 5  # Number of validation rounds of AFLOW

# Configure LLMs; you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")  # executes workflows
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")  # optimizes workflows

# Operators the optimizer may use; enable the ones matching the question type.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes
    # "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    "Programmer",  # For math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Benchmark dataset to optimize for
    question_type=question_type,  # Task category (math / code / qa)
    opt_llm_config=claude_llm_config,  # LLM that rewrites/optimizes workflows
    exec_llm_config=mini_llm_config,  # LLM that executes workflows during evaluation
    check_convergence=check_convergence,  # Whether to stop early on convergence
    operators=operators,  # Operators the optimizer may compose
    optimized_path=optimized_path,  # Where optimized workflows are written
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize starting from this round
    max_rounds=max_rounds,  # Maximum number of optimization iterations
    validation_rounds=validation_rounds,  # Validation rounds of AFLOW
)

if __name__ == "__main__":
    # Optimize workflows via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflows via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
# @Date    : 8/23/2024 20:00 PM
# @Author  : didi
# @Desc    : Entrance of AFlow — reproduces the MBPP (code) experiment.

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType

# Type aliases declared in metagpt/ext/aflow/scripts/optimizer.py:
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# On first use, download the datasets and initial rounds; to look at previous
# results as well, download the results too.
# download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "MBPP"  # Must be one of the DatasetType literals above
sample: int = 4  # How many workflows are resampled from the generated workflows
question_type: QuestionType = "code"  # Must be the QuestionType matching the dataset
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Directory where optimized workflows are saved
initial_round: int = 1  # Round to start (or resume) optimization from
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Stop early once performance converges
validation_rounds: int = 5  # Number of validation rounds of AFLOW

# Configure LLMs; you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")  # executes workflows
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")  # optimizes workflows

# Operators the optimizer may use; enable the ones matching the question type.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes
    # "AnswerGenerate",  # For qa
    "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    "Test",  # For code
    # "Programmer",  # For math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Benchmark dataset to optimize for
    question_type=question_type,  # Task category (math / code / qa)
    opt_llm_config=claude_llm_config,  # LLM that rewrites/optimizes workflows
    exec_llm_config=mini_llm_config,  # LLM that executes workflows during evaluation
    check_convergence=check_convergence,  # Whether to stop early on convergence
    operators=operators,  # Operators the optimizer may compose
    optimized_path=optimized_path,  # Where optimized workflows are written
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize starting from this round
    max_rounds=max_rounds,  # Maximum number of optimization iterations
    validation_rounds=validation_rounds,  # Validation rounds of AFLOW
)

if __name__ == "__main__":
    # Optimize workflows via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflows via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -17,13 +17,14 @@ from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, Question
download(["datasets", "initial_rounds"])
# Crucial Parameters
dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType
dataset: DatasetType = "MATH" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType
question_type: QuestionType = "math" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20
check_convergence: bool = True
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
@ -32,7 +33,7 @@ claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
# Config operators.
operators = [
"Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
# "AnswerGenerate" # It's for qa
# "AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
@ -51,6 +52,7 @@ optimizer = Optimizer(
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":