mv aflow from example to ext

2026-07-23 17:01:08 +02:00 · 2024-10-22 10:54:06 +08:00 · 2024-10-22 10:54:06 +08:00 · fcc5e19160
commit fcc5e19160
parent 0b69ffe198
29 changed files with 173 additions and 30 deletions
--- a/examples/aflow/optimize.py
+++ b/examples/aflow/optimize.py
@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# @Date    : 8/23/2024 20:00 PM
+# @Author  : didi
+# @Desc    : Entrance of AFlow.
+
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
+from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
+from metagpt.ext.aflow.data.download_data import download
+from metagpt.configs.models_config import ModelsConfig
+from typing import Literal
+
+# DatasetType, QuestionType, and OptimizerType definitions
+# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
+# QuestionType = Literal["math", "code", "qa"]
+# OptimizerType = Literal["Graph", "Test"]
+
+# On first use, download the datasets and initial rounds. To take a look at the results, download "results" as well.
+# download(["datasets", "results", "initial_rounds"])
+
+# Crucial Parameters
+dataset: DatasetType = "GSM8K"  # Must be one of the DatasetType values
+sample: int = 4  # Sample count, i.e. how many workflows will be resampled from the generated workflows
+question_type: QuestionType = "math"  # Must be one of the QuestionType values and match the dataset (GSM8K is a math benchmark)
+optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Optimized result save path; the generated workflows are imported from this package
+initial_round: int = 1  # Round to start optimizing from
+max_rounds: int = 20  # Maximum number of optimization rounds
+check_convergence: bool = True  # Whether to stop early
+
+# Config llm model, you can modify `config/config2.yaml` to use more llms.
+mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
+claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
+
+# Config operators. Enable the ones that fit `question_type`.
+operators = [
+    "Custom",                       # The basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
+    # "AnswerGenerate",             # It's for qa
+    # "CustomCodeGenerate",         # It's for code
+    "ScEnsemble",                 # It's for code, math and qa
+    # "Test",                       # It's for code
+    "Programmer",                 # It's for math
+]
+
+# Create an optimizer instance
+optimizer = Optimizer(
+    dataset=dataset,                        # Config dataset
+    question_type=question_type,            # Config Question Type
+    opt_llm_config=claude_llm_config,       # Config Optimizer LLM
+    exec_llm_config=mini_llm_config,        # Config Execution LLM
+    check_convergence=check_convergence,    # Whether Early Stop
+    operators=operators,                    # Config Operators you want to use
+    optimized_path=optimized_path,          # Config Optimized workflow's file path
+    sample=sample,                          # Only Top(sample) rounds will be selected.
+    initial_round=initial_round,            # Optimize from initial round
+    max_rounds=max_rounds,                  # The max iteration of AFLOW.
+)
+
+if __name__ == "__main__":
+    # Optimize workflow via setting the optimizer's mode to 'Graph'
+    optimizer.optimize("Graph")
+    # Test workflow via setting the optimizer's mode to 'Test'
+    # optimizer.optimize("Test")
--- a/metagpt/ext/aflow/README.md
+++ b/metagpt/ext/aflow/README.md
@ -0,0 +1,70 @@
+# AFlow: Automating Agentic Workflow Generation
+
+AFlow is a framework for automatically generating and optimizing Agentic Workflows. It uses Monte Carlo tree search in a code-represented workflow space to find effective workflows, replacing manual development with machine effort. Our approach shows potential to outperform handcrafted workflows on various tasks.
+
+[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
+
+[Insert performance graph/image here]
+
+## Framework Components
+
+- **Node**: Basic unit of LLM invocation. See `action_node.py` for a flexible interface to control LLM, temperature, format, and prompt.
+- **Operator**: Predefined combinations of Nodes to enhance search efficiency. Encapsulates common operations like Generate, Format, Review, Revise, Ensemble, Test, and Programmer.
+- **Workflow**: A sequence of LLM-invoking nodes connected by edges. Can be represented as graphs, neural networks, or code to express various execution structures.
+- **Optimizer**: Uses LLMs within a Monte Carlo Tree Search variant to explore and refine workflows. Iteratively selects, expands, evaluates, and updates workflows based on performance.
+- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows.
+
+## Datasets
+
+We provide implementations for [list datasets here]. 
+
+Data is available at [link to data].
+
+For custom tasks, [brief instructions or link to documentation].
+
+## Quick Start
+
+1. Configure your search in `optimize.py`:
+   - Open `examples/aflow/optimize.py`
+   - Set the following parameters:
+     ```python
+     dataset = "HumanEval"  # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
+     question_type = "code"  # Choose from: "math", "code", "qa"
+     sample = 5  # Number of samples to use for optimization
+     check_convergence = True  # Whether to check for convergence
+     optimized_path = "path/to/optimized/workflows"  # Path to save optimized workflows
+     initial_round = 1  # Starting round number
+     max_rounds = 20  # Maximum number of optimization rounds
+     ```
+   - Adjust these parameters according to your specific requirements and dataset
+2. Set up parameters in `config/config2.yaml` (see `metagpt/ext/aflow/config2.example.yaml` for reference)
+3. Set the operator you want to use in `optimize.py` and in `xxxx`
+4. Download the init round of six datasets and put them in `xxxxxx`
+5. Add your custom dataset and corresponding evaluation function:
+
+- Create a new Python file in the `metagpt/ext/aflow/benchmark/` directory, named `{custom_dataset_name}.py`
+- Implement the following key functions in this new file:
+  - `load_data`: for loading the dataset
+  - `evaluate_problem`: for evaluating a single problem solution
+  - `evaluate_all_problems`: for evaluating all problems
+  - `save_results_to_csv`: for saving evaluation results
+  - `optimize_{custom_dataset_name}_evaluation`: main evaluation function that integrates the above functionalities
+- Add your custom dataset name and config val_list in `metagpt/ext/aflow/scripts/evaluator.py`
+
+
+## License
+
+[License information]
+
+## Citation
+
+If you use AFlow in your research, please cite our paper:
+
+```
+@article{zhang2024aflow,
+  title={AFlow: Automating Agentic Workflow Generation},
+  author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
+  journal={arXiv preprint arXiv:2410.10762},
+  year={2024}
+}
+```
--- a/metagpt/ext/aflow/benchmark/benchmark.py
+++ b/metagpt/ext/aflow/benchmark/benchmark.py
--- a/metagpt/ext/aflow/benchmark/drop.py
+++ b/metagpt/ext/aflow/benchmark/drop.py
@ -12,7 +12,7 @@ import pandas as pd
 from tqdm.asyncio import tqdm_asyncio
 from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type

-from examples.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark

 class DROPBenchmark(BaseBenchmark):
    def __init__(self, name: str, file_path: str, log_path: str):
--- a/metagpt/ext/aflow/benchmark/gsm8k.py
+++ b/metagpt/ext/aflow/benchmark/gsm8k.py
@ -17,7 +17,7 @@ from datetime import datetime
 from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type


-from examples.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark

 class GSM8KBenchmark(BaseBenchmark):
    def __init__(self, name: str, file_path: str, log_path: str):
--- a/metagpt/ext/aflow/benchmark/hotpotqa.py
+++ b/metagpt/ext/aflow/benchmark/hotpotqa.py
@ -9,7 +9,7 @@ import os
 from collections import Counter
 from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type

-from examples.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark

 class HotpotQABenchmark(BaseBenchmark):
    def __init__(self, name: str, file_path: str, log_path: str):
--- a/metagpt/ext/aflow/benchmark/humaneval.py
+++ b/metagpt/ext/aflow/benchmark/humaneval.py
@ -10,7 +10,7 @@ from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_t

 import pandas as pd

-from examples.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
 from metagpt.actions.code_sanitize import sanitize

 class HumanEvalBenchmark(BaseBenchmark):
--- a/metagpt/ext/aflow/benchmark/math.py
+++ b/metagpt/ext/aflow/benchmark/math.py
@ -9,7 +9,7 @@ from typing import Any, Callable, Tuple, List
 from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type


-from examples.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark

 class MATHBenchmark(BaseBenchmark):
    def __init__(self, name: str, file_path: str, log_path: str):
--- a/metagpt/ext/aflow/benchmark/mbpp.py
+++ b/metagpt/ext/aflow/benchmark/mbpp.py
@ -8,7 +8,7 @@ from typing import List, Tuple, Callable, Any, Optional, Dict
 from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type

 from metagpt.actions.code_sanitize import sanitize
-from examples.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark

 class MBPPBenchmark(BaseBenchmark):
    def __init__(self, name: str, file_path: str, log_path: str):
--- a/metagpt/ext/aflow/benchmark/utils.py
+++ b/metagpt/ext/aflow/benchmark/utils.py
--- a/metagpt/ext/aflow/config2.example.yaml
+++ b/metagpt/ext/aflow/config2.example.yaml
@ -0,0 +1,12 @@
+models:
+ "<model_name>": # model: "gpt-4-turbo"  # or gpt-3.5-turbo
+   api_type: "openai"  # or azure / ollama / groq etc.
+   base_url: "<your base url>" 
+   api_key: "<your api key>"
+   temperature: 0
+ "<model_name>":  
+   api_type: "openai"  
+   base_url: "<your base url>"
+   api_key: "<your api key>"
+   temperature: 0
+CALC_USAGE: True 
--- a/metagpt/ext/aflow/data/download_data.py
+++ b/metagpt/ext/aflow/data/download_data.py
--- a/metagpt/ext/aflow/full
+++ b/metagpt/ext/aflow/full
--- a/metagpt/ext/aflow/scripts/evaluator.py
+++ b/metagpt/ext/aflow/scripts/evaluator.py
@ -6,13 +6,13 @@
 from typing import Literal, Tuple, Optional, Dict
 import asyncio

-from examples.aflow.benchmark.benchmark import BaseBenchmark
-from examples.aflow.benchmark.gsm8k import GSM8KBenchmark
-from examples.aflow.benchmark.math import MATHBenchmark
-from examples.aflow.benchmark.humaneval import HumanEvalBenchmark
-from examples.aflow.benchmark.hotpotqa import HotpotQABenchmark
-from examples.aflow.benchmark.mbpp import MBPPBenchmark
-from examples.aflow.benchmark.drop import DROPBenchmark
+from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
+from metagpt.ext.aflow.benchmark.gsm8k import GSM8KBenchmark
+from metagpt.ext.aflow.benchmark.math import MATHBenchmark
+from metagpt.ext.aflow.benchmark.humaneval import HumanEvalBenchmark
+from metagpt.ext.aflow.benchmark.hotpotqa import HotpotQABenchmark
+from metagpt.ext.aflow.benchmark.mbpp import MBPPBenchmark
+from metagpt.ext.aflow.benchmark.drop import DROPBenchmark

 # If you want to customize tasks, add task types here and provide evaluation functions, just like the ones given above
 DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
--- a/metagpt/ext/aflow/scripts/operator.py
+++ b/metagpt/ext/aflow/scripts/operator.py
@ -11,9 +11,9 @@ from typing import Dict, List, Tuple

 import concurrent.futures
 from tenacity import retry, stop_after_attempt, wait_fixed
-from examples.aflow.scripts.utils import extract_test_cases_from_jsonl
+from metagpt.ext.aflow.scripts.utils import extract_test_cases_from_jsonl

-from examples.aflow.scripts.operator_an import (
+from metagpt.ext.aflow.scripts.operator_an import (
    FormatOp,
    GenerateOp,
    CodeGenerateOp,
@ -25,7 +25,7 @@ from examples.aflow.scripts.operator_an import (
    ReviseOp,

 )
-from examples.aflow.scripts.prompts.prompt import (
+from metagpt.ext.aflow.scripts.prompts.prompt import (
    FORMAT_PROMPT,
    ANSWER_GENERATION_PROMPT,
    SC_ENSEMBLE_PROMPT,
@ -35,7 +35,7 @@ from examples.aflow.scripts.prompts.prompt import (
    REVIEW_PROMPT,
    REVISE_PROMPT,
 )
-from examples.aflow.scripts.utils import test_case_2_test_function
+from metagpt.ext.aflow.scripts.utils import test_case_2_test_function
 from metagpt.actions.action_node import ActionNode
 from metagpt.llm import LLM
 from metagpt.logs import logger
--- a/metagpt/ext/aflow/scripts/operator_an.py
+++ b/metagpt/ext/aflow/scripts/operator_an.py
--- a/metagpt/ext/aflow/scripts/optimized/init.py
+++ b/metagpt/ext/aflow/scripts/optimized/init.py
--- a/metagpt/ext/aflow/scripts/optimized/optimized.zip
+++ b/metagpt/ext/aflow/scripts/optimized/optimized.zip
--- a/metagpt/ext/aflow/scripts/optimizer.py
+++ b/metagpt/ext/aflow/scripts/optimizer.py
@ -12,11 +12,11 @@ from pydantic import BaseModel, Field
 from metagpt.actions.action_node import ActionNode
 from metagpt.provider.llm_provider_registry import create_llm_instance
 from metagpt.logs import logger
-from examples.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
-from examples.aflow.scripts.optimizer_utils.data_utils import DataUtils
-from examples.aflow.scripts.optimizer_utils.experience_utils import ExperienceUtils
-from examples.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
-from examples.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
+from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
+from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
+from metagpt.ext.aflow.scripts.optimizer_utils.experience_utils import ExperienceUtils
+from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
+from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils

 DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
 QuestionType = Literal["math", "code", "qa"]
--- a/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/convergence_utils.py
--- a/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/data_utils.py
--- a/metagpt/ext/aflow/scripts/optimizer_utils/evaluation_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/evaluation_utils.py
@ -1,4 +1,4 @@
-from examples.aflow.scripts.evaluator import Evaluator
+from metagpt.ext.aflow.scripts.evaluator import Evaluator


 class EvaluationUtils:
--- a/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/experience_utils.py
--- a/metagpt/ext/aflow/scripts/optimizer_utils/graph_utils.py
+++ b/metagpt/ext/aflow/scripts/optimizer_utils/graph_utils.py
@ -6,7 +6,7 @@ import traceback
 import time
 from metagpt.logs import logger

-from examples.aflow.scripts.prompts.optimize_prompt import (
+from metagpt.ext.aflow.scripts.prompts.optimize_prompt import (
    WORKFLOW_CUSTOM_USE,
    WORKFLOW_INPUT,
    WORKFLOW_OPTIMIZE_PROMPT,
--- a/metagpt/ext/aflow/scripts/prompts/optimize_prompt.py
+++ b/metagpt/ext/aflow/scripts/prompts/optimize_prompt.py
@ -48,8 +48,8 @@ Note: In custom, the input and instruction are directly concatenated(instruction
 """

 WORKFLOW_TEMPLATE = """from typing import Literal
-import examples.aflow.scripts.optimized.{dataset}.workflows.template.operator as operator
-import examples.aflow.scripts.optimized.{dataset}.workflows.round_{round}.prompt as prompt_custom
+import metagpt.ext.aflow.scripts.optimized.{dataset}.workflows.template.operator as operator
+import metagpt.ext.aflow.scripts.optimized.{dataset}.workflows.round_{round}.prompt as prompt_custom
 from metagpt.provider.llm_provider_registry import create_llm_instance
 from metagpt.utils.cost_manager import CostManager

--- a/metagpt/ext/aflow/scripts/prompts/prompt.py
+++ b/metagpt/ext/aflow/scripts/prompts/prompt.py
--- a/metagpt/ext/aflow/scripts/utils.py
+++ b/metagpt/ext/aflow/scripts/utils.py
--- a/metagpt/ext/aflow/scripts/workflow.py
+++ b/metagpt/ext/aflow/scripts/workflow.py
@ -5,7 +5,7 @@

 from typing import Literal

-from examples.aflow.scripts.operator import Generate
+from metagpt.ext.aflow.scripts.operator import Generate
 from metagpt.provider.llm_provider_registry import create_llm_instance
 from metagpt.utils.cost_manager import CostManager

--- a/optimize.py
+++ b/optimize.py
@ -3,9 +3,9 @@
 # @Author  : didi
 # @Desc    : Entrance of AFlow.

-from examples.aflow.scripts.optimizer import Optimizer
-from examples.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
-from examples.aflow.data.download_data import download
+from metagpt.ext.aflow.scripts.optimizer import Optimizer
+from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
+from metagpt.ext.aflow.data.download_data import download
 from metagpt.configs.models_config import ModelsConfig
 from typing import Literal