mv aflow from example to ext

This commit is contained in:
didi 2024-10-22 10:54:06 +08:00
parent 0b69ffe198
commit fcc5e19160
29 changed files with 173 additions and 30 deletions

View file

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# @Date : 8/23/2024 20:00 PM
# @Author : didi
# @Desc : Entrance of AFlow.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
from metagpt.ext.aflow.data.download_data import download
from metagpt.configs.models_config import ModelsConfig
from typing import Literal
# DatasetType, QuestionType, and OptimizerType definitions
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]
# On first use, please download the datasets and initial rounds. If you want to take a look at the results, please download the results as well.
# download(["datasets", "results", "initial_rounds"])
# Crucial Parameters
# Benchmark to optimize on; must be one of the DatasetType literals.
dataset: DatasetType = "GSM8K"
# Sample count: how many workflows will be resampled from the generated workflows.
sample: int = 4
# Must be one of the QuestionType literals and must match the dataset.
# GSM8K is a math word-problem benchmark, so this is "math" (it was "code",
# which disagreed with both the dataset and the math operators enabled below).
question_type: QuestionType = "math"
# Save path for optimized workflows. The workflow template imports generated
# rounds from the `metagpt.ext.aflow.scripts.optimized` package, so the results
# must be written under the matching directory (not the old `examples/` tree).
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Upper bound on optimization iterations
check_convergence: bool = True  # Whether to stop early

# Config llm model, you can modify `config/config2.yaml` to use more llms.
# Both model names below are expected to be declared under `models` in that file.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

# Config operators.
operators = [
    "Custom",  # Basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
    # "AnswerGenerate",  # It's for qa
    # "CustomCodeGenerate",  # It's for code
    "ScEnsemble",  # It's for code, math and qa
    # "Test",  # It's for code
    "Programmer",  # It's for math
]

# Create an optimizer instance
optimizer = Optimizer(
    dataset=dataset,  # Config dataset
    question_type=question_type,  # Config Question Type
    opt_llm_config=claude_llm_config,  # Config Optimizer LLM
    exec_llm_config=mini_llm_config,  # Config Execution LLM
    check_convergence=check_convergence,  # Whether Early Stop
    operators=operators,  # Config Operators you want to use
    optimized_path=optimized_path,  # Config Optimized workflow's file path
    sample=sample,  # Only Top(sample) rounds will be selected.
    initial_round=initial_round,  # Optimize from initial round
    max_rounds=max_rounds,  # The max iteration of AFLOW.
)

if __name__ == "__main__":
    # Optimize workflow via setting the optimizer's mode to 'Graph'
    optimizer.optimize("Graph")
    # Test workflow via setting the optimizer's mode to 'Test'
    # optimizer.optimize("Test")

View file

@ -0,0 +1,70 @@
# AFlow: Automating Agentic Workflow Generation
AFlow is a framework for automatically generating and optimizing Agentic Workflows. It uses Monte Carlo tree search in a code-represented workflow space to find effective workflows, replacing manual development with machine effort. Our approach shows potential to outperform handcrafted workflows on various tasks.
[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
[Insert performance graph/image here]
## Framework Components
- **Node**: Basic unit of LLM invocation. See `action_node.py` for a flexible interface to control LLM, temperature, format, and prompt.
- **Operator**: Predefined combinations of Nodes to enhance search efficiency. Encapsulates common operations like Generate, Format, Review, Revise, Ensemble, Test, and Programmer.
- **Workflow**: A sequence of LLM-invoking nodes connected by edges. Can be represented as graphs, neural networks, or code to express various execution structures.
- **Optimizer**: Uses LLMs within a Monte Carlo Tree Search variant to explore and refine workflows. Iteratively selects, expands, evaluates, and updates workflows based on performance.
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows.
## Datasets
We provide implementations for [list datasets here].
Data is available at [link to data].
For custom tasks, [brief instructions or link to documentation].
## Quick Start
1. Configure your search in `optimize.py`:
- Open `examples/aflow/scripts/optimize.py`
- Set the following parameters:
```python
dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
question_type = "code" # Choose from: "math", "code", "qa"
sample = 5 # Number of samples to use for optimization
check_convergence = True # Whether to check for convergence
optimized_path = "path/to/optimized/workflows" # Path to save optimized workflows
initial_round = 1 # Starting round number
max_rounds = 20 # Maximum number of optimization rounds
```
- Adjust these parameters according to your specific requirements and dataset
2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
3. Set the operator you want to use in `optimize.py` and in `xxxx`
4. Download the init round of six datasets and put them in `xxxxxx`
5. Add your custom dataset and corresponding evaluation function:
- Create a new Python file in the `metagpt/ext/aflow/benchmark/` directory, named `{custom_dataset_name}.py`
- Implement the following key functions in this new file:
- `load_data`: for loading the dataset
- `evaluate_problem`: for evaluating a single problem solution
- `evaluate_all_problems`: for evaluating all problems
- `save_results_to_csv`: for saving evaluation results
- `optimize_{custom_dataset_name}_evaluation`: main evaluation function that integrates the above functionalities
- Add your custom dataset name and configure `val_list` in `metagpt/ext/aflow/scripts/evaluator.py`
## License
[License information]
## Citation
If you use AFlow in your research, please cite our paper:
```
@article{zhang2024aflow,
title={AFlow: Automating Agentic Workflow Generation},
author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
journal={arXiv preprint arXiv:2410.10762},
year={2024}
}
```

View file

@ -12,7 +12,7 @@ import pandas as pd
from tqdm.asyncio import tqdm_asyncio
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
from examples.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
class DROPBenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -17,7 +17,7 @@ from datetime import datetime
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
from examples.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
class GSM8KBenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -9,7 +9,7 @@ import os
from collections import Counter
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
from examples.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
class HotpotQABenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -10,7 +10,7 @@ from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_t
import pandas as pd
from examples.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.actions.code_sanitize import sanitize
class HumanEvalBenchmark(BaseBenchmark):

View file

@ -9,7 +9,7 @@ from typing import Any, Callable, Tuple, List
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
from examples.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
class MATHBenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -8,7 +8,7 @@ from typing import List, Tuple, Callable, Any, Optional, Dict
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
from metagpt.actions.code_sanitize import sanitize
from examples.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
class MBPPBenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -0,0 +1,12 @@
models:
"<model_name>": # model: "gpt-4-turbo" # or gpt-3.5-turbo
api_type: "openai" # or azure / ollama / groq etc.
base_url: "<your base url>"
api_key: "<your api key>"
temperature: 0
"<model_name>":
api_type: "openai"
base_url: "<your base url>"
api_key: "<your api key>"
temperature: 0
CALC_USAGE: True

Binary file not shown.

View file

@ -6,13 +6,13 @@
from typing import Literal, Tuple, Optional, Dict
import asyncio
from examples.aflow.benchmark.benchmark import BaseBenchmark
from examples.aflow.benchmark.gsm8k import GSM8KBenchmark
from examples.aflow.benchmark.math import MATHBenchmark
from examples.aflow.benchmark.humaneval import HumanEvalBenchmark
from examples.aflow.benchmark.hotpotqa import HotpotQABenchmark
from examples.aflow.benchmark.mbpp import MBPPBenchmark
from examples.aflow.benchmark.drop import DROPBenchmark
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.ext.aflow.benchmark.gsm8k import GSM8KBenchmark
from metagpt.ext.aflow.benchmark.math import MATHBenchmark
from metagpt.ext.aflow.benchmark.humaneval import HumanEvalBenchmark
from metagpt.ext.aflow.benchmark.hotpotqa import HotpotQABenchmark
from metagpt.ext.aflow.benchmark.mbpp import MBPPBenchmark
from metagpt.ext.aflow.benchmark.drop import DROPBenchmark
# If you want to customize tasks, add task types here and provide evaluation functions, just like the ones given above
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]

View file

@ -11,9 +11,9 @@ from typing import Dict, List, Tuple
import concurrent.futures
from tenacity import retry, stop_after_attempt, wait_fixed
from examples.aflow.scripts.utils import extract_test_cases_from_jsonl
from metagpt.ext.aflow.scripts.utils import extract_test_cases_from_jsonl
from examples.aflow.scripts.operator_an import (
from metagpt.ext.aflow.scripts.operator_an import (
FormatOp,
GenerateOp,
CodeGenerateOp,
@ -25,7 +25,7 @@ from examples.aflow.scripts.operator_an import (
ReviseOp,
)
from examples.aflow.scripts.prompts.prompt import (
from metagpt.ext.aflow.scripts.prompts.prompt import (
FORMAT_PROMPT,
ANSWER_GENERATION_PROMPT,
SC_ENSEMBLE_PROMPT,
@ -35,7 +35,7 @@ from examples.aflow.scripts.prompts.prompt import (
REVIEW_PROMPT,
REVISE_PROMPT,
)
from examples.aflow.scripts.utils import test_case_2_test_function
from metagpt.ext.aflow.scripts.utils import test_case_2_test_function
from metagpt.actions.action_node import ActionNode
from metagpt.llm import LLM
from metagpt.logs import logger

Binary file not shown.

View file

@ -12,11 +12,11 @@ from pydantic import BaseModel, Field
from metagpt.actions.action_node import ActionNode
from metagpt.provider.llm_provider_registry import create_llm_instance
from metagpt.logs import logger
from examples.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
from examples.aflow.scripts.optimizer_utils.data_utils import DataUtils
from examples.aflow.scripts.optimizer_utils.experience_utils import ExperienceUtils
from examples.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
from examples.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
from metagpt.ext.aflow.scripts.optimizer_utils.experience_utils import ExperienceUtils
from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
QuestionType = Literal["math", "code", "qa"]

View file

@ -1,4 +1,4 @@
from examples.aflow.scripts.evaluator import Evaluator
from metagpt.ext.aflow.scripts.evaluator import Evaluator
class EvaluationUtils:

View file

@ -6,7 +6,7 @@ import traceback
import time
from metagpt.logs import logger
from examples.aflow.scripts.prompts.optimize_prompt import (
from metagpt.ext.aflow.scripts.prompts.optimize_prompt import (
WORKFLOW_CUSTOM_USE,
WORKFLOW_INPUT,
WORKFLOW_OPTIMIZE_PROMPT,

View file

@ -48,8 +48,8 @@ Note: In custom, the input and instruction are directly concatenated(instruction
"""
WORKFLOW_TEMPLATE = """from typing import Literal
import examples.aflow.scripts.optimized.{dataset}.workflows.template.operator as operator
import examples.aflow.scripts.optimized.{dataset}.workflows.round_{round}.prompt as prompt_custom
import metagpt.ext.aflow.scripts.optimized.{dataset}.workflows.template.operator as operator
import metagpt.ext.aflow.scripts.optimized.{dataset}.workflows.round_{round}.prompt as prompt_custom
from metagpt.provider.llm_provider_registry import create_llm_instance
from metagpt.utils.cost_manager import CostManager

View file

@ -5,7 +5,7 @@
from typing import Literal
from examples.aflow.scripts.operator import Generate
from metagpt.ext.aflow.scripts.operator import Generate
from metagpt.provider.llm_provider_registry import create_llm_instance
from metagpt.utils.cost_manager import CostManager

View file

@ -3,9 +3,9 @@
# @Author : didi
# @Desc : Entrance of AFlow.
from examples.aflow.scripts.optimizer import Optimizer
from examples.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
from examples.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import Optimizer
from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
from metagpt.ext.aflow.data.download_data import download
from metagpt.configs.models_config import ModelsConfig
from typing import Literal