mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
mv aflow from example to ext
This commit is contained in:
parent
0b69ffe198
commit
fcc5e19160
29 changed files with 173 additions and 30 deletions
61
examples/aflow/optimize.py
Normal file
61
examples/aflow/optimize.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Date : 8/23/2024 20:00 PM
|
||||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
|
||||
from metagpt.ext.aflow.data.download_data import download
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from typing import Literal
|
||||
|
||||
# DatasetType, QuestionType, and OptimizerType definitions
|
||||
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
|
||||
# QuestionType = Literal["math", "code", "qa"]
|
||||
# OptimizerType = Literal["Graph", "Test"]
|
||||
|
||||
# When you first use AFlow, please download the datasets and initial rounds; if you want to take a look at the results, please download the results as well.
|
||||
# download(["datasets", "results", "initial_rounds"])
|
||||
|
||||
# Crucial Parameters
|
||||
dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType
|
||||
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
|
||||
question_type: QuestionType = "math" # Must be consistent with QuestionType and with the dataset above: GSM8K is a math benchmark
|
||||
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized result save path; matches the metagpt.ext.aflow.scripts.optimized package that generated workflows are imported from
|
||||
initial_round: int = 1 # Round number from which the optimization starts
|
||||
max_rounds: int = 20
|
||||
check_convergence: bool = True
|
||||
|
||||
# Config llm model, you can modify `config/config2.yaml` to use more llms.
|
||||
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
|
||||
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
|
||||
# Config operators.
|
||||
operators = [
|
||||
"Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
|
||||
# "AnswerGenerate" # It's for qa
|
||||
# "CustomCodeGenerate", # It's for code
|
||||
"ScEnsemble", # It's for code, math and qa
|
||||
# "Test", # It's for code
|
||||
"Programmer", # It's for math
|
||||
]
|
||||
|
||||
# Create an optimizer instance
|
||||
optimizer = Optimizer(
|
||||
dataset=dataset, # Config dataset
|
||||
question_type=question_type, # Config Question Type
|
||||
opt_llm_config=claude_llm_config, # Config Optimizer LLM
|
||||
exec_llm_config=mini_llm_config, # Config Execution LLM
|
||||
check_convergence=check_convergence, # Whether Early Stop
|
||||
operators=operators, # Config Operators you want to use
|
||||
optimized_path=optimized_path, # Config Optimized workflow's file path
|
||||
sample=sample, # Only Top(sample) rounds will be selected.
|
||||
initial_round=initial_round, # Optimize from initial round
|
||||
max_rounds=max_rounds # The max iteration of AFLOW.
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Optimize workflow via setting the optimizer's mode to 'Graph'
|
||||
optimizer.optimize("Graph")
|
||||
# Test workflow via setting the optimizer's mode to 'Test'
|
||||
# optimizer.optimize("Test")
|
||||
70
metagpt/ext/aflow/README.md
Normal file
70
metagpt/ext/aflow/README.md
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# AFlow: Automating Agentic Workflow Generation
|
||||
|
||||
AFlow is a framework for automatically generating and optimizing Agentic Workflows. It uses Monte Carlo tree search in a code-represented workflow space to find effective workflows, replacing manual development with machine effort. Our approach shows potential to outperform handcrafted workflows on various tasks.
|
||||
|
||||
[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
|
||||
|
||||
[Insert performance graph/image here]
|
||||
|
||||
## Framework Components
|
||||
|
||||
- **Node**: Basic unit of LLM invocation. See `action_node.py` for a flexible interface to control LLM, temperature, format, and prompt.
|
||||
- **Operator**: Predefined combinations of Nodes to enhance search efficiency. Encapsulates common operations like Generate, Format, Review, Revise, Ensemble, Test, and Programmer.
|
||||
- **Workflow**: A sequence of LLM-invoking nodes connected by edges. Can be represented as graphs, neural networks, or code to express various execution structures.
|
||||
- **Optimizer**: Uses LLMs within a Monte Carlo Tree Search variant to explore and refine workflows. Iteratively selects, expands, evaluates, and updates workflows based on performance.
|
||||
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows.
|
||||
|
||||
## Datasets
|
||||
|
||||
We provide implementations for [list datasets here].
|
||||
|
||||
Data is available at [link to data].
|
||||
|
||||
For custom tasks, [brief instructions or link to documentation].
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. Configure your search in `optimize.py`:
|
||||
- Open `examples/aflow/optimize.py`
|
||||
- Set the following parameters:
|
||||
```python
|
||||
dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
|
||||
question_type = "code" # Choose from: "math", "code", "qa"
|
||||
sample = 5 # Number of samples to use for optimization
|
||||
check_convergence = True # Whether to check for convergence
|
||||
optimized_path = "path/to/optimized/workflows" # Path to save optimized workflows
|
||||
initial_round = 1 # Starting round number
|
||||
max_rounds = 20 # Maximum number of optimization rounds
|
||||
```
|
||||
- Adjust these parameters according to your specific requirements and dataset
|
||||
2. Set up parameters in `config/config2.yaml` (see `metagpt/ext/aflow/config2.example.yaml` for reference)
|
||||
3. Set the operator you want to use in `optimize.py` and in `xxxx`
|
||||
4. Download the init round of six datasets and put them in `xxxxxx`
|
||||
5. Add your custom dataset and corresponding evaluation function:
|
||||
|
||||
- Create a new Python file in the `metagpt/ext/aflow/benchmark/` directory, named `{custom_dataset_name}.py`
|
||||
- Implement the following key functions in this new file:
|
||||
- `load_data`: for loading the dataset
|
||||
- `evaluate_problem`: for evaluating a single problem solution
|
||||
- `evaluate_all_problems`: for evaluating all problems
|
||||
- `save_results_to_csv`: for saving evaluation results
|
||||
- `optimize_{custom_dataset_name}_evaluation`: main evaluation function that integrates the above functionalities
|
||||
- Add your custom dataset name and config val_list in `metagpt/ext/aflow/scripts/evaluator.py`
|
||||
|
||||
|
||||
## License
|
||||
|
||||
[License information]
|
||||
|
||||
## Citation
|
||||
|
||||
If you use AFlow in your research, please cite our paper:
|
||||
|
||||
```
|
||||
@article{zhang2024aflow,
|
||||
title={AFlow: Automating Agentic Workflow Generation},
|
||||
author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
|
||||
journal={arXiv preprint arXiv:2410.10762},
|
||||
year={2024}
|
||||
}
|
||||
```
|
||||
|
|
@ -12,7 +12,7 @@ import pandas as pd
|
|||
from tqdm.asyncio import tqdm_asyncio
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
|
||||
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
|
||||
class DROPBenchmark(BaseBenchmark):
|
||||
def __init__(self, name: str, file_path: str, log_path: str):
|
||||
|
|
@ -17,7 +17,7 @@ from datetime import datetime
|
|||
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
|
||||
|
||||
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
|
||||
class GSM8KBenchmark(BaseBenchmark):
|
||||
def __init__(self, name: str, file_path: str, log_path: str):
|
||||
|
|
@ -9,7 +9,7 @@ import os
|
|||
from collections import Counter
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
|
||||
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
|
||||
class HotpotQABenchmark(BaseBenchmark):
|
||||
def __init__(self, name: str, file_path: str, log_path: str):
|
||||
|
|
@ -10,7 +10,7 @@ from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_t
|
|||
|
||||
import pandas as pd
|
||||
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.actions.code_sanitize import sanitize
|
||||
|
||||
class HumanEvalBenchmark(BaseBenchmark):
|
||||
|
|
@ -9,7 +9,7 @@ from typing import Any, Callable, Tuple, List
|
|||
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
|
||||
|
||||
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
|
||||
class MATHBenchmark(BaseBenchmark):
|
||||
def __init__(self, name: str, file_path: str, log_path: str):
|
||||
|
|
@ -8,7 +8,7 @@ from typing import List, Tuple, Callable, Any, Optional, Dict
|
|||
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
|
||||
|
||||
from metagpt.actions.code_sanitize import sanitize
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
|
||||
class MBPPBenchmark(BaseBenchmark):
|
||||
def __init__(self, name: str, file_path: str, log_path: str):
|
||||
12
metagpt/ext/aflow/config2.example.yaml
Normal file
12
metagpt/ext/aflow/config2.example.yaml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
models:
|
||||
"<model_name>": # model: "gpt-4-turbo" # or gpt-3.5-turbo
|
||||
api_type: "openai" # or azure / ollama / groq etc.
|
||||
base_url: "<your base url>"
|
||||
api_key: "<your api key>"
|
||||
temperature: 0
|
||||
"<model_name>":
|
||||
api_type: "openai"
|
||||
base_url: "<your base url>"
|
||||
api_key: "<your api key>"
|
||||
temperature: 0
|
||||
CALC_USAGE: True
|
||||
BIN
metagpt/ext/aflow/full data(include baselines).zip
Normal file
BIN
metagpt/ext/aflow/full data(include baselines).zip
Normal file
Binary file not shown.
|
|
@ -6,13 +6,13 @@
|
|||
from typing import Literal, Tuple, Optional, Dict
|
||||
import asyncio
|
||||
|
||||
from examples.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from examples.aflow.benchmark.gsm8k import GSM8KBenchmark
|
||||
from examples.aflow.benchmark.math import MATHBenchmark
|
||||
from examples.aflow.benchmark.humaneval import HumanEvalBenchmark
|
||||
from examples.aflow.benchmark.hotpotqa import HotpotQABenchmark
|
||||
from examples.aflow.benchmark.mbpp import MBPPBenchmark
|
||||
from examples.aflow.benchmark.drop import DROPBenchmark
|
||||
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
|
||||
from metagpt.ext.aflow.benchmark.gsm8k import GSM8KBenchmark
|
||||
from metagpt.ext.aflow.benchmark.math import MATHBenchmark
|
||||
from metagpt.ext.aflow.benchmark.humaneval import HumanEvalBenchmark
|
||||
from metagpt.ext.aflow.benchmark.hotpotqa import HotpotQABenchmark
|
||||
from metagpt.ext.aflow.benchmark.mbpp import MBPPBenchmark
|
||||
from metagpt.ext.aflow.benchmark.drop import DROPBenchmark
|
||||
|
||||
# If you want to customize tasks, add task types here and provide evaluation functions, just like the ones given above
|
||||
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
|
||||
|
|
@ -11,9 +11,9 @@ from typing import Dict, List, Tuple
|
|||
|
||||
import concurrent.futures
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed
|
||||
from examples.aflow.scripts.utils import extract_test_cases_from_jsonl
|
||||
from metagpt.ext.aflow.scripts.utils import extract_test_cases_from_jsonl
|
||||
|
||||
from examples.aflow.scripts.operator_an import (
|
||||
from metagpt.ext.aflow.scripts.operator_an import (
|
||||
FormatOp,
|
||||
GenerateOp,
|
||||
CodeGenerateOp,
|
||||
|
|
@ -25,7 +25,7 @@ from examples.aflow.scripts.operator_an import (
|
|||
ReviseOp,
|
||||
|
||||
)
|
||||
from examples.aflow.scripts.prompts.prompt import (
|
||||
from metagpt.ext.aflow.scripts.prompts.prompt import (
|
||||
FORMAT_PROMPT,
|
||||
ANSWER_GENERATION_PROMPT,
|
||||
SC_ENSEMBLE_PROMPT,
|
||||
|
|
@ -35,7 +35,7 @@ from examples.aflow.scripts.prompts.prompt import (
|
|||
REVIEW_PROMPT,
|
||||
REVISE_PROMPT,
|
||||
)
|
||||
from examples.aflow.scripts.utils import test_case_2_test_function
|
||||
from metagpt.ext.aflow.scripts.utils import test_case_2_test_function
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.llm import LLM
|
||||
from metagpt.logs import logger
|
||||
BIN
metagpt/ext/aflow/scripts/optimized/optimized.zip
Normal file
BIN
metagpt/ext/aflow/scripts/optimized/optimized.zip
Normal file
Binary file not shown.
|
|
@ -12,11 +12,11 @@ from pydantic import BaseModel, Field
|
|||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.logs import logger
|
||||
from examples.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
|
||||
from examples.aflow.scripts.optimizer_utils.data_utils import DataUtils
|
||||
from examples.aflow.scripts.optimizer_utils.experience_utils import ExperienceUtils
|
||||
from examples.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
|
||||
from examples.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.experience_utils import ExperienceUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
|
||||
|
||||
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
|
||||
QuestionType = Literal["math", "code", "qa"]
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from examples.aflow.scripts.evaluator import Evaluator
|
||||
from metagpt.ext.aflow.scripts.evaluator import Evaluator
|
||||
|
||||
|
||||
class EvaluationUtils:
|
||||
|
|
@ -6,7 +6,7 @@ import traceback
|
|||
import time
|
||||
from metagpt.logs import logger
|
||||
|
||||
from examples.aflow.scripts.prompts.optimize_prompt import (
|
||||
from metagpt.ext.aflow.scripts.prompts.optimize_prompt import (
|
||||
WORKFLOW_CUSTOM_USE,
|
||||
WORKFLOW_INPUT,
|
||||
WORKFLOW_OPTIMIZE_PROMPT,
|
||||
|
|
@ -48,8 +48,8 @@ Note: In custom, the input and instruction are directly concatenated(instruction
|
|||
"""
|
||||
|
||||
WORKFLOW_TEMPLATE = """from typing import Literal
|
||||
import examples.aflow.scripts.optimized.{dataset}.workflows.template.operator as operator
|
||||
import examples.aflow.scripts.optimized.{dataset}.workflows.round_{round}.prompt as prompt_custom
|
||||
import metagpt.ext.aflow.scripts.optimized.{dataset}.workflows.template.operator as operator
|
||||
import metagpt.ext.aflow.scripts.optimized.{dataset}.workflows.round_{round}.prompt as prompt_custom
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
from typing import Literal
|
||||
|
||||
from examples.aflow.scripts.operator import Generate
|
||||
from metagpt.ext.aflow.scripts.operator import Generate
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
|
|
@ -3,9 +3,9 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
from examples.aflow.scripts.optimizer import Optimizer
|
||||
from examples.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
|
||||
from examples.aflow.data.download_data import download
|
||||
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType
|
||||
from metagpt.ext.aflow.data.download_data import download
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from typing import Literal
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue