mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-25 00:36:55 +02:00
Update for solving latest review.
This commit is contained in:
parent
f0a3a3f739
commit
d01051abc6
19 changed files with 314 additions and 312 deletions
|
Before Width: | Height: | Size: 302 KiB After Width: | Height: | Size: 302 KiB |
|
Before Width: | Height: | Size: 889 KiB After Width: | Height: | Size: 889 KiB |
|
Before Width: | Height: | Size: 542 KiB After Width: | Height: | Size: 542 KiB |
|
|
@ -5,7 +5,7 @@ # AFlow: Automating Agentic Workflow Generation
|
|||
[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
|
||||
|
||||
<p align="center">
|
||||
<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFLOW" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
|
||||
<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFlow" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
|
||||
</p>
|
||||
|
||||
## Framework Components
|
||||
|
|
@ -17,7 +17,7 @@ ## Framework Components
|
|||
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.
|
||||
|
||||
<p align="center">
|
||||
<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Performance Of AFLOW" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
|
||||
<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Framework of AFlow" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
|
||||
</p>
|
||||
|
||||
## Datasets
|
||||
|
|
@ -26,7 +26,7 @@ ### Experimental Datasets
|
|||
We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`.
|
||||
|
||||
<p align="center">
|
||||
<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFLOW" title="Comparison between AFlow and other methods <sub>1</sub>" width="80%"></a>
|
||||
<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFlow" title="Performance Of AFlow <sub>1</sub>" width="80%"></a>
|
||||
</p>
|
||||
|
||||
### Custom Datasets
|
||||
|
|
@ -68,7 +68,7 @@ # Or with custom parameters
|
|||
```
|
||||
|
||||
## Reproduce the Results in the Paper
|
||||
1. We provide the raw data obtained from our experiments ([download link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv)), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
|
||||
1. We provide the raw data obtained from our experiments in this [link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
|
||||
2. You can directly reproduce our experimental results by running the scripts in `examples/aflow/experiments`.
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,50 +3,51 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
import argparse
|
||||
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
|
||||
# Optimizer is imported from the optimizer module, matching the other AFlow entry scripts.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# Crucial Parameters
dataset: DatasetType = "DROP"  # Must be a value allowed by DatasetType
question_type: QuestionType = "qa"  # Must be a value allowed by QuestionType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Where optimized results are saved
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Maximum number of AFlow iterations
validation_rounds: int = 5  # Number of AFlow validation rounds
check_convergence: bool = True  # Whether to stop early

# LLM configs; edit `config/config2.yaml` to make more LLMs available.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
def parse_args():
    """Parse the command-line options of the DROP optimization script.

    Returns:
        argparse.Namespace: with attributes ``dataset``, ``sample``,
        ``question_type``, ``optimized_path``, ``initial_round``,
        ``max_rounds``, ``check_convergence`` and ``validation_rounds``.
    """

    def _str_to_bool(value):
        """Turn a textual boolean such as "True", "false" or "0" into a bool."""
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="AFlow Optimizer for DROP")
    parser.add_argument("--dataset", type=str, default="DROP", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    # ``type=bool`` would turn every non-empty string (including "False") into True,
    # so the value is converted explicitly.
    parser.add_argument(
        "--check_convergence", type=_str_to_bool, default=True, help="Whether to enable early stop"
    )
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args()
|
||||
|
||||
# Operators made available to the optimizer.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes.
    "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    # "Programmer",  # For math
]

# Build the optimizer from the module-level parameters.
optimizer = Optimizer(
    dataset=dataset,  # Dataset to optimize on
    question_type=question_type,  # Question type
    opt_llm_config=claude_llm_config,  # LLM used for optimization
    exec_llm_config=mini_llm_config,  # LLM used for execution
    operators=operators,  # Operators you want to use
    optimized_path=optimized_path,  # File path of the optimized workflows
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize from this round
    max_rounds=max_rounds,  # Maximum number of AFlow iterations
    validation_rounds=validation_rounds,  # Number of AFlow validation rounds
    check_convergence=check_convergence,  # Whether to stop early
)
|
||||
|
||||
if __name__ == "__main__":
    # Run settings come from the command line (defaults target DROP).
    args = parse_args()

    # Model configurations looked up by name: one for execution, one for optimization.
    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

    operators = ["Custom", "AnswerGenerate", "ScEnsemble"]

    optimizer_kwargs = dict(
        dataset=args.dataset,
        question_type=args.question_type,
        opt_llm_config=claude_llm_config,
        exec_llm_config=mini_llm_config,
        check_convergence=args.check_convergence,
        operators=operators,
        optimized_path=args.optimized_path,
        sample=args.sample,
        initial_round=args.initial_round,
        max_rounds=args.max_rounds,
        validation_rounds=args.validation_rounds,
    )
    optimizer = Optimizer(**optimizer_kwargs)

    # Optimize the workflow by running the optimizer in 'Graph' mode.
    optimizer.optimize("Graph")
    # To test the workflow instead, run the optimizer in 'Test' mode:
    # optimizer.optimize("Test")
|
||||
|
|
|
|||
|
|
@ -3,50 +3,51 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
import argparse
|
||||
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
|
||||
# Optimizer is imported from the optimizer module, matching the other AFlow entry scripts.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# Crucial Parameters
dataset: DatasetType = "GSM8K"  # Must be a value allowed by DatasetType
question_type: QuestionType = "math"  # Must be a value allowed by QuestionType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Where optimized results are saved
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Maximum number of AFlow iterations
validation_rounds: int = 5  # Number of AFlow validation rounds
check_convergence: bool = True  # Whether to stop early

# LLM configs; edit `config/config2.yaml` to make more LLMs available.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
def parse_args():
    """Parse the command-line options of the GSM8K optimization script.

    Returns:
        argparse.Namespace: with attributes ``dataset``, ``sample``,
        ``question_type``, ``optimized_path``, ``initial_round``,
        ``max_rounds``, ``check_convergence`` and ``validation_rounds``.
    """

    def _str_to_bool(value):
        """Turn a textual boolean such as "True", "false" or "0" into a bool."""
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="AFlow Optimizer for GSM8K")
    parser.add_argument("--dataset", type=str, default="GSM8K", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="math", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    # ``type=bool`` would turn every non-empty string (including "False") into True,
    # so the value is converted explicitly.
    parser.add_argument(
        "--check_convergence", type=_str_to_bool, default=True, help="Whether to enable early stop"
    )
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args()
|
||||
|
||||
# Operators made available to the optimizer.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes.
    # "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    "Programmer",  # For math
]

# Build the optimizer from the module-level parameters.
optimizer = Optimizer(
    dataset=dataset,  # Dataset to optimize on
    question_type=question_type,  # Question type
    opt_llm_config=claude_llm_config,  # LLM used for optimization
    exec_llm_config=mini_llm_config,  # LLM used for execution
    operators=operators,  # Operators you want to use
    optimized_path=optimized_path,  # File path of the optimized workflows
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize from this round
    max_rounds=max_rounds,  # Maximum number of AFlow iterations
    validation_rounds=validation_rounds,  # Number of AFlow validation rounds
    check_convergence=check_convergence,  # Whether to stop early
)
|
||||
|
||||
if __name__ == "__main__":
    # Run settings come from the command line (defaults target GSM8K).
    args = parse_args()

    # Model configurations looked up by name: one for execution, one for optimization.
    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

    operators = ["Custom", "ScEnsemble", "Programmer"]

    optimizer_kwargs = dict(
        dataset=args.dataset,
        question_type=args.question_type,
        opt_llm_config=claude_llm_config,
        exec_llm_config=mini_llm_config,
        check_convergence=args.check_convergence,
        operators=operators,
        optimized_path=args.optimized_path,
        sample=args.sample,
        initial_round=args.initial_round,
        max_rounds=args.max_rounds,
        validation_rounds=args.validation_rounds,
    )
    optimizer = Optimizer(**optimizer_kwargs)

    # Optimize the workflow by running the optimizer in 'Graph' mode.
    optimizer.optimize("Graph")
    # To test the workflow instead, run the optimizer in 'Test' mode:
    # optimizer.optimize("Test")
|
||||
|
|
|
|||
|
|
@ -3,50 +3,51 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
import argparse
|
||||
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
|
||||
# Optimizer is imported from the optimizer module, matching the other AFlow entry scripts.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# Crucial Parameters
dataset: DatasetType = "HotpotQA"  # Must be a value allowed by DatasetType
question_type: QuestionType = "qa"  # Must be a value allowed by QuestionType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Where optimized results are saved
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Maximum number of AFlow iterations
validation_rounds: int = 5  # Number of AFlow validation rounds
check_convergence: bool = True  # Whether to stop early

# LLM configs; edit `config/config2.yaml` to make more LLMs available.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
def parse_args():
    """Parse the command-line options of the HotpotQA optimization script.

    Returns:
        argparse.Namespace: with attributes ``dataset``, ``sample``,
        ``question_type``, ``optimized_path``, ``initial_round``,
        ``max_rounds``, ``check_convergence`` and ``validation_rounds``.
    """

    def _str_to_bool(value):
        """Turn a textual boolean such as "True", "false" or "0" into a bool."""
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="AFlow Optimizer for HotpotQA")
    parser.add_argument("--dataset", type=str, default="HotpotQA", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    # ``type=bool`` would turn every non-empty string (including "False") into True,
    # so the value is converted explicitly.
    parser.add_argument(
        "--check_convergence", type=_str_to_bool, default=True, help="Whether to enable early stop"
    )
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args()
|
||||
|
||||
# Operators made available to the optimizer.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes.
    "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    # "Programmer",  # For math
]

# Build the optimizer from the module-level parameters.
optimizer = Optimizer(
    dataset=dataset,  # Dataset to optimize on
    question_type=question_type,  # Question type
    opt_llm_config=claude_llm_config,  # LLM used for optimization
    exec_llm_config=mini_llm_config,  # LLM used for execution
    operators=operators,  # Operators you want to use
    optimized_path=optimized_path,  # File path of the optimized workflows
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize from this round
    max_rounds=max_rounds,  # Maximum number of AFlow iterations
    validation_rounds=validation_rounds,  # Number of AFlow validation rounds
    check_convergence=check_convergence,  # Whether to stop early
)
|
||||
|
||||
if __name__ == "__main__":
    # Run settings come from the command line (defaults target HotpotQA).
    args = parse_args()

    # Model configurations looked up by name: one for execution, one for optimization.
    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

    operators = ["Custom", "AnswerGenerate", "ScEnsemble"]

    optimizer_kwargs = dict(
        dataset=args.dataset,
        question_type=args.question_type,
        opt_llm_config=claude_llm_config,
        exec_llm_config=mini_llm_config,
        check_convergence=args.check_convergence,
        operators=operators,
        optimized_path=args.optimized_path,
        sample=args.sample,
        initial_round=args.initial_round,
        max_rounds=args.max_rounds,
        validation_rounds=args.validation_rounds,
    )
    optimizer = Optimizer(**optimizer_kwargs)

    # Optimize the workflow by running the optimizer in 'Graph' mode.
    optimizer.optimize("Graph")
    # To test the workflow instead, run the optimizer in 'Test' mode:
    # optimizer.optimize("Test")
|
||||
|
|
|
|||
|
|
@ -3,50 +3,52 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
import argparse
|
||||
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
|
||||
# Optimizer is imported from the optimizer module, matching the other AFlow entry scripts.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# Crucial Parameters
dataset: DatasetType = "HumanEval"  # Must be a value allowed by DatasetType
question_type: QuestionType = "code"  # Must be a value allowed by QuestionType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Where optimized results are saved
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Maximum number of AFlow iterations
validation_rounds: int = 5  # Number of AFlow validation rounds
check_convergence: bool = True  # Whether to stop early

# LLM configs; edit `config/config2.yaml` to make more LLMs available.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
def parse_args():
    """Parse the command-line options of the HumanEval optimization script.

    Returns:
        argparse.Namespace: with attributes ``dataset``, ``sample``,
        ``question_type``, ``optimized_path``, ``initial_round``,
        ``max_rounds``, ``check_convergence`` and ``validation_rounds``.
    """

    def _str_to_bool(value):
        """Turn a textual boolean such as "True", "false" or "0" into a bool."""
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="AFlow Optimizer for HumanEval")
    parser.add_argument("--dataset", type=str, default="HumanEval", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="code", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    # ``type=bool`` would turn every non-empty string (including "False") into True,
    # so the value is converted explicitly.
    parser.add_argument(
        "--check_convergence", type=_str_to_bool, default=True, help="Whether to enable early stop"
    )
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args()
|
||||
|
||||
# Operators made available to the optimizer.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes.
    # "AnswerGenerate",  # For qa
    "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    "Test",  # For code
    # "Programmer",  # For math
]

# Build the optimizer from the module-level parameters.
optimizer = Optimizer(
    dataset=dataset,  # Dataset to optimize on
    question_type=question_type,  # Question type
    opt_llm_config=claude_llm_config,  # LLM used for optimization
    exec_llm_config=mini_llm_config,  # LLM used for execution
    operators=operators,  # Operators you want to use
    optimized_path=optimized_path,  # File path of the optimized workflows
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize from this round
    max_rounds=max_rounds,  # Maximum number of AFlow iterations
    validation_rounds=validation_rounds,  # Number of AFlow validation rounds
    check_convergence=check_convergence,  # Whether to stop early
)
|
||||
|
||||
if __name__ == "__main__":
    # Run settings come from the command line (defaults target HumanEval).
    args = parse_args()

    # Model configurations looked up by name: one for execution, one for optimization.
    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

    operators = ["Custom", "CustomCodeGenerate", "ScEnsemble", "Test"]

    optimizer_kwargs = dict(
        dataset=args.dataset,
        question_type=args.question_type,
        opt_llm_config=claude_llm_config,
        exec_llm_config=mini_llm_config,
        check_convergence=args.check_convergence,
        operators=operators,
        optimized_path=args.optimized_path,
        sample=args.sample,
        initial_round=args.initial_round,
        max_rounds=args.max_rounds,
        validation_rounds=args.validation_rounds,
    )
    optimizer = Optimizer(**optimizer_kwargs)

    # Optimize the workflow by running the optimizer in 'Graph' mode.
    optimizer.optimize("Graph")
    # To test the workflow instead, run the optimizer in 'Test' mode:
    # optimizer.optimize("Test")
|
||||
|
|
|
|||
|
|
@ -3,50 +3,51 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
import argparse
|
||||
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
|
||||
# Optimizer is imported from the optimizer module, matching the other AFlow entry scripts.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# Crucial Parameters
dataset: DatasetType = "MATH"  # Must be a value allowed by DatasetType
question_type: QuestionType = "math"  # Must be a value allowed by QuestionType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Where optimized results are saved
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Maximum number of AFlow iterations
validation_rounds: int = 5  # Number of AFlow validation rounds
check_convergence: bool = True  # Whether to stop early

# LLM configs; edit `config/config2.yaml` to make more LLMs available.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
def parse_args():
    """Parse the command-line options of the MATH optimization script.

    Returns:
        argparse.Namespace: with attributes ``dataset``, ``sample``,
        ``question_type``, ``optimized_path``, ``initial_round``,
        ``max_rounds``, ``check_convergence`` and ``validation_rounds``.
    """

    def _str_to_bool(value):
        """Turn a textual boolean such as "True", "false" or "0" into a bool."""
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="AFlow Optimizer for MATH")
    parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="math", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    # ``type=bool`` would turn every non-empty string (including "False") into True,
    # so the value is converted explicitly.
    parser.add_argument(
        "--check_convergence", type=_str_to_bool, default=True, help="Whether to enable early stop"
    )
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args()
|
||||
|
||||
# Operators made available to the optimizer.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes.
    # "AnswerGenerate",  # For qa
    # "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    # "Test",  # For code
    "Programmer",  # For math
]

# Build the optimizer from the module-level parameters.
optimizer = Optimizer(
    dataset=dataset,  # Dataset to optimize on
    question_type=question_type,  # Question type
    opt_llm_config=claude_llm_config,  # LLM used for optimization
    exec_llm_config=mini_llm_config,  # LLM used for execution
    operators=operators,  # Operators you want to use
    optimized_path=optimized_path,  # File path of the optimized workflows
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize from this round
    max_rounds=max_rounds,  # Maximum number of AFlow iterations
    validation_rounds=validation_rounds,  # Number of AFlow validation rounds
    check_convergence=check_convergence,  # Whether to stop early
)
|
||||
|
||||
if __name__ == "__main__":
    # Run settings come from the command line (defaults target MATH).
    args = parse_args()

    # Model configurations looked up by name: one for execution, one for optimization.
    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

    operators = ["Custom", "ScEnsemble", "Programmer"]

    optimizer_kwargs = dict(
        dataset=args.dataset,
        question_type=args.question_type,
        opt_llm_config=claude_llm_config,
        exec_llm_config=mini_llm_config,
        check_convergence=args.check_convergence,
        operators=operators,
        optimized_path=args.optimized_path,
        sample=args.sample,
        initial_round=args.initial_round,
        max_rounds=args.max_rounds,
        validation_rounds=args.validation_rounds,
    )
    optimizer = Optimizer(**optimizer_kwargs)

    # Optimize the workflow by running the optimizer in 'Graph' mode.
    optimizer.optimize("Graph")
    # To test the workflow instead, run the optimizer in 'Test' mode:
    # optimizer.optimize("Test")
|
||||
|
|
|
|||
|
|
@ -3,50 +3,52 @@
|
|||
# @Author : didi
|
||||
# @Desc : Entrance of AFlow.
|
||||
|
||||
import argparse
|
||||
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
|
||||
# Optimizer is imported from the optimizer module, matching the other AFlow entry scripts.
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# Crucial Parameters
dataset: DatasetType = "MBPP"  # Must be a value allowed by DatasetType
question_type: QuestionType = "code"  # Must be a value allowed by QuestionType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Where optimized results are saved
initial_round: int = 1  # Round the optimization starts from
max_rounds: int = 20  # Maximum number of AFlow iterations
validation_rounds: int = 5  # Number of AFlow validation rounds
check_convergence: bool = True  # Whether to stop early

# LLM configs; edit `config/config2.yaml` to make more LLMs available.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
def parse_args():
    """Parse the command-line options of the MBPP optimization script.

    Returns:
        argparse.Namespace: with attributes ``dataset``, ``sample``,
        ``question_type``, ``optimized_path``, ``initial_round``,
        ``max_rounds``, ``check_convergence`` and ``validation_rounds``.
    """

    def _str_to_bool(value):
        """Turn a textual boolean such as "True", "false" or "0" into a bool."""
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string stays falsy, as it was with ``type=bool``.
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser(description="AFlow Optimizer for MBPP")
    parser.add_argument("--dataset", type=str, default="MBPP", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="code", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    # ``type=bool`` would turn every non-empty string (including "False") into True,
    # so the value is converted explicitly.
    parser.add_argument(
        "--check_convergence", type=_str_to_bool, default=True, help="Whether to enable early stop"
    )
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args()
|
||||
|
||||
# Operators made available to the optimizer.
operators = [
    "Custom",  # Basic unit of a fixed node; the optimizer can modify its prompt to get various nodes.
    # "AnswerGenerate",  # For qa
    "CustomCodeGenerate",  # For code
    "ScEnsemble",  # For code, math and qa
    "Test",  # For code
    # "Programmer",  # For math
]

# Build the optimizer from the module-level parameters.
optimizer = Optimizer(
    dataset=dataset,  # Dataset to optimize on
    question_type=question_type,  # Question type
    opt_llm_config=claude_llm_config,  # LLM used for optimization
    exec_llm_config=mini_llm_config,  # LLM used for execution
    operators=operators,  # Operators you want to use
    optimized_path=optimized_path,  # File path of the optimized workflows
    sample=sample,  # Only the top `sample` rounds will be selected
    initial_round=initial_round,  # Optimize from this round
    max_rounds=max_rounds,  # Maximum number of AFlow iterations
    validation_rounds=validation_rounds,  # Number of AFlow validation rounds
    check_convergence=check_convergence,  # Whether to stop early
)
|
||||
|
||||
if __name__ == "__main__":
    # Run settings come from the command line (defaults target MBPP).
    args = parse_args()

    # Model configurations looked up by name: one for execution, one for optimization.
    mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
    claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")

    operators = ["Custom", "CustomCodeGenerate", "ScEnsemble", "Test"]

    optimizer_kwargs = dict(
        dataset=args.dataset,
        question_type=args.question_type,
        opt_llm_config=claude_llm_config,
        exec_llm_config=mini_llm_config,
        check_convergence=args.check_convergence,
        operators=operators,
        optimized_path=args.optimized_path,
        sample=args.sample,
        initial_round=args.initial_round,
        max_rounds=args.max_rounds,
        validation_rounds=args.validation_rounds,
    )
    optimizer = Optimizer(**optimizer_kwargs)

    # Optimize the workflow by running the optimizer in 'Graph' mode.
    optimizer.optimize("Graph")
    # To test the workflow instead, run the optimizer in 'Test' mode:
    # optimizer.optimize("Test")
|
||||
|
|
|
|||
|
|
@ -9,17 +9,17 @@ from metagpt.configs.models_config import ModelsConfig
|
|||
from metagpt.ext.aflow.data.download_data import download
|
||||
from metagpt.ext.aflow.scripts.optimizer import Optimizer
|
||||
|
||||
# DatasetType, QuestionType, and OptimizerType definitions
|
||||
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
|
||||
# QuestionType = Literal["math", "code", "qa"]
|
||||
# OptimizerType = Literal["Graph", "Test"]
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="AFlow Optimizer")
|
||||
parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
|
||||
parser.add_argument(
|
||||
"--dataset",
|
||||
type=str,
|
||||
default="MATH",
|
||||
help="Dataset type, including HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP",
|
||||
)
|
||||
parser.add_argument("--sample", type=int, default=4, help="Sample count")
|
||||
parser.add_argument("--question_type", type=str, default="math", help="Question type")
|
||||
parser.add_argument("--question_type", type=str, default="math", help="Question type, including math, code, qa")
|
||||
parser.add_argument(
|
||||
"--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -510,8 +510,9 @@ class ActionNode:
|
|||
return {field_name: field.annotation for field_name, field in model_class.model_fields.items()}
|
||||
|
||||
def xml_compile(self, context):
|
||||
# TODO 再来一版
|
||||
|
||||
"""
|
||||
Compile the prompt to make it easier for the model to understand the format.
|
||||
"""
|
||||
field_names = self.get_field_names()
|
||||
# Construct the example using the field names
|
||||
examples = []
|
||||
|
|
|
|||
|
|
@ -11,12 +11,12 @@ import os
|
|||
|
||||
import numpy as np
|
||||
|
||||
from metagpt.utils.common import write_json_file
|
||||
from metagpt.utils.common import read_json_file, write_json_file
|
||||
|
||||
|
||||
def generate_random_indices(n, n_samples, test=False):
|
||||
"""
|
||||
生成随机索引
|
||||
Generate random indices
|
||||
"""
|
||||
|
||||
def _set_seed(seed=42):
|
||||
|
|
@ -52,20 +52,16 @@ def log_mismatch(problem, expected_output, prediction, predicted_number, path):
|
|||
|
||||
log_file = os.path.join(path, "log.json")
|
||||
|
||||
# 检查log文件是否已经存在
|
||||
# Check if the log file already exists
|
||||
if os.path.exists(log_file):
|
||||
# 如果存在,加载现有的日志数据
|
||||
with open(log_file, "r", encoding="utf-8") as f:
|
||||
try:
|
||||
data = json.load(f)
|
||||
except json.JSONDecodeError:
|
||||
data = []
|
||||
# If it exists, load the existing log data
|
||||
data = read_json_file(log_file)
|
||||
else:
|
||||
# 如果不存在,创建一个新的日志列表
|
||||
# If it does not exist, create a new log list
|
||||
data = []
|
||||
|
||||
# 添加新的日志记录
|
||||
# Add the new log entry
|
||||
data.append(log_data)
|
||||
|
||||
# 将数据写回到log.json文件
|
||||
# Write the data back to log.json file
|
||||
write_json_file(log_file, data, encoding="utf-8", indent=4)
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from typing import List, Literal
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.ext.aflow.scripts.evaluator import DatasetType
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
|
||||
from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
|
||||
|
|
@ -18,7 +19,6 @@ from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
|
|||
from metagpt.logs import logger
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
|
||||
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
|
||||
QuestionType = Literal["math", "code", "qa"]
|
||||
OptimizerType = Literal["Graph", "Test"]
|
||||
|
||||
|
|
|
|||
|
|
@ -76,8 +76,8 @@ class ConvergenceUtils:
|
|||
if len(self.avg_scores) < top_k + 1:
|
||||
return False, None, None
|
||||
convergence_count = 0 # Convergence counter
|
||||
previous_Y = None # Y value of the previous round (average of top_k scores)
|
||||
sigma_Y_previous = None # Standard error of Y value from previous round
|
||||
previous_y = None # Y value of the previous round (average of top_k scores)
|
||||
sigma_y_previous = None # Standard error of Y value from previous round
|
||||
for i in range(len(self.avg_scores)):
|
||||
# Dynamically select top_k from current round and all previous rounds
|
||||
top_k_indices = np.argsort(self.avg_scores[: i + 1])[::-1][
|
||||
|
|
@ -87,18 +87,18 @@ class ConvergenceUtils:
|
|||
top_k_stds = [
|
||||
self.stds[j] for j in top_k_indices
|
||||
] # Get list of standard deviations corresponding to top k scores
|
||||
# Calculate mean of top k scores for current round, i.e., Y_current
|
||||
Y_current = np.mean(top_k_scores)
|
||||
# Calculate standard error of Y_current (sigma_Y_current), representing score dispersion
|
||||
sigma_Y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
|
||||
# Calculate mean of top k scores for current round, i.e., y_current
|
||||
y_current = np.mean(top_k_scores)
|
||||
# Calculate standard error of y_current (sigma_y_current), representing score dispersion
|
||||
sigma_y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
|
||||
# If not the first round, calculate change in Y (delta_y) and corresponding standard error
|
||||
if previous_Y is not None:
|
||||
if previous_y is not None:
|
||||
# Calculate Y difference between current round and previous round
|
||||
Delta_Y = Y_current - previous_Y
|
||||
delta_y = y_current - previous_y
|
||||
# Calculate standard error of Y difference (sigma_delta_y)
|
||||
sigma_Delta_Y = np.sqrt(sigma_Y_current**2 + sigma_Y_previous**2)
|
||||
sigma_delta_y = np.sqrt(sigma_y_current**2 + sigma_y_previous**2)
|
||||
# Check if Y change is within acceptable confidence interval, i.e., convergence condition
|
||||
if abs(Delta_Y) <= z * sigma_Delta_Y:
|
||||
if abs(delta_y) <= z * sigma_delta_y:
|
||||
convergence_count += 1
|
||||
# If consecutive converged rounds reach set value, return convergence information
|
||||
if convergence_count >= consecutive_rounds:
|
||||
|
|
@ -107,8 +107,8 @@ class ConvergenceUtils:
|
|||
# If change is large, reset convergence counter
|
||||
convergence_count = 0
|
||||
# Update Y value and standard error for previous round
|
||||
previous_Y = Y_current
|
||||
sigma_Y_previous = sigma_Y_current
|
||||
previous_y = y_current
|
||||
sigma_y_previous = sigma_y_current
|
||||
# If convergence condition not met, return not converged
|
||||
return False, None, None
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import numpy as np
|
|||
import pandas as pd
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.common import read_json_file, write_json_file
|
||||
|
||||
|
||||
class DataUtils:
|
||||
|
|
@ -17,11 +18,7 @@ class DataUtils:
|
|||
def load_results(self, path: str) -> list:
|
||||
result_path = os.path.join(path, "results.json")
|
||||
if os.path.exists(result_path):
|
||||
with open(result_path, "r") as json_file:
|
||||
try:
|
||||
return json.load(json_file)
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
return read_json_file(result_path, encoding="utf-8")
|
||||
return []
|
||||
|
||||
def get_top_rounds(self, sample: int, path=None, mode="Graph"):
|
||||
|
|
@ -97,8 +94,7 @@ class DataUtils:
|
|||
if not os.path.exists(log_dir):
|
||||
return "" # Return an empty string if the file does not exist
|
||||
logger.info(log_dir)
|
||||
with open(log_dir, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
data = read_json_file(log_dir, encoding="utf-8")
|
||||
|
||||
if isinstance(data, dict):
|
||||
data = [data]
|
||||
|
|
@ -125,8 +121,7 @@ class DataUtils:
|
|||
return {"round": round, "score": score, "avg_cost": avg_cost, "total_cost": total_cost, "time": now}
|
||||
|
||||
def save_results(self, json_file_path: str, data: list):
|
||||
with open(json_file_path, "w") as json_file:
|
||||
json.dump(data, json_file, default=str, indent=4)
|
||||
write_json_file(json_file_path, data, encoding="utf-8", indent=4)
|
||||
|
||||
def _load_scores(self, path=None, mode="Graph"):
|
||||
if mode == "Graph":
|
||||
|
|
@ -137,8 +132,7 @@ class DataUtils:
|
|||
result_file = os.path.join(rounds_dir, "results.json")
|
||||
self.top_scores = []
|
||||
|
||||
with open(result_file, "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
data = read_json_file(result_file, encoding="utf-8")
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
scores_per_round = df.groupby("round")["score"].mean().to_dict()
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import os
|
|||
from collections import defaultdict
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.common import read_json_file, write_json_file
|
||||
|
||||
|
||||
class ExperienceUtils:
|
||||
|
|
@ -24,23 +25,22 @@ class ExperienceUtils:
|
|||
round_number = int(round_dir.split("_")[1])
|
||||
json_file_path = os.path.join(round_path, "experience.json")
|
||||
if os.path.exists(json_file_path):
|
||||
with open(json_file_path, "r", encoding="utf-8") as json_file:
|
||||
data = json.load(json_file)
|
||||
father_node = data["father node"]
|
||||
data = read_json_file(json_file_path, encoding="utf-8")
|
||||
father_node = data["father node"]
|
||||
|
||||
if experience_data[father_node]["score"] is None:
|
||||
experience_data[father_node]["score"] = data["before"]
|
||||
if experience_data[father_node]["score"] is None:
|
||||
experience_data[father_node]["score"] = data["before"]
|
||||
|
||||
if data["succeed"]:
|
||||
experience_data[father_node]["success"][round_number] = {
|
||||
"modification": data["modification"],
|
||||
"score": data["after"],
|
||||
}
|
||||
else:
|
||||
experience_data[father_node]["failure"][round_number] = {
|
||||
"modification": data["modification"],
|
||||
"score": data["after"],
|
||||
}
|
||||
if data["succeed"]:
|
||||
experience_data[father_node]["success"][round_number] = {
|
||||
"modification": data["modification"],
|
||||
"score": data["after"],
|
||||
}
|
||||
else:
|
||||
experience_data[father_node]["failure"][round_number] = {
|
||||
"modification": data["modification"],
|
||||
"score": data["after"],
|
||||
}
|
||||
except Exception as e:
|
||||
logger.info(f"Error processing {round_dir}: {str(e)}")
|
||||
|
||||
|
|
@ -93,5 +93,4 @@ class ExperienceUtils:
|
|||
experience["after"] = avg_score
|
||||
experience["succeed"] = bool(avg_score > experience["before"])
|
||||
|
||||
with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
|
||||
json.dump(experience, file, ensure_ascii=False, indent=4)
|
||||
write_json_file(os.path.join(directory, "experience.json"), experience, encoding="utf-8", indent=4)
|
||||
|
|
|
|||
|
|
@ -6,11 +6,17 @@
|
|||
|
||||
import json
|
||||
import re
|
||||
from enum import Enum
|
||||
from typing import Any, List, Tuple
|
||||
|
||||
|
||||
def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
|
||||
if dataset == "HumanEval":
|
||||
class CodeDataset(Enum):
|
||||
HUMAN_EVAL = "HumanEval"
|
||||
MBPP = "MBPP"
|
||||
|
||||
|
||||
def extract_test_cases_from_jsonl(entry_point: str, dataset: CodeDataset = CodeDataset.HUMAN_EVAL):
|
||||
if dataset == CodeDataset.HUMAN_EVAL:
|
||||
file_path = "metagpt/ext/aflow/data/humaneval_public_test.jsonl"
|
||||
# Retain the original hardcoded test cases
|
||||
hardcoded_cases = {
|
||||
|
|
@ -25,7 +31,7 @@ def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
|
|||
"sum_squares": "",
|
||||
"starts_one_ends": "",
|
||||
}
|
||||
elif dataset == "MBPP":
|
||||
elif dataset == CodeDataset.MBPP:
|
||||
file_path = "metagpt/ext/aflow/data/mbpp_public_test.jsonl"
|
||||
hardcoded_cases = {
|
||||
"remove_odd": "",
|
||||
|
|
|
|||
|
|
@ -3,13 +3,11 @@
|
|||
# @Author : didi
|
||||
# @Desc : Basic Graph Class
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from metagpt.ext.aflow.scripts.evaluator import DatasetType
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
|
||||
|
||||
|
||||
class Workflow:
|
||||
def __init__(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue