Update for solving latest review.

This commit is contained in:
didi 2024-10-29 16:04:13 +08:00
parent f0a3a3f739
commit d01051abc6
19 changed files with 314 additions and 312 deletions

View file

Before

Width:  |  Height:  |  Size: 302 KiB

After

Width:  |  Height:  |  Size: 302 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 889 KiB

After

Width:  |  Height:  |  Size: 889 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 542 KiB

After

Width:  |  Height:  |  Size: 542 KiB

Before After
Before After

View file

@ -5,7 +5,7 @@ # AFlow: Automating Agentic Workflow Generation
[Read our paper on arXiv](https://arxiv.org/abs/2410.10762)
<p align="center">
<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFLOW" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
<a href=""><img src="../../docs/resources/aflow/AFLOW-performance.jpg" alt="Performance Of AFlow" title="Performance of AFlow<sub>1</sub>" width="80%"></a>
</p>
## Framework Components
@ -17,7 +17,7 @@ ## Framework Components
- **Evaluator**: Assesses workflow performance on given tasks. Provides feedback to guide the optimization process towards more effective workflows. See `metagpt/ext/aflow/scripts/evaluator.py` for details.
<p align="center">
<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Performance Of AFLOW" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
<a href=""><img src="../../docs/resources/aflow/AFLOW-method.jpg" alt="Framework of AFlow" title="Framework of AFlow <sub>1</sub>" width="80%"></a>
</p>
## Datasets
@ -26,7 +26,7 @@ ### Experimental Datasets
We conducted experiments on six datasets (HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP) and provide their evaluation code. The data can be found in this [datasets](https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e) link, or you can download them using `metagpt/ext/aflow/data/download_data.py`
<p align="center">
<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFLOW" title="Comparison between AFlow and other methods <sub>1</sub>" width="80%"></a>
<a href=""><img src="../../docs/resources/aflow/AFLOW-experiment.jpg" alt="Performance Of AFlow" title="Performance Of AFlow <sub>1</sub>" width="80%"></a>
</p>
### Custom Datasets
@ -68,7 +68,7 @@ # Or with custom parameters
```
## Reproduce the Results in the Paper
1. We provide the raw data obtained from our experiments ([download link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv)), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
1. We provide the raw data obtained from our experiments in this [link](https://drive.google.com/uc?export=download&id=1Sr5wjgKf3bN8OC7G6cO3ynzJqD4w6_Dv), including the workflows and prompts generated in each iteration, as well as their trajectories on the validation dataset. We also provide the optimal workflow for each dataset and the corresponding data on the test dataset. You can download these data using `metagpt/ext/aflow/data/download_data.py`.
2. You can directly reproduce our experimental results by running the scripts in `examples/aflow/experiments`.

View file

@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import argparse
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
from metagpt.ext.aflow.scripts.evaluator import Optimizer
# Crucial Parameters
dataset: DatasetType = "DROP" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "qa" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
def parse_args(argv=None):
    """Parse command-line options for the DROP AFlow optimizer.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when ``None``, so existing callers are unaffected; passing a list
            makes the function testable without touching ``sys.argv``.

    Returns:
        argparse.Namespace holding the optimizer settings.
    """

    def str2bool(value):
        # argparse's `type=bool` treats ANY non-empty string (including
        # "False") as True; parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        return value.strip().lower() in {"true", "1", "yes", "y"}

    parser = argparse.ArgumentParser(description="AFlow Optimizer for DROP")
    parser.add_argument("--dataset", type=str, default="DROP", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    parser.add_argument("--check_convergence", type=str2bool, default=True, help="Whether to enable early stop")
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args(argv)
# Config operators.
operators = [
"Custom", # It's the basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
"AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
# "Programmer", # It's for math
]
# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
operators = [
"Custom",
"AnswerGenerate",
"ScEnsemble",
]
optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")

View file

@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import argparse
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
from metagpt.ext.aflow.scripts.evaluator import Optimizer
# Crucial Parameters
dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "math" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
def parse_args(argv=None):
    """Parse command-line options for the GSM8K AFlow optimizer.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when ``None``, so existing callers are unaffected; passing a list
            makes the function testable without touching ``sys.argv``.

    Returns:
        argparse.Namespace holding the optimizer settings.
    """

    def str2bool(value):
        # argparse's `type=bool` treats ANY non-empty string (including
        # "False") as True; parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        return value.strip().lower() in {"true", "1", "yes", "y"}

    parser = argparse.ArgumentParser(description="AFlow Optimizer for GSM8K")
    parser.add_argument("--dataset", type=str, default="GSM8K", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="math", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    parser.add_argument("--check_convergence", type=str2bool, default=True, help="Whether to enable early stop")
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args(argv)
# Config operators.
operators = [
"Custom", # It's the basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
# "AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
"Programmer", # It's for math
]
# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
operators = [
"Custom",
"ScEnsemble",
"Programmer",
]
optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")

View file

@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import argparse
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
from metagpt.ext.aflow.scripts.evaluator import Optimizer
# Crucial Parameters
dataset: DatasetType = "HotpotQA" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "qa" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
def parse_args(argv=None):
    """Parse command-line options for the HotpotQA AFlow optimizer.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when ``None``, so existing callers are unaffected; passing a list
            makes the function testable without touching ``sys.argv``.

    Returns:
        argparse.Namespace holding the optimizer settings.
    """

    def str2bool(value):
        # argparse's `type=bool` treats ANY non-empty string (including
        # "False") as True; parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        return value.strip().lower() in {"true", "1", "yes", "y"}

    parser = argparse.ArgumentParser(description="AFlow Optimizer for HotpotQA")
    parser.add_argument("--dataset", type=str, default="HotpotQA", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="qa", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    parser.add_argument("--check_convergence", type=str2bool, default=True, help="Whether to enable early stop")
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args(argv)
# Config operators.
operators = [
"Custom", # It's the basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
"AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
# "Programmer", # It's for math
]
# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
operators = [
"Custom",
"AnswerGenerate",
"ScEnsemble",
]
optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")

View file

@ -3,50 +3,52 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import argparse
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
from metagpt.ext.aflow.scripts.evaluator import Optimizer
# Crucial Parameters
dataset: DatasetType = "HumanEval" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
def parse_args(argv=None):
    """Parse command-line options for the HumanEval AFlow optimizer.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when ``None``, so existing callers are unaffected; passing a list
            makes the function testable without touching ``sys.argv``.

    Returns:
        argparse.Namespace holding the optimizer settings.
    """

    def str2bool(value):
        # argparse's `type=bool` treats ANY non-empty string (including
        # "False") as True; parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        return value.strip().lower() in {"true", "1", "yes", "y"}

    parser = argparse.ArgumentParser(description="AFlow Optimizer for HumanEval")
    parser.add_argument("--dataset", type=str, default="HumanEval", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="code", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    parser.add_argument("--check_convergence", type=str2bool, default=True, help="Whether to enable early stop")
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args(argv)
# Config operators.
operators = [
"Custom", # It's the basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
# "AnswerGenerate", # It's for qa
"CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
"Test", # It's for code
# "Programmer", # It's for math
]
# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
operators = [
"Custom",
"CustomCodeGenerate",
"ScEnsemble",
"Test",
]
optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")

View file

@ -3,50 +3,51 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import argparse
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
from metagpt.ext.aflow.scripts.evaluator import Optimizer
# Crucial Parameters
dataset: DatasetType = "MATH" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "math" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
def parse_args(argv=None):
    """Parse command-line options for the MATH AFlow optimizer.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when ``None``, so existing callers are unaffected; passing a list
            makes the function testable without touching ``sys.argv``.

    Returns:
        argparse.Namespace holding the optimizer settings.
    """

    def str2bool(value):
        # argparse's `type=bool` treats ANY non-empty string (including
        # "False") as True; parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        return value.strip().lower() in {"true", "1", "yes", "y"}

    parser = argparse.ArgumentParser(description="AFlow Optimizer for MATH")
    parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="math", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    parser.add_argument("--check_convergence", type=str2bool, default=True, help="Whether to enable early stop")
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args(argv)
# Config operators.
operators = [
"Custom", # It's the basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
# "AnswerGenerate", # It's for qa
# "CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
# "Test", # It's for code
"Programmer", # It's for math
]
# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
operators = [
"Custom",
"ScEnsemble",
"Programmer",
]
optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")

View file

@ -3,50 +3,52 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import argparse
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, QuestionType
from metagpt.ext.aflow.scripts.evaluator import Optimizer
# Crucial Parameters
dataset: DatasetType = "MBPP" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
# Config llm model, you can modify `config/config2.yaml` to use more llms.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
def parse_args(argv=None):
    """Parse command-line options for the MBPP AFlow optimizer.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            when ``None``, so existing callers are unaffected; passing a list
            makes the function testable without touching ``sys.argv``.

    Returns:
        argparse.Namespace holding the optimizer settings.
    """

    def str2bool(value):
        # argparse's `type=bool` treats ANY non-empty string (including
        # "False") as True; parse the text explicitly instead.
        if isinstance(value, bool):
            return value
        return value.strip().lower() in {"true", "1", "yes", "y"}

    parser = argparse.ArgumentParser(description="AFlow Optimizer for MBPP")
    parser.add_argument("--dataset", type=str, default="MBPP", help="Dataset type")
    parser.add_argument("--sample", type=int, default=4, help="Sample count")
    parser.add_argument("--question_type", type=str, default="code", help="Question type")
    parser.add_argument(
        "--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
    )
    parser.add_argument("--initial_round", type=int, default=1, help="Initial round")
    parser.add_argument("--max_rounds", type=int, default=20, help="Max iteration rounds")
    parser.add_argument("--check_convergence", type=str2bool, default=True, help="Whether to enable early stop")
    parser.add_argument("--validation_rounds", type=int, default=5, help="Validation rounds")
    return parser.parse_args(argv)
# Config operators.
operators = [
"Custom", # It's the basic unit of a fixed node. The optimizer can modify its prompt to get various nodes.
# "AnswerGenerate", # It's for qa
"CustomCodeGenerate", # It's for code
"ScEnsemble", # It's for code, math and qa
"Test", # It's for code
# "Programmer", # It's for math
]
# Create an optimizer instance
optimizer = Optimizer(
dataset=dataset, # Config dataset
question_type=question_type, # Config Question Type
opt_llm_config=claude_llm_config, # Config Optimizer LLM
exec_llm_config=mini_llm_config, # Config Execution LLM
check_convergence=check_convergence, # Whether Early Stop
operators=operators, # Config Operators you want to use
optimized_path=optimized_path, # Config Optimized workflow's file path
sample=sample, # Only Top(sample) rounds will be selected.
initial_round=initial_round, # Optimize from initial round
max_rounds=max_rounds, # The max iteration of AFLOW.
validation_rounds=validation_rounds, # The validation rounds of AFLOW.
)
if __name__ == "__main__":
# Optimize workflow via setting the optimizer's mode to 'Graph'
args = parse_args()
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
operators = [
"Custom",
"CustomCodeGenerate",
"ScEnsemble",
"Test",
]
optimizer = Optimizer(
dataset=args.dataset,
question_type=args.question_type,
opt_llm_config=claude_llm_config,
exec_llm_config=mini_llm_config,
check_convergence=args.check_convergence,
operators=operators,
optimized_path=args.optimized_path,
sample=args.sample,
initial_round=args.initial_round,
max_rounds=args.max_rounds,
validation_rounds=args.validation_rounds,
)
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
# optimizer.optimize("Test")

View file

@ -9,17 +9,17 @@ from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
from metagpt.ext.aflow.scripts.optimizer import Optimizer
# DatasetType, QuestionType, and OptimizerType definitions
# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]
def parse_args():
parser = argparse.ArgumentParser(description="AFlow Optimizer")
parser.add_argument("--dataset", type=str, default="MATH", help="Dataset type")
parser.add_argument(
"--dataset",
type=str,
default="MATH",
help="Dataset type, including HumanEval, MBPP, GSM8K, MATH, HotpotQA, DROP",
)
parser.add_argument("--sample", type=int, default=4, help="Sample count")
parser.add_argument("--question_type", type=str, default="math", help="Question type")
parser.add_argument("--question_type", type=str, default="math", help="Question type, including math, code, qa")
parser.add_argument(
"--optimized_path", type=str, default="metagpt/ext/aflow/scripts/optimized", help="Optimized result save path"
)

View file

@ -510,8 +510,9 @@ class ActionNode:
return {field_name: field.annotation for field_name, field in model_class.model_fields.items()}
def xml_compile(self, context):
# TODO 再来一版
"""
Compile the prompt to make it easier for the model to understand the format.
"""
field_names = self.get_field_names()
# Construct the example using the field names
examples = []

View file

@ -11,12 +11,12 @@ import os
import numpy as np
from metagpt.utils.common import write_json_file
from metagpt.utils.common import read_json_file, write_json_file
def generate_random_indices(n, n_samples, test=False):
"""
生成随机索引
Generate random indices
"""
def _set_seed(seed=42):
@ -52,20 +52,16 @@ def log_mismatch(problem, expected_output, prediction, predicted_number, path):
log_file = os.path.join(path, "log.json")
# 检查log文件是否已经存在
# Check if the log file already exists
if os.path.exists(log_file):
# 如果存在,加载现有的日志数据
with open(log_file, "r", encoding="utf-8") as f:
try:
data = json.load(f)
except json.JSONDecodeError:
data = []
# If it exists, load the existing log data
data = read_json_file(log_file)
else:
# 如果不存在,创建一个新的日志列表
# If it does not exist, create a new log list
data = []
# 添加新的日志记录
# Add the new log entry
data.append(log_data)
# 将数据写回到log.json文件
# Write the data back to log.json file
write_json_file(log_file, data, encoding="utf-8", indent=4)

View file

@ -10,6 +10,7 @@ from typing import List, Literal
from pydantic import BaseModel, Field
from metagpt.actions.action_node import ActionNode
from metagpt.ext.aflow.scripts.evaluator import DatasetType
from metagpt.ext.aflow.scripts.optimizer_utils.convergence_utils import ConvergenceUtils
from metagpt.ext.aflow.scripts.optimizer_utils.data_utils import DataUtils
from metagpt.ext.aflow.scripts.optimizer_utils.evaluation_utils import EvaluationUtils
@ -18,7 +19,6 @@ from metagpt.ext.aflow.scripts.optimizer_utils.graph_utils import GraphUtils
from metagpt.logs import logger
from metagpt.provider.llm_provider_registry import create_llm_instance
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
QuestionType = Literal["math", "code", "qa"]
OptimizerType = Literal["Graph", "Test"]

View file

@ -76,8 +76,8 @@ class ConvergenceUtils:
if len(self.avg_scores) < top_k + 1:
return False, None, None
convergence_count = 0 # Convergence counter
previous_Y = None # Y value of the previous round (average of top_k scores)
sigma_Y_previous = None # Standard error of Y value from previous round
previous_y = None # Y value of the previous round (average of top_k scores)
sigma_y_previous = None # Standard error of Y value from previous round
for i in range(len(self.avg_scores)):
# Dynamically select top_k from current round and all previous rounds
top_k_indices = np.argsort(self.avg_scores[: i + 1])[::-1][
@ -87,18 +87,18 @@ class ConvergenceUtils:
top_k_stds = [
self.stds[j] for j in top_k_indices
] # Get list of standard deviations corresponding to top k scores
# Calculate mean of top k scores for current round, i.e., Y_current
Y_current = np.mean(top_k_scores)
# Calculate standard error of Y_current (sigma_Y_current), representing score dispersion
sigma_Y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
# Calculate mean of top k scores for current round, i.e., y_current
y_current = np.mean(top_k_scores)
# Calculate standard error of y_current (sigma_y_current), representing score dispersion
sigma_y_current = np.sqrt(np.sum([s**2 for s in top_k_stds]) / (top_k**2))
# If not the first round, calculate change in Y (Delta_Y) and corresponding standard error
if previous_Y is not None:
if previous_y is not None:
# Calculate Y difference between current round and previous round
Delta_Y = Y_current - previous_Y
delta_y = y_current - previous_y
# Calculate standard error of Y difference (sigma_Delta_Y)
sigma_Delta_Y = np.sqrt(sigma_Y_current**2 + sigma_Y_previous**2)
sigma_delta_y = np.sqrt(sigma_y_current**2 + sigma_y_previous**2)
# Check if Y change is within acceptable confidence interval, i.e., convergence condition
if abs(Delta_Y) <= z * sigma_Delta_Y:
if abs(delta_y) <= z * sigma_delta_y:
convergence_count += 1
# If consecutive converged rounds reach set value, return convergence information
if convergence_count >= consecutive_rounds:
@ -107,8 +107,8 @@ class ConvergenceUtils:
# If change is large, reset convergence counter
convergence_count = 0
# Update Y value and standard error for previous round
previous_Y = Y_current
sigma_Y_previous = sigma_Y_current
previous_y = y_current
sigma_y_previous = sigma_y_current
# If convergence condition not met, return not converged
return False, None, None

View file

@ -7,6 +7,7 @@ import numpy as np
import pandas as pd
from metagpt.logs import logger
from metagpt.utils.common import read_json_file, write_json_file
class DataUtils:
@ -17,11 +18,7 @@ class DataUtils:
def load_results(self, path: str) -> list:
result_path = os.path.join(path, "results.json")
if os.path.exists(result_path):
with open(result_path, "r") as json_file:
try:
return json.load(json_file)
except json.JSONDecodeError:
return []
return read_json_file(result_path, encoding="utf-8")
return []
def get_top_rounds(self, sample: int, path=None, mode="Graph"):
@ -97,8 +94,7 @@ class DataUtils:
if not os.path.exists(log_dir):
return "" # 如果文件不存在,返回空字符串
logger.info(log_dir)
with open(log_dir, "r", encoding="utf-8") as f:
data = json.load(f)
data = read_json_file(log_dir, encoding="utf-8")
if isinstance(data, dict):
data = [data]
@ -125,8 +121,7 @@ class DataUtils:
return {"round": round, "score": score, "avg_cost": avg_cost, "total_cost": total_cost, "time": now}
def save_results(self, json_file_path: str, data: list):
with open(json_file_path, "w") as json_file:
json.dump(data, json_file, default=str, indent=4)
write_json_file(json_file_path, data, encoding="utf-8", indent=4)
def _load_scores(self, path=None, mode="Graph"):
if mode == "Graph":
@ -137,8 +132,7 @@ class DataUtils:
result_file = os.path.join(rounds_dir, "results.json")
self.top_scores = []
with open(result_file, "r", encoding="utf-8") as file:
data = json.load(file)
data = read_json_file(result_file, encoding="utf-8")
df = pd.DataFrame(data)
scores_per_round = df.groupby("round")["score"].mean().to_dict()

View file

@ -3,6 +3,7 @@ import os
from collections import defaultdict
from metagpt.logs import logger
from metagpt.utils.common import read_json_file, write_json_file
class ExperienceUtils:
@ -24,23 +25,22 @@ class ExperienceUtils:
round_number = int(round_dir.split("_")[1])
json_file_path = os.path.join(round_path, "experience.json")
if os.path.exists(json_file_path):
with open(json_file_path, "r", encoding="utf-8") as json_file:
data = json.load(json_file)
father_node = data["father node"]
data = read_json_file(json_file_path, encoding="utf-8")
father_node = data["father node"]
if experience_data[father_node]["score"] is None:
experience_data[father_node]["score"] = data["before"]
if experience_data[father_node]["score"] is None:
experience_data[father_node]["score"] = data["before"]
if data["succeed"]:
experience_data[father_node]["success"][round_number] = {
"modification": data["modification"],
"score": data["after"],
}
else:
experience_data[father_node]["failure"][round_number] = {
"modification": data["modification"],
"score": data["after"],
}
if data["succeed"]:
experience_data[father_node]["success"][round_number] = {
"modification": data["modification"],
"score": data["after"],
}
else:
experience_data[father_node]["failure"][round_number] = {
"modification": data["modification"],
"score": data["after"],
}
except Exception as e:
logger.info(f"Error processing {round_dir}: {str(e)}")
@ -93,5 +93,4 @@ class ExperienceUtils:
experience["after"] = avg_score
experience["succeed"] = bool(avg_score > experience["before"])
with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
json.dump(experience, file, ensure_ascii=False, indent=4)
write_json_file(os.path.join(directory, "experience.json"), experience, encoding="utf-8", indent=4)

View file

@ -6,11 +6,17 @@
import json
import re
from enum import Enum
from typing import Any, List, Tuple
def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
if dataset == "HumanEval":
class CodeDataset(Enum):
HUMAN_EVAL = "HumanEval"
MBPP = "MBPP"
def extract_test_cases_from_jsonl(entry_point: str, dataset: CodeDataset = CodeDataset.HUMAN_EVAL):
if dataset == CodeDataset.HUMAN_EVAL:
file_path = "metagpt/ext/aflow/data/humaneval_public_test.jsonl"
# Retain the original hardcoded test cases
hardcoded_cases = {
@ -25,7 +31,7 @@ def extract_test_cases_from_jsonl(entry_point: str, dataset: str = "HumanEval"):
"sum_squares": "",
"starts_one_ends": "",
}
elif dataset == "MBPP":
elif dataset == CodeDataset.MBPP:
file_path = "metagpt/ext/aflow/data/mbpp_public_test.jsonl"
hardcoded_cases = {
"remove_odd": "",

View file

@ -3,13 +3,11 @@
# @Author : didi
# @Desc : Basic Graph Class
from typing import Literal
from metagpt.ext.aflow.scripts.evaluator import DatasetType
from metagpt.provider.llm_provider_registry import create_llm_instance
from metagpt.utils.cost_manager import CostManager
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
class Workflow:
def __init__(