Delete unnecessary part & Update Optimize for better use

This commit is contained in:
didi 2024-10-25 16:52:49 +08:00
parent 06c191514b
commit 38c825d04c
6 changed files with 64 additions and 45 deletions

View file

@ -38,14 +38,15 @@ ## Quick Start
- Open `examples/aflow/optimize.py`
- Set the following parameters:
```python
dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
question_type = "code" # Choose from: "math", "code", "qa"
sample = 4 # Number of samples to use for optimization
check_convergence = True # Whether to check for convergence
optimized_path = "path/to/optimized/workflows" # Path to save optimized workflows, defaults to metagpt/ext/aflow/scripts/optimized
initial_round = 1 # Starting round number
max_rounds = 20 # Maximum number of optimization rounds
validation_rounds = 5 # The validation rounds of AFLOW.
dataset: DatasetType = "MATH" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
question_type: QuestionType = "math" # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path
initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
if_fisrt_optimize = True # You should change it to False after the first optimize.
```
- Adjust these parameters according to your specific requirements and dataset
2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
@ -66,10 +67,13 @@ ## Citation
If you use AFlow in your research, please cite our paper:
```
@article{zhang2024aflow,
title={AFlow: Automating Agentic Workflow Generation},
author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
journal={arXiv preprint arXiv:2410.10762},
year={2024}
@misc{zhang2024aflow,
title={AFlow: Automating Agentic Workflow Generation},
author={Jiayi Zhang and Jinyu Xiang and Zhaoyang Yu and Fengwei Teng and Xionghui Chen and Jiaqi Chen and Mingchen Zhuge and Xin Cheng and Sirui Hong and Jinlin Wang and Bingnan Zheng and Bang Liu and Yuyu Luo and Chenglin Wu},
year={2024},
eprint={2410.10762},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2410.10762},
}
```

View file

@ -3,6 +3,18 @@
# @Author : didi
# @Desc : Entrance of AFlow.
import os
import sys
def setup_environment():
    """Put the project root on ``sys.path`` and make it the working directory.

    The root is found by applying ``os.path.dirname`` three times to this
    file's absolute path.
    """
    root_path = os.path.abspath(__file__)
    # Climb three levels: containing directory, its parent, then the root.
    for _ in range(3):
        root_path = os.path.dirname(root_path)
    # Insert at the front so modules under the root are found first.
    sys.path.insert(0, root_path)
    os.chdir(root_path)
setup_environment()
from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
@ -13,9 +25,6 @@ from metagpt.ext.aflow.scripts.optimizer import DatasetType, Optimizer, Question
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]
# On first use, please download the datasets and initial rounds; if you want to take a look at the results, please download the results as well.
download(["datasets", "initial_rounds"])
# Crucial Parameters
dataset: DatasetType = "MATH" # Ensure the type is consistent with DatasetType
sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows
@ -25,6 +34,7 @@ initial_round: int = 1 # Corrected the case from Initial_round to initial_round
max_rounds: int = 20 # The max iteration of AFLOW.
check_convergence: bool = True # Whether Early Stop
validation_rounds: int = 5 # The validation rounds of AFLOW.
if_fisrt_optimize = True # You should change it to False after the first optimize.
# Configure the LLM models; you can modify `config/config2.yaml` to use more LLMs.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
@ -56,6 +66,8 @@ optimizer = Optimizer(
)
if __name__ == "__main__":
# On first use, please download the datasets and initial rounds; if you want to take a look at the results, please download the results as well.
download(["datasets", "initial_rounds"], if_first_download=if_fisrt_optimize)
# Optimize workflow via setting the optimizer's mode to 'Graph'
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'

View file

@ -23,11 +23,9 @@ class BaseBenchmark(ABC):
async with aiofiles.open(self.file_path, mode="r", encoding="utf-8") as file:
async for line in file:
data.append(json.loads(line))
if specific_indices is not None:
filtered_data = [data[i] for i in specific_indices if i < len(data)]
return filtered_data
return data
def save_results_to_csv(self, results: List[Tuple[Any, ...]], columns: List[str]):
@ -35,26 +33,29 @@ class BaseBenchmark(ABC):
avg_score = df["score"].mean()
t_cost = df["cost"].max()
a_cost = t_cost / len(df) if len(df) > 0 else 0
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"{avg_score:.5f}_{current_time}.csv"
output_file = os.path.join(self.log_path, filename)
df.to_csv(output_file, index=False)
logger.info(f"Results saved to {output_file}")
return avg_score, a_cost, t_cost
def log_mismatch(self, problem: str, expected_output: Any, prediction: str, extracted_output: Any):
def log_mismatch(
self,
problem: str,
expected_output: Any,
prediction: str,
extracted_output: Any,
extract_answer_code: str = "None",
):
log_data = {
"question": problem,
"right_answer": expected_output,
"model_output": prediction,
"extracted_output": extracted_output,
"extract_answer_code": extract_answer_code,
}
log_file = os.path.join(self.log_path, "log.json")
if os.path.exists(log_file):
with open(log_file, "r", encoding="utf-8") as f:
try:
@ -63,9 +64,7 @@ class BaseBenchmark(ABC):
data = []
else:
data = []
data.append(log_data)
with open(log_file, "w", encoding="utf-8") as f:
json.dump(data, f, indent=4, ensure_ascii=False)
@ -89,7 +88,6 @@ class BaseBenchmark(ABC):
return await self.evaluate_problem(problem, graph)
tasks = [sem_evaluate(problem) for problem in data]
return await tqdm_asyncio.gather(*tasks, desc=f"Evaluating {self.name} problems", total=len(data))
async def run_evaluation(self, graph: Callable, va_list: List[int], max_concurrent_tasks: int = 50):

View file

@ -1,3 +1,4 @@
import inspect
import re
from math import isclose
from typing import Any, Callable, List, Tuple
@ -98,6 +99,13 @@ class MATHBenchmark(BaseBenchmark):
pass
return False
def get_function_code(self, func):
    """Return the source text of ``func``, or ``"no code"`` if it is unavailable.

    ``inspect.getsource`` raises ``OSError`` when the source cannot be
    retrieved and ``TypeError`` when the object is a built-in; both cases
    are mapped to the ``"no code"`` placeholder so the caller gets a string
    either way.

    Args:
        func: The object whose source code should be looked up.

    Returns:
        The source code as a string, or the literal ``"no code"``.
    """
    try:
        return inspect.getsource(func)
    except (OSError, TypeError):
        # OSError: source not retrievable; TypeError: built-in object.
        return "no code"
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), retry=retry_if_exception_type(Exception), reraise=True)
async def _generate_output(self, graph, input_text):
return await graph(input_text)
@ -111,7 +119,13 @@ class MATHBenchmark(BaseBenchmark):
uni_score, extracted_output = self.calculate_score(expected_output, output)
if uni_score == 0:
self.log_mismatch(input_text, expected_output, output, extracted_output)
self.log_mismatch(
input_text,
expected_output,
output,
extracted_output,
extract_answer_code=self.get_function_code(self.extract_model_answer),
)
return input_text, output, expected_output, uni_score, cost

View file

@ -68,21 +68,12 @@ datasets_to_download: Dict[str, Dict[str, str]] = {
}
def is_directory_empty(path: str) -> bool:
    """Return True when the directory at ``path`` contains no entries."""
    entries = os.listdir(path)
    return not entries
def download(datasets):
def download(required_datasets, if_first_download: bool = True):
"""Main function to process all selected datasets"""
for dataset_name in datasets:
dataset = datasets_to_download[dataset_name]
extract_path = dataset["extract_path"]
if os.path.exists(extract_path) and not is_directory_empty(extract_path):
logger.info(
f"Target folder {extract_path} for {dataset_name} is not empty, skipping download and extraction."
)
continue
process_dataset(dataset["url"], dataset["filename"], extract_path)
if if_first_download:
for dataset_name in required_datasets:
dataset = datasets_to_download[dataset_name]
extract_path = dataset["extract_path"]
process_dataset(dataset["url"], dataset["filename"], extract_path)
else:
logger.info("Skip downloading datasets")