diff --git a/.gitignore b/.gitignore index dea7d91f6..6443b07bd 100644 --- a/.gitignore +++ b/.gitignore @@ -189,6 +189,3 @@ cov.xml *.dot .python-version *.csv -/examples/aflow/data/baselines/general -/examples/aflow/scripts/optimized/HumanEval/graphs -/examples/aflow/scripts/optimized/HumanEval/graphs_test diff --git a/examples/aflow/optimize.py b/examples/aflow/optimize.py index a3f64d86a..2a0b6d5bb 100644 --- a/examples/aflow/optimize.py +++ b/examples/aflow/optimize.py @@ -4,7 +4,7 @@ # @Desc : Entrance of AFlow. from metagpt.ext.aflow.scripts.optimizer import Optimizer -from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType +from metagpt.ext.aflow.scripts.optimizer import DatasetType, QuestionType, OptimizerType from metagpt.ext.aflow.data.download_data import download from metagpt.configs.models_config import ModelsConfig from typing import Literal @@ -15,13 +15,13 @@ from typing import Literal # OptimizerType = Literal["Graph", "Test"] # When you fisrt use, please download the datasets and initial rounds; If you want to get a look of the results, please download the results. 
-# download(["datasets", "results", "initial_rounds"]) +download(["datasets", "initial_rounds"]) # Crucial Parameters dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType -optimized_path: str = "examples/aflow/scripts/optimized" # Optimized Result Save Path +optimized_path: str = "metagpt/ext/aflow/scripts/optimized" # Optimized Result Save Path initial_round: int = 1 # Corrected the case from Initial_round to initial_round max_rounds: int = 20 check_convergence: bool = True diff --git a/examples/aflow/readme.md b/examples/aflow/readme.md index 62b92548d..4fa9bb150 100644 --- a/examples/aflow/readme.md +++ b/examples/aflow/readme.md @@ -25,7 +25,7 @@ ## Datasets ## Quick Start 1. Configure your search in `optimize.py`: - - Open `examples/aflow/scripts/optimize.py` + - Open `examples/aflow/optimize.py` - Set the following parameters: ```python dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name @@ -37,19 +37,19 @@ ## Quick Start max_rounds = 20 # Maximum number of optimization rounds ``` - Adjust these parameters according to your specific requirements and dataset -2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference) +2. Set up parameters in `config/config2.yaml` (see `metagpt/ext/aflow/config2.example.yaml` for reference) 3. Set the operator you want to use in `optimize.py` and in `xxxx` 4. Download the init round of six datasets and put them in `xxxxxx` 5. 
Add your custom dataset and corresponding evaluation function: -- Create a new Python file in the `examples/aflow/benchmark/` directory, named `{custom_dataset_name}.py` +- Create a new Python file in the `metagpt/ext/aflow/benchmark/` directory, named `{custom_dataset_name}.py` - Implement the following key functions in this new file: - `load_data`: for loading the dataset - `evaluate_problem`: for evaluating a single problem solution - `evaluate_all_problems`: for evaluating all problems - `save_results_to_csv`: for saving evaluation results - `optimize_{custom_dataset_name}_evaluation`: main evaluation function that integrates the above functionalities -- Add your custom dataset name and config val_list in `examples/aflow/scripts/evaluator.py` +- Add your custom dataset name and config val_list in `metagpt/ext/aflow/scripts/evaluator.py` ## License diff --git a/metagpt/ext/aflow/README.md b/metagpt/ext/aflow/README.md index 62b92548d..4fa9bb150 100644 --- a/metagpt/ext/aflow/README.md +++ b/metagpt/ext/aflow/README.md @@ -25,7 +25,7 @@ ## Datasets ## Quick Start 1. Configure your search in `optimize.py`: - - Open `examples/aflow/scripts/optimize.py` + - Open `examples/aflow/optimize.py` - Set the following parameters: ```python dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name @@ -37,19 +37,19 @@ ## Quick Start max_rounds = 20 # Maximum number of optimization rounds ``` - Adjust these parameters according to your specific requirements and dataset -2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference) +2. Set up parameters in `config/config2.yaml` (see `metagpt/ext/aflow/config2.example.yaml` for reference) 3. Set the operator you want to use in `optimize.py` and in `xxxx` 4. Download the init round of six datasets and put them in `xxxxxx` 5. 
Add your custom dataset and corresponding evaluation function: -- Create a new Python file in the `examples/aflow/benchmark/` directory, named `{custom_dataset_name}.py` +- Create a new Python file in the `metagpt/ext/aflow/benchmark/` directory, named `{custom_dataset_name}.py` - Implement the following key functions in this new file: - `load_data`: for loading the dataset - `evaluate_problem`: for evaluating a single problem solution - `evaluate_all_problems`: for evaluating all problems - `save_results_to_csv`: for saving evaluation results - `optimize_{custom_dataset_name}_evaluation`: main evaluation function that integrates the above functionalities -- Add your custom dataset name and config val_list in `examples/aflow/scripts/evaluator.py` +- Add your custom dataset name and config val_list in `metagpt/ext/aflow/scripts/evaluator.py` ## License diff --git a/metagpt/ext/aflow/config2.example.yaml b/metagpt/ext/aflow/config2.example.yaml deleted file mode 100644 index ebaef33e2..000000000 --- a/metagpt/ext/aflow/config2.example.yaml +++ /dev/null @@ -1,12 +0,0 @@ -models: - "": # model: "gpt-4-turbo" # or gpt-3.5-turbo - api_type: "openai" # or azure / ollama / groq etc. 
- base_url: "" - api_key: "" - temperature: 0 - "": - api_type: "openai" - base_url: "" - api_key: "" - temperature: 0 -CALC_USAGE: True diff --git a/metagpt/ext/aflow/data/download_data.py b/metagpt/ext/aflow/data/download_data.py index 00aaa7ebc..f3727aea1 100644 --- a/metagpt/ext/aflow/data/download_data.py +++ b/metagpt/ext/aflow/data/download_data.py @@ -42,26 +42,23 @@ def process_dataset(url: str, filename: str, extract_path: str) -> None: # Define the datasets to be downloaded # Users can modify this list to choose which datasets to download -datasets_to_download: List[Dict[str, str]] = [ - { - "name": "datasets", - "url": "https://drive.google.com/uc?export=download&id=1tXp5cLw89egeKRwDuood2TPqoEWd8_C0", +datasets_to_download: Dict[str, Dict[str, str]] = { + "datasets": { + "url": "https://drive.google.com/uc?export=download&id=1DNoegtZiUhWtvkd2xoIuElmIi4ah7k8e", "filename": "aflow_data.tar.gz", - "extract_path": "examples/aflow/data" + "extract_path": "metagpt/ext/aflow/data" }, - { - "name": "results", - "url": "", # Please fill in the correct URL + "results": { + "url": "", # Please fill in the correct URL "filename": "result.tar.gz", - "extract_path": "examples/aflow/data/results" + "extract_path": "metagpt/ext/aflow/data/results" }, - { - "name": "initial_rounds", - "url": "", # Please fill in the correct URL - "filename": "first_round.tar.gz", - "extract_path": "examples/aflow/scripts/optimized" + "initial_rounds": { + "url": "https://drive.google.com/uc?export=download&id=1UBoW4WBWjX2gs4I_jq3ALdXeLdwDJMdP", + "filename": "initial_rounds.tar.gz", + "extract_path": "metagpt/ext/aflow/scripts/optimized" } -] +} def download(datasets): """Main function to process all selected datasets.""" diff --git a/metagpt/ext/aflow/full data(include baselines).zip b/metagpt/ext/aflow/full data(include baselines).zip deleted file mode 100644 index 4ddfadbfd..000000000 Binary files a/metagpt/ext/aflow/full data(include baselines).zip and /dev/null differ diff --git 
a/metagpt/ext/aflow/scripts/evaluator.py b/metagpt/ext/aflow/scripts/evaluator.py index 26a493402..873f4ad9b 100644 --- a/metagpt/ext/aflow/scripts/evaluator.py +++ b/metagpt/ext/aflow/scripts/evaluator.py @@ -55,5 +55,5 @@ class Evaluator: return graph(name=dataset, llm_config=llm_config, dataset=dataset_config) def _get_data_path(self, dataset: DatasetType, test: bool) -> str: - base_path = f"examples/aflow/data/{dataset.lower()}" + base_path = f"metagpt/ext/aflow/data/{dataset.lower()}" return f"{base_path}_test.jsonl" if test else f"{base_path}_validate.jsonl" diff --git a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/__init__.py b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py new file mode 100644 index 000000000..c02b0b9e2 --- /dev/null +++ b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py @@ -0,0 +1,33 @@ +from typing import Literal +import metagpt.ext.aflow.scripts.optimized.GSM8K.workflows.template.operator as operator +import metagpt.ext.aflow.scripts.optimized.GSM8K.workflows.round_2.prompt as prompt_custom +from metagpt.provider.llm_provider_registry import create_llm_instance +from metagpt.utils.cost_manager import CostManager + +DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"] + +class Workflow: + def __init__( + self, + name: str, + llm_config, + dataset: DatasetType, + ) -> None: + self.name = name + self.dataset = dataset + self.llm = create_llm_instance(llm_config) + self.llm.cost_manager = CostManager() + self.custom = operator.Custom(self.llm) + self.sc_ensemble = operator.ScEnsemble(self.llm) + + async def __call__(self, problem: str): + """ + Implementation of the workflow + """ + solutions = [] + for _ in range(3): + solution = await 
self.custom(input=problem, instruction=prompt_custom.SOLVE_PROMPT) + solutions.append(solution['response']) + + final_solution = await self.sc_ensemble(solutions=solutions, problem=problem) + return final_solution['response'], self.llm.cost_manager.total_cost diff --git a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py new file mode 100644 index 000000000..3d20965bc --- /dev/null +++ b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py @@ -0,0 +1,12 @@ +SOLVE_PROMPT = """ +You are a mathematical problem solver. Your task is to solve the given problem step by step, showing all your work. After solving the problem, provide the final numerical answer without any units or explanations. Make sure to: + +1. Break down the problem into clear steps. +2. Show all calculations. +3. Use proper mathematical notation. +4. Double-check your work for accuracy. +5. Provide only the final numerical answer at the end, with no additional text. 
+ +Solve the following problem: + +""" \ No newline at end of file diff --git a/metagpt/ext/aflow/scripts/utils.py b/metagpt/ext/aflow/scripts/utils.py index c4e6aca0d..d74bea1b5 100644 --- a/metagpt/ext/aflow/scripts/utils.py +++ b/metagpt/ext/aflow/scripts/utils.py @@ -56,7 +56,7 @@ def extract_test_cases_from_jsonl( entry_point: str, dataset: str = "HumanEval" ): if dataset == "HumanEval": - file_path = "examples/aflow/data/humaneval_public_test.jsonl" + file_path = "metagpt/ext/aflow/data/humaneval_public_test.jsonl" # Retain the original hardcoded test cases hardcoded_cases = { "find_zero": "", @@ -71,7 +71,7 @@ def extract_test_cases_from_jsonl( "starts_one_ends":"" } elif dataset == "MBPP": - file_path = "examples/aflow/data/mbpp_public_test.jsonl" + file_path = "metagpt/ext/aflow/data/mbpp_public_test.jsonl" hardcoded_cases = { "remove_odd": "", "replace_spaces": "", diff --git a/optimize.py b/optimize.py deleted file mode 100644 index a3f64d86a..000000000 --- a/optimize.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -# @Date : 8/23/2024 20:00 PM -# @Author : didi -# @Desc : Entrance of AFlow. - -from metagpt.ext.aflow.scripts.optimizer import Optimizer -from metagpt.ext.aflow.scripts.evaluator import DatasetType, QuestionType, OptimizerType -from metagpt.ext.aflow.data.download_data import download -from metagpt.configs.models_config import ModelsConfig -from typing import Literal - -# DatasetType, QuestionType, and OptimizerType definitions -# DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"] -# QuestionType = Literal["math", "code", "qa"] -# OptimizerType = Literal["Graph", "Test"] - -# When you fisrt use, please download the datasets and initial rounds; If you want to get a look of the results, please download the results. 
-# download(["datasets", "results", "initial_rounds"]) - -# Crucial Parameters -dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType -sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows -question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType -optimized_path: str = "examples/aflow/scripts/optimized" # Optimized Result Save Path -initial_round: int = 1 # Corrected the case from Initial_round to initial_round -max_rounds: int = 20 -check_convergence: bool = True - -# Config llm model, you can modify `config/config2.yaml` to use more llms. -mini_llm_config = ModelsConfig.default().get("gpt-4o-mini") -claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620") - -# Config operators. -operators = [ - "Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes. - # "AnswerGenerate" # It's for qa - # "CustomCodeGenerate", # It's for code - "ScEnsemble", # It's for code, math and qa - # "Test", # It's for code - "Programmer", # It's for math -] - -# Create an optimizer instance -optimizer = Optimizer( - dataset=dataset, # Config dataset - question_type=question_type, # Config Question Type - opt_llm_config=claude_llm_config, # Config Optimizer LLM - exec_llm_config=mini_llm_config, # Config Execution LLM - check_convergence=check_convergence, # Whether Early Stop - operators=operators, # Config Operators you want to use - optimized_path=optimized_path, # Config Optimized workflow's file path - sample=sample, # Only Top(sample) rounds will be selected. - initial_round=initial_round, # Optimize from initial round - max_rounds=max_rounds # The max iteration of AFLOW. -) - -if __name__ == "__main__": - # Optimize workflow via setting the optimizer's mode to 'Graph' - optimizer.optimize("Graph") - # Test workflow via setting the optimizer's mode to 'Test' - # optimizer.optimize("Test") \ No newline at end of file