From 27e942cc5ef94359952df969ad0ff97a2f737946 Mon Sep 17 00:00:00 2001 From: didi <84363704+didiforgithub@users.noreply.github.com> Date: Tue, 22 Oct 2024 12:51:30 +0800 Subject: [PATCH] update --- metagpt/ext/aflow/benchmark/benchmark.py | 8 +++-- metagpt/ext/aflow/benchmark/gsm8k.py | 2 +- metagpt/ext/aflow/benchmark/hotpotqa.py | 2 +- metagpt/ext/aflow/benchmark/humaneval.py | 5 +-- metagpt/ext/aflow/benchmark/math.py | 2 +- metagpt/ext/aflow/benchmark/mbpp.py | 3 +- metagpt/ext/aflow/data/download_data.py | 9 ++--- metagpt/ext/aflow/scripts/operator.py | 1 + .../GSM8K/workflows/round_2/__init__.py | 0 .../GSM8K/workflows/round_2/graph.py | 33 ------------------- .../GSM8K/workflows/round_2/prompt.py | 12 ------- 11 files changed, 19 insertions(+), 58 deletions(-) delete mode 100644 metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/__init__.py delete mode 100644 metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py delete mode 100644 metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py diff --git a/metagpt/ext/aflow/benchmark/benchmark.py b/metagpt/ext/aflow/benchmark/benchmark.py index 1712e1eac..7412334aa 100644 --- a/metagpt/ext/aflow/benchmark/benchmark.py +++ b/metagpt/ext/aflow/benchmark/benchmark.py @@ -9,6 +9,8 @@ import aiofiles import pandas as pd from tqdm.asyncio import tqdm_asyncio +from metagpt.logs import logger + class BaseBenchmark(ABC): def __init__(self, name: str, file_path: str, log_path: str): @@ -39,7 +41,7 @@ class BaseBenchmark(ABC): output_file = os.path.join(self.log_path, filename) df.to_csv(output_file, index=False) - print(f"Results saved to {output_file}") + logger.info(f"Results saved to {output_file}") return avg_score, a_cost, t_cost @@ -95,6 +97,6 @@ class BaseBenchmark(ABC): results = await self.evaluate_all_problems(data, graph, max_concurrent_tasks) columns = self.get_result_columns() average_score, average_cost, total_cost = self.save_results_to_csv(results, columns) - print(f"Average score on {self.name} dataset: {average_score:.5f}") - print(f"Total Cost: {total_cost:.5f}") + logger.info(f"Average score on {self.name} dataset: {average_score:.5f}") + logger.info(f"Total Cost: {total_cost:.5f}") return average_score, average_cost, total_cost diff --git a/metagpt/ext/aflow/benchmark/gsm8k.py b/metagpt/ext/aflow/benchmark/gsm8k.py index 86887db7f..5ecff8c7f 100644 --- a/metagpt/ext/aflow/benchmark/gsm8k.py +++ b/metagpt/ext/aflow/benchmark/gsm8k.py @@ -8,7 +8,7 @@ from typing import Callable, List, Optional, Tuple from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark - +from metagpt.logs import logger class GSM8KBenchmark(BaseBenchmark): def __init__(self, name: str, file_path: str, log_path: str): diff --git a/metagpt/ext/aflow/benchmark/hotpotqa.py b/metagpt/ext/aflow/benchmark/hotpotqa.py index 3b485c022..85c15440e 100644 --- a/metagpt/ext/aflow/benchmark/hotpotqa.py +++ b/metagpt/ext/aflow/benchmark/hotpotqa.py @@ -6,7 +6,7 @@ from typing import Callable, List, Tuple from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark - +from metagpt.logs import logger class HotpotQABenchmark(BaseBenchmark): def __init__(self, name: str, file_path: str, log_path: str): diff --git a/metagpt/ext/aflow/benchmark/humaneval.py b/metagpt/ext/aflow/benchmark/humaneval.py index 53bc7cfde..abe44e6e4 100644 --- a/metagpt/ext/aflow/benchmark/humaneval.py +++ b/metagpt/ext/aflow/benchmark/humaneval.py @@ -6,6 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from metagpt.actions.code_sanitize import sanitize +from metagpt.logs import logger from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark @@ -138,11 +139,11 @@ class HumanEvalBenchmark(BaseBenchmark): return input_text, prediction, expected_output, score, cost except asyncio.TimeoutError: - print("Timeout error. Skipping this sample.") + logger.info("Timeout error. Skipping this sample.") return input_text, "Timeout", expected_output, 0.0, 0.0 except Exception as e: - print(f"Maximum retries reached. Skipping this sample. Error: {e}") + logger.info(f"Maximum retries reached. Skipping this sample. Error: {e}") return input_text, str(e), expected_output, 0.0, 0.0 def calculate_score(self, expected_output: str, prediction: str) -> Tuple[float, str]: diff --git a/metagpt/ext/aflow/benchmark/math.py b/metagpt/ext/aflow/benchmark/math.py index 475f9e7bd..edc23c347 100644 --- a/metagpt/ext/aflow/benchmark/math.py +++ b/metagpt/ext/aflow/benchmark/math.py @@ -9,7 +9,7 @@ from sympy.parsing.sympy_parser import parse_expr from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark - +from metagpt.logs import logger class MATHBenchmark(BaseBenchmark): def __init__(self, name: str, file_path: str, log_path: str): diff --git a/metagpt/ext/aflow/benchmark/mbpp.py b/metagpt/ext/aflow/benchmark/mbpp.py index 67bd7f255..2d9df3745 100644 --- a/metagpt/ext/aflow/benchmark/mbpp.py +++ b/metagpt/ext/aflow/benchmark/mbpp.py @@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed from metagpt.actions.code_sanitize import sanitize +from metagpt.logs import logger from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark @@ -112,7 +113,7 @@ class MBPPBenchmark(BaseBenchmark): return input_text, prediction, expected_output, score, cost except Exception as e: - print(f"Maximum retries reached. Skipping this sample. Error: {e}") + logger.info(f"Maximum retries reached. Skipping this sample. Error: {e}") return input_text, str(e), expected_output, 0.0, 0.0 def calculate_score(self, expected_output: str, prediction: str) -> Tuple[float, str]: diff --git a/metagpt/ext/aflow/data/download_data.py b/metagpt/ext/aflow/data/download_data.py index 4e176cd56..219e2fec7 100644 --- a/metagpt/ext/aflow/data/download_data.py +++ b/metagpt/ext/aflow/data/download_data.py @@ -10,6 +10,7 @@ from typing import Dict import requests from tqdm import tqdm +from metagpt.logs import logger def download_file(url: str, filename: str) -> None: """Download a file from the given URL and show progress.""" @@ -33,16 +34,16 @@ def extract_tar_gz(filename: str, extract_path: str) -> None: def process_dataset(url: str, filename: str, extract_path: str) -> None: """Download, extract, and clean up a dataset.""" - print(f"Downloading {filename}...") + logger.info(f"Downloading {filename}...") download_file(url, filename) - print(f"Extracting {filename}...") + logger.info(f"Extracting {filename}...") extract_tar_gz(filename, extract_path) - print(f"{filename} download and extraction completed.") + logger.info(f"{filename} download and extraction completed.") os.remove(filename) - print(f"Removed {filename}") + logger.info(f"Removed {filename}") # Define the datasets to be downloaded diff --git a/metagpt/ext/aflow/scripts/operator.py b/metagpt/ext/aflow/scripts/operator.py index 0d1354210..bfd875b26 100644 --- a/metagpt/ext/aflow/scripts/operator.py +++ b/metagpt/ext/aflow/scripts/operator.py @@ -13,6 +13,7 @@ from typing import Dict, List, Tuple from tenacity import retry, stop_after_attempt, wait_fixed from metagpt.actions.action_node import ActionNode +from metagpt.logs import logger from metagpt.ext.aflow.scripts.operator_an import ( AnswerGenerateOp, CodeGenerateOp, diff --git a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/__init__.py b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py deleted file mode 100644 index c02b0b9e2..000000000 --- a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/graph.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Literal -import metagpt.ext.aflow.scripts.optimized.GSM8K.workflows.template.operator as operator -import metagpt.ext.aflow.scripts.optimized.GSM8K.workflows.round_2.prompt as prompt_custom -from metagpt.provider.llm_provider_registry import create_llm_instance -from metagpt.utils.cost_manager import CostManager - -DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"] - -class Workflow: - def __init__( - self, - name: str, - llm_config, - dataset: DatasetType, - ) -> None: - self.name = name - self.dataset = dataset - self.llm = create_llm_instance(llm_config) - self.llm.cost_manager = CostManager() - self.custom = operator.Custom(self.llm) - self.sc_ensemble = operator.ScEnsemble(self.llm) - - async def __call__(self, problem: str): - """ - Implementation of the workflow - """ - solutions = [] - for _ in range(3): - solution = await self.custom(input=problem, instruction=prompt_custom.SOLVE_PROMPT) - solutions.append(solution['response']) - - final_solution = await self.sc_ensemble(solutions=solutions, problem=problem) - return final_solution['response'], self.llm.cost_manager.total_cost diff --git a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py b/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py deleted file mode 100644 index 3d20965bc..000000000 --- a/metagpt/ext/aflow/scripts/optimized/GSM8K/workflows/round_2/prompt.py +++ /dev/null @@ -1,12 +0,0 @@ -SOLVE_PROMPT = """ -You are a mathematical problem solver. Your task is to solve the given problem step by step, showing all your work. After solving the problem, provide the final numerical answer without any units or explanations. Make sure to: - -1. Break down the problem into clear steps. -2. Show all calculations. -3. Use proper mathematical notation. -4. Double-check your work for accuracy. -5. Provide only the final numerical answer at the end, with no additional text. - -Solve the following problem: - -""" \ No newline at end of file