This commit is contained in:
didi 2024-10-22 12:51:30 +08:00
parent 56d0af1e9e
commit 27e942cc5e
11 changed files with 19 additions and 58 deletions

View file

@ -9,6 +9,8 @@ import aiofiles
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
from metagpt.logs import logger
class BaseBenchmark(ABC):
def __init__(self, name: str, file_path: str, log_path: str):
@ -39,7 +41,7 @@ class BaseBenchmark(ABC):
output_file = os.path.join(self.log_path, filename)
df.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")
logger.info(f"Results saved to {output_file}")
return avg_score, a_cost, t_cost
@ -95,6 +97,6 @@ class BaseBenchmark(ABC):
results = await self.evaluate_all_problems(data, graph, max_concurrent_tasks)
columns = self.get_result_columns()
average_score, average_cost, total_cost = self.save_results_to_csv(results, columns)
print(f"Average score on {self.name} dataset: {average_score:.5f}")
print(f"Total Cost: {total_cost:.5f}")
logger.info(f"Average score on {self.name} dataset: {average_score:.5f}")
logger.info(f"Total Cost: {total_cost:.5f}")
return average_score, average_cost, total_cost

View file

@ -8,7 +8,7 @@ from typing import Callable, List, Optional, Tuple
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.logs import logger
class GSM8KBenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -6,7 +6,7 @@ from typing import Callable, List, Tuple
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.logs import logger
class HotpotQABenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -6,6 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from metagpt.actions.code_sanitize import sanitize
from metagpt.logs import logger
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
@ -138,11 +139,11 @@ class HumanEvalBenchmark(BaseBenchmark):
return input_text, prediction, expected_output, score, cost
except asyncio.TimeoutError:
print("Timeout error. Skipping this sample.")
logger.info("Timeout error. Skipping this sample.")
return input_text, "Timeout", expected_output, 0.0, 0.0
except Exception as e:
print(f"Maximum retries reached. Skipping this sample. Error: {e}")
logger.info(f"Maximum retries reached. Skipping this sample. Error: {e}")
return input_text, str(e), expected_output, 0.0, 0.0
def calculate_score(self, expected_output: str, prediction: str) -> Tuple[float, str]:

View file

@ -9,7 +9,7 @@ from sympy.parsing.sympy_parser import parse_expr
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
from metagpt.logs import logger
class MATHBenchmark(BaseBenchmark):
def __init__(self, name: str, file_path: str, log_path: str):

View file

@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from metagpt.actions.code_sanitize import sanitize
from metagpt.logs import logger
from metagpt.ext.aflow.benchmark.benchmark import BaseBenchmark
@ -112,7 +113,7 @@ class MBPPBenchmark(BaseBenchmark):
return input_text, prediction, expected_output, score, cost
except Exception as e:
print(f"Maximum retries reached. Skipping this sample. Error: {e}")
logger.info(f"Maximum retries reached. Skipping this sample. Error: {e}")
return input_text, str(e), expected_output, 0.0, 0.0
def calculate_score(self, expected_output: str, prediction: str) -> Tuple[float, str]:

View file

@ -10,6 +10,7 @@ from typing import Dict
import requests
from tqdm import tqdm
from metagpt.logs import logger
def download_file(url: str, filename: str) -> None:
"""Download a file from the given URL and show progress."""
@ -33,16 +34,16 @@ def extract_tar_gz(filename: str, extract_path: str) -> None:
def process_dataset(url: str, filename: str, extract_path: str) -> None:
    """Download, extract, and clean up a dataset.

    Progress is reported through the module-level ``logger`` only, so each
    status message is emitted exactly once (no duplicate ``print`` output).

    Args:
        url: Location the archive is downloaded from (passed to ``download_file``).
        filename: Local path the archive is saved to; the file is deleted
            again once extraction has finished.
        extract_path: Directory handed to ``extract_tar_gz`` as the extraction target.
    """
    logger.info(f"Downloading {filename}...")
    download_file(url, filename)

    logger.info(f"Extracting {filename}...")
    extract_tar_gz(filename, extract_path)

    logger.info(f"{filename} download and extraction completed.")

    # The archive is only an intermediate artifact; remove it after extraction.
    os.remove(filename)
    logger.info(f"Removed {filename}")
# Define the datasets to be downloaded

View file

@ -13,6 +13,7 @@ from typing import Dict, List, Tuple
from tenacity import retry, stop_after_attempt, wait_fixed
from metagpt.actions.action_node import ActionNode
from metagpt.logs import logger
from metagpt.ext.aflow.scripts.operator_an import (
AnswerGenerateOp,
CodeGenerateOp,

View file

@ -1,33 +0,0 @@
from typing import Literal
import metagpt.ext.aflow.scripts.optimized.GSM8K.workflows.template.operator as operator
import metagpt.ext.aflow.scripts.optimized.GSM8K.workflows.round_2.prompt as prompt_custom
from metagpt.provider.llm_provider_registry import create_llm_instance
from metagpt.utils.cost_manager import CostManager
# Closed set of dataset names accepted by the `dataset` parameter of Workflow.__init__.
DatasetType = Literal["HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP"]
class Workflow:
    """Workflow that samples several candidate answers and ensembles them.

    On construction an LLM instance is created from ``llm_config`` and given
    a fresh ``CostManager``; the ``Custom`` and ``ScEnsemble`` operators are
    both bound to that same LLM.
    """

    def __init__(
        self,
        name: str,
        llm_config,
        dataset: DatasetType,
    ) -> None:
        """Store the workflow metadata and wire up the LLM-backed operators.

        Args:
            name: Identifier stored on the instance as ``self.name``.
            llm_config: Configuration forwarded to ``create_llm_instance``.
            dataset: Dataset name stored on the instance as ``self.dataset``.
        """
        self.name = name
        self.dataset = dataset

        llm = create_llm_instance(llm_config)
        # A dedicated cost manager so the total cost can be read back in __call__.
        llm.cost_manager = CostManager()
        self.llm = llm

        self.custom = operator.Custom(llm)
        self.sc_ensemble = operator.ScEnsemble(llm)

    async def __call__(self, problem: str):
        """Run the workflow on a single problem.

        The ``Custom`` operator is awaited three times, one call after the
        other, with ``SOLVE_PROMPT`` as instruction; the collected responses
        are then handed to the ``ScEnsemble`` operator.

        Args:
            problem: The problem statement to solve.

        Returns:
            A tuple ``(response, total_cost)`` with the ensemble's
            ``"response"`` entry and the LLM cost manager's ``total_cost``.
        """
        # Awaits inside the comprehension run sequentially, one sample at a time.
        candidates = [
            (await self.custom(input=problem, instruction=prompt_custom.SOLVE_PROMPT))["response"]
            for _ in range(3)
        ]
        ensembled = await self.sc_ensemble(solutions=candidates, problem=problem)
        return ensembled["response"], self.llm.cost_manager.total_cost

View file

@ -1,12 +0,0 @@
# Instruction prompt for step-by-step math solving: the model is asked to show
# its work and to finish with only the final numerical answer (no units or text).
SOLVE_PROMPT = """
You are a mathematical problem solver. Your task is to solve the given problem step by step, showing all your work. After solving the problem, provide the final numerical answer without any units or explanations. Make sure to:
1. Break down the problem into clear steps.
2. Show all calculations.
3. Use proper mathematical notation.
4. Double-check your work for accuracy.
5. Provide only the final numerical answer at the end, with no additional text.
Solve the following problem:
"""