From b97fa40e0e26b51cbe40d4aeaaf55ca37a8d3b57 Mon Sep 17 00:00:00 2001
From: kit <101046518@qq.com>
Date: Mon, 28 Oct 2024 22:02:12 +0800
Subject: [PATCH] Fix "sigle" typo in script name and replace print with
 logger

Signed-off-by: kit <101046518@qq.com>
---
 examples/di/InfiAgent-DABench/DABench.py           |  7 ++++---
 examples/di/InfiAgent-DABench/README.md            |  2 +-
 .../di/InfiAgent-DABench/run_InfiAgent-DABench.py  | 13 +++++++------
 .../InfiAgent-DABench/run_InfiAgent-DABench_all.py | 14 +++++++-------
 ...ch_sigle.py => run_InfiAgent-DABench_single.py} |  6 +++---
 5 files changed, 22 insertions(+), 20 deletions(-)
 rename examples/di/InfiAgent-DABench/{run_InfiAgent-DABench_sigle.py => run_InfiAgent-DABench_single.py} (79%)

diff --git a/examples/di/InfiAgent-DABench/DABench.py b/examples/di/InfiAgent-DABench/DABench.py
index 4f907adab..50ec04b29 100644
--- a/examples/di/InfiAgent-DABench/DABench.py
+++ b/examples/di/InfiAgent-DABench/DABench.py
@@ -8,6 +8,7 @@ import nest_asyncio
 
 from examples.di.requirements_prompt import DABENCH
 from metagpt.const import DABENCH_PATH
+from metagpt.logs import logger
 from metagpt.utils.exceptions import handle_exception
 
 
@@ -473,14 +474,14 @@ class DABench:
 
 
 if __name__ == "__main__":
-    DA = DABench()
+    bench = DABench()
     id = 0
     prediction = "@mean_fare[34.65]"
-    print(DA.eval(id, prediction))
+    logger.info(bench.eval(id, prediction))
     ids = [0, 5, 6]
     predictions = [
         "@mean_fare[34.89]",
         "@correlation_coefficient[0.21]",
         "@mean_fare_child[31.09], @mean_fare_teenager[31.98], @mean_fare_adult[35.17], @mean_fare_elderly[43.47]",
     ]
-    print(DA.eval_all(ids, predictions))
+    logger.info(bench.eval_all(ids, predictions))
diff --git a/examples/di/InfiAgent-DABench/README.md b/examples/di/InfiAgent-DABench/README.md
index db8842782..814a1937b 100644
--- a/examples/di/InfiAgent-DABench/README.md
+++ b/examples/di/InfiAgent-DABench/README.md
@@ -9,7 +9,7 @@ ## Dataset
 ```
 ## How to run
 ```
-python run_InfiAgent-DABench_sigle.py --id x # run a task, x represents the id of the question you want to test
+python run_InfiAgent-DABench_single.py --id x # run a task, x represents the id of the question you want to test
 python run_InfiAgent-DABench_all.py # Run all tasks serially
 python run_InfiAgent-DABench.py --k x # Run all tasks in parallel, x represents the number of parallel tasks at a time
 ```
\ No newline at end of file
diff --git a/examples/di/InfiAgent-DABench/run_InfiAgent-DABench.py b/examples/di/InfiAgent-DABench/run_InfiAgent-DABench.py
index dd27ef7cd..7e1fbad8b 100644
--- a/examples/di/InfiAgent-DABench/run_InfiAgent-DABench.py
+++ b/examples/di/InfiAgent-DABench/run_InfiAgent-DABench.py
@@ -3,6 +3,7 @@ import json
 
 from DABench import DABench
 
+from metagpt.logs import logger
 from metagpt.roles.di.data_interpreter import DataInterpreter
 
 
@@ -30,7 +31,7 @@ async def get_prediction(agent, requirement):
         return prediction  # Return the extracted prediction
     except Exception as e:
         # Log an error message if an exception occurs during processing
-        print(f"Error processing requirement: {requirement}. Error: {e}")
+        logger.error(f"Error processing requirement: {requirement}. Error: {e}")
         return None  # Return None in case of an error
 
 
@@ -43,13 +44,13 @@ async def evaluate_all(agent, k):
         agent: The baseline agent used for making predictions.
         k (int): The number of tasks to process in each group concurrently.
""" - DA = DABench() # Create an instance of DABench to access its methods and data + bench = DABench() # Create an instance of DABench to access its methods and data id_list, predictions = [], [] # Initialize lists to store IDs and predictions tasks = [] # Initialize a list to hold the tasks # Iterate over the answers in DABench to generate tasks - for key, value in DA.answers.items(): - requirement = DA.generate_formatted_prompt(key) # Generate a formatted prompt for the current key + for key, value in bench.answers.items(): + requirement = bench.generate_formatted_prompt(key) # Generate a formatted prompt for the current key tasks.append(get_prediction(agent, requirement)) # Append the prediction task to the tasks list id_list.append(key) # Append the current key to the ID list @@ -62,8 +63,8 @@ async def evaluate_all(agent, k): # Filter out any None values from the predictions and extend the predictions list predictions.extend(pred for pred in group_predictions if pred is not None) - # Evaluate the results using all valid predictions and print the evaluation - print(DA.eval_all(id_list, predictions)) + # Evaluate the results using all valid predictions and logger.info the evaluation + logger.info(bench.eval_all(id_list, predictions)) def main(k=5): diff --git a/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_all.py b/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_all.py index bbdbc64e7..5cd1ef4b0 100644 --- a/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_all.py +++ b/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_all.py @@ -9,26 +9,26 @@ from metagpt.utils.recovery_util import save_history async def main(): """Evaluate all""" - DA = DABench() + bench = DABench() id_list, predictions, labels, is_true = [], [], [], [] - for key, value in DA.answers.items(): + for key, value in bench.answers.items(): id_list.append(key) - labels.append(str(DA.get_answer(key))) + labels.append(str(bench.get_answer(key))) try: - requirement = DA.generate_formatted_prompt(key) + requirement = bench.generate_formatted_prompt(key) di = DataInterpreter() result = await di.run(requirement) logger.info(result) save_history(role=di) - temp_prediction, temp_istrue = DA.eval(key, str(result)) + temp_prediction, temp_istrue = bench.eval(key, str(result)) is_true.append(str(temp_istrue)) predictions.append(str(temp_prediction)) except: - is_true.append(str(DA.eval(key, ""))) + is_true.append(str(bench.eval(key, ""))) predictions.append(str("")) df = pd.DataFrame({"Label": labels, "Prediction": predictions, "T/F": is_true}) df.to_excel("DABench_output.xlsx", index=False) - logger.info(DA.eval_all(id_list, predictions)) + logger.info(bench.eval_all(id_list, predictions)) if __name__ == "__main__": diff --git a/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_sigle.py b/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_single.py similarity index 79% rename from examples/di/InfiAgent-DABench/run_InfiAgent-DABench_sigle.py rename to examples/di/InfiAgent-DABench/run_InfiAgent-DABench_single.py index 22c0a3f45..470f12fc8 100644 --- a/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_sigle.py +++ b/examples/di/InfiAgent-DABench/run_InfiAgent-DABench_single.py @@ -8,13 +8,13 @@ from metagpt.utils.recovery_util import save_history async def main(id=0): """Evaluate one task""" - DA = DABench() - requirement = DA.generate_formatted_prompt(id) + bench = DABench() + requirement = bench.generate_formatted_prompt(id) di = DataInterpreter() result = await di.run(requirement) logger.info(result) 
     save_history(role=di)
-    _, is_correct = DA.eval(id, str(result))
+    _, is_correct = bench.eval(id, str(result))
     logger.info(f"Prediction is {'correct' if is_correct else 'incorrect'}.")
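
For context on the `--k` option documented in the README hunk above: `evaluate_all` in `run_InfiAgent-DABench.py` builds one `get_prediction` coroutine per benchmark question and awaits them in groups of `k`, so at most `k` DataInterpreter runs are in flight at a time. Below is a minimal, self-contained sketch of that batched-gather pattern; `fake_task` and `gather_in_groups` are illustrative stand-ins, not names from the repository.

```python
import asyncio


async def fake_task(i: int) -> int:
    """Stand-in for get_prediction(agent, requirement)."""
    await asyncio.sleep(0.01)  # simulate an LLM round-trip
    return i


async def gather_in_groups(coros, k):
    """Await the coroutines k at a time, preserving input order."""
    results = []
    for start in range(0, len(coros), k):
        batch = coros[start : start + k]  # next group of at most k coroutines
        results.extend(await asyncio.gather(*batch))  # run the group concurrently
    return results


if __name__ == "__main__":
    predictions = asyncio.run(gather_in_groups([fake_task(i) for i in range(12)], k=5))
    print(predictions)  # [0, 1, ..., 11]
```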
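
Similarly, the prediction strings fed to `DABench.eval` (e.g. `"@mean_fare[34.65]"`) follow the InfiAgent-DABench `@name[value]` answer convention shown in the `__main__` block of `DABench.py`. A hedged sketch of splitting such a string into named values for comparison follows; `parse_prediction` is a hypothetical helper for illustration, not the actual `DABench.eval` implementation.

```python
import re


def parse_prediction(text: str) -> dict[str, str]:
    """Extract "@name[value]" pairs from a prediction string (assumed format)."""
    return dict(re.findall(r"@(\w+)\[([^\]]*)\]", text))


if __name__ == "__main__":
    print(parse_prediction("@mean_fare_child[31.09], @mean_fare_teenager[31.98]"))
    # {'mean_fare_child': '31.09', 'mean_fare_teenager': '31.98'}
```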