Update

2026-05-05 05:42:37 +02:00 · 2024-07-17 23:08:41 +08:00 · 2024-07-17 23:08:41 +08:00 · 89b0c4ce30
commit 89b0c4ce30
parent e0955c5bf9
7 changed files with 208 additions and 58 deletions
--- a/he_test.py
+++ b/he_test.py
@@ -3,20 +3,18 @@ from metagpt.llm import LLM
 from examples.ags.benchmark.humaneval import sample_generate, samples_generate, extract_failure_tests, automatic_evalplus
 from examples.ags.w_action_node.utils import jsonl_ranker

-asyncio.run(sample_generate('HumanEval/132',result_path="1.jsonl"))
-# asyncio.run(sample_generate('HumanEval/1'))
-# asyncio.run(samples_generate(mode='ags',result_path="2.jsonl"))
+# 132 141 136 80 73
+# asyncio.run(sample_generate('HumanEval/118',result_path="llm_based_4.jsonl",mode="llm"))
+# asyncio.run(samples_generate(mode='ags',result_path="ags_based_1.jsonl"))
 # jsonl_ranker("samples.jsonl", "samples.jsonl")

-
-# if automatic_evalplus():
-#     unpassed_exapmle = extract_failure_tests()
-#     print(unpassed_exapmle)
+result_path = "ags_based_2.jsonl"  # samples file handed to automatic_evalplus
+if automatic_evalplus(result_path):  # only inspect failures when the evaluation call returns truthy
+    unpassed_example = extract_failure_tests(result_path.replace(".jsonl", "_eval_results.json"))  # swap the suffix by name instead of slicing a fixed 6 characters
+    print(unpassed_example)

 # unpassed_exapmle = extract_failure_tests(file_path="2_eval_results.json")
 # print(unpassed_exapmle)

-# failure_list = ['HumanEval/0', 'HumanEval/1', 'HumanEval/7', 'HumanEval/16', 'HumanEval/24', 'HumanEval/31', 'HumanEval/40', 'HumanEval/56', 'HumanEval/67', 'HumanEval/74', 'HumanEval/83', 'HumanEval/86', 'HumanEval/87', 'HumanEval/90', 'HumanEval/95', 'HumanEval/101', 'HumanEval/104', 'HumanEval/113', 'HumanEval/125', 'HumanEval/132', 'HumanEval/135', 'HumanEval/140', 'HumanEval/143', 'HumanEval/145', 'HumanEval/154', 'HumanEval/161']
-
 # for example in failure_list:
 #     asyncio.run(sample_generate(example))