This commit is contained in:
didi 2024-07-14 09:12:33 +08:00
parent 7fa68d5649
commit 8a241054c7
8 changed files with 301 additions and 120 deletions

View file

@@ -3,9 +3,9 @@ from metagpt.llm import LLM
from examples.ags.benchmark.humaneval import sample_generate, samples_generate, extract_failure_tests, automatic_evalplus
from examples.ags.w_action_node.utils import jsonl_ranker
# NOTE(review): this appears to be a diff excerpt whose +/- markers were stripped,
# so pre-change and post-change lines may sit side by side — confirm against the
# real file before treating every active line below as current.
# asyncio.run(sample_generate('HumanEval/101'))
# Drive the sample_generate coroutine for the single task id 'HumanEval/132',
# passing result_path="1.jsonl" (presumably the output file — verify against
# sample_generate's signature).
asyncio.run(sample_generate('HumanEval/132',result_path="1.jsonl"))
# asyncio.run(sample_generate('HumanEval/1'))
# Drive the samples_generate coroutine with mode='ags'; no result_path is passed
# here, unlike the commented-out variant on the next line.
asyncio.run(samples_generate(mode='ags'))
# asyncio.run(samples_generate(mode='ags',result_path="2.jsonl"))
# jsonl_ranker("samples.jsonl", "samples.jsonl")
@@ -13,7 +13,7 @@ asyncio.run(samples_generate(mode='ags'))
# unpassed_exapmle = extract_failure_tests()
# print(unpassed_exapmle)
# unpassed_exapmle = extract_failure_tests()
# unpassed_exapmle = extract_failure_tests(file_path="2_eval_results.json")
# print(unpassed_exapmle)
# failure_list = ['HumanEval/0', 'HumanEval/1', 'HumanEval/7', 'HumanEval/16', 'HumanEval/24', 'HumanEval/31', 'HumanEval/40', 'HumanEval/56', 'HumanEval/67', 'HumanEval/74', 'HumanEval/83', 'HumanEval/86', 'HumanEval/87', 'HumanEval/90', 'HumanEval/95', 'HumanEval/101', 'HumanEval/104', 'HumanEval/113', 'HumanEval/125', 'HumanEval/132', 'HumanEval/135', 'HumanEval/140', 'HumanEval/143', 'HumanEval/145', 'HumanEval/154', 'HumanEval/161']