diff --git a/examples/ags/benchmark/humaneval.py b/examples/ags/benchmark/humaneval.py index b63b8889e..ce0c02bbc 100644 --- a/examples/ags/benchmark/humaneval.py +++ b/examples/ags/benchmark/humaneval.py @@ -3,6 +3,7 @@ # @Author : didi # @Desc : test on human eval graph +import os import json import subprocess import sys @@ -92,12 +93,12 @@ async def samples_generate(mode:str, result_path:str="samples.jsonl"): jsonl_ranker(result_path, result_path) if not failed_tasks: - + # 自动 sanitize + result_path = automatic_sanitize(result_path) if automatic_evalplus(result_path): eval_path = result_path[:-6]+"_eval_results.json" unpassed_exapmle = extract_failure_tests(eval_path) print(unpassed_exapmle) - else: print(failed_tasks) @@ -136,9 +137,24 @@ async def samples_generate_llm(): write_jsonl("samples.jsonl", sample_list) -def hello(): - pass - +def automatic_sanitize(result_path: str = "samples.jsonl"): + """ + 在命令行中自动执行 evalplus.sanitize --samples result_path + 返回result_path前缀加上"-sanitized.jsonl" + """ + command = ["evalplus.sanitize", "--samples", result_path] + + try: + subprocess.run(command, check=True) + except subprocess.CalledProcessError as e: + print(f"执行命令时出错: {e}") + return None + + # 构建sanitized文件路径 + base_name = os.path.splitext(result_path)[0] + sanitized_path = f"{base_name}-sanitized.jsonl" + + return sanitized_path def automatic_evalplus(result_path:str ="samples.jsonl"): """ 在命令行中自动执行 evalplus.evaluate --dataset humaneval --samples samples.jsonl --parallel 2 --base-only diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 7e7f27270..738073277 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -510,9 +510,9 @@ class ActionNode: import re field_name = self.get_field_name() prompt = context - prompt += "\nPlease wrap the generated code within triple backticks, like this: ``````" + # prompt += "\nPlease wrap the generated code within triple backticks, like this: ``````" content = await self.llm.aask(prompt, timeout=timeout) - + extracted_code = extract_code_from_response(content) result = {field_name: extracted_code} return result @@ -522,7 +522,7 @@ class ActionNode: ): """ 参考这个代码,只不过LLM调用方式改成使用; - + 参考 """ pass