From 96bd3101ecec72d47c429b10cd0f91ab65dbe752 Mon Sep 17 00:00:00 2001 From: didi <84363704+didiforgithub@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:33:55 +0800 Subject: [PATCH] Update humaneval benchmark & utils; fixs some bug in action node. --- examples/ags/benchmark/humaneval.py | 16 +++++++--------- examples/ags/w_action_node/utils.py | 4 ++-- metagpt/actions/action_node.py | 2 +- metagpt/utils/common.py | 24 ++++++++++++++++++++++++ test.py | 2 +- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/examples/ags/benchmark/humaneval.py b/examples/ags/benchmark/humaneval.py index 02f221753..62b062ae0 100644 --- a/examples/ags/benchmark/humaneval.py +++ b/examples/ags/benchmark/humaneval.py @@ -15,7 +15,7 @@ from evalplus.data import get_human_eval_plus from examples.ags.w_action_node.graph import HumanEvalGraph from examples.ags.w_action_node.operator import GenerateCode, GenerateCodeBlock -from examples.ags.w_action_node.utils import sort_json_by_task_id +from examples.ags.w_action_node.utils import sort_json_by_key from metagpt.llm import LLM from metagpt.logs import logger from metagpt.utils.common import add_jsonl_file, read_json_file @@ -64,20 +64,18 @@ async def route_generate(mode: ModeType, id: str): async def sample_generate(id, result_path: str = "samples.jsonl", mode: ModeType = "ags"): sample_dict = await route_generate(mode, id) add_jsonl_file(result_path, [sample_dict]) - sort_json_by_task_id(result_path, result_path) + sort_json_by_key(result_path, result_path) async def samples_generate(mode: ModeType, result_path: str = "samples.jsonl"): ids = list(get_human_eval_plus().keys()) file_lock = asyncio.Lock() - @handle_exception( - exception_type=Exception, - exception_msg="Error in solve_and_write function", - default_return=lambda id, *args, **kwargs: id, - ) async def solve_and_write(id: str, mode: ModeType) -> Optional[str]: - sample_dict = await route_generate(mode, id) + try: + sample_dict = await route_generate(mode, id) + except Exception as e: + return id async with file_lock: async with aiofiles.open(result_path, mode="a") as f: await f.write(json.dumps(sample_dict) + "\n") @@ -96,7 +94,7 @@ async def samples_generate(mode: ModeType, result_path: str = "samples.jsonl"): except Exception: logger.error(f"{task_id} fail") - sort_json_by_task_id(result_path, result_path) + sort_json_by_key(result_path, result_path) if not failed_tasks: if automatic_evalplus(result_path): diff --git a/examples/ags/w_action_node/utils.py b/examples/ags/w_action_node/utils.py index 9cb811976..fd3341cca 100644 --- a/examples/ags/w_action_node/utils.py +++ b/examples/ags/w_action_node/utils.py @@ -20,7 +20,7 @@ def extract_task_id(task_id: str) -> int: return int(match.group(1)) if match else 0 -def sort_json_by_task_id(input_file: str, output_file: str): +def sort_json_by_key(input_file: str, output_file: str, key: str = "task_id"): """ Read a JSONL file, sort the entries based on task_id, and write to a new JSONL file. @@ -32,7 +32,7 @@ def sort_json_by_task_id(input_file: str, output_file: str): data = [json.loads(line) for line in f] # Sort the data based on the numeric part of task_id - sorted_data = sorted(data, key=lambda x: extract_task_id(x["task_id"])) + sorted_data = sorted(data, key=lambda x: extract_task_id(x[key])) # Write the sorted data to a new JSONL file with open(output_file, "w") as f: diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index ef0414ff7..a09d4a1da 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -539,7 +539,7 @@ class ActionNode: if self.schema: schema = self.schema - if mode == self.MODE_CODE_FILL: + if mode == MODE_CODE_FILL: result = await self.code_fill(context, function_name, timeout) self.instruct_content = self.create_class()(**result) return self diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index cf490084d..dfb616da4 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -581,6 +581,30 @@ def write_json_file(json_file: str, data: list, encoding: str = None, indent: in json.dump(data, fout, ensure_ascii=False, indent=indent, default=to_jsonable_python) +def read_jsonl_file(jsonl_file: str, encoding="utf-8") -> list[dict]: + if not Path(jsonl_file).exists(): + raise FileNotFoundError(f"json_file: {jsonl_file} not exist, return []") + datas = [] + with open(jsonl_file, "r", encoding=encoding) as fin: + try: + for line in fin: + data = json.loads(line) + datas.append(data) + except Exception: + raise ValueError(f"read jsonl file: {jsonl_file} failed") + return datas + + +def add_jsonl_file(jsonl_file: str, data: list[dict], encoding: str = None): + folder_path = Path(jsonl_file).parent + if not folder_path.exists(): + folder_path.mkdir(parents=True, exist_ok=True) + + with open(jsonl_file, "a", encoding=encoding) as fout: + for json_item in data: + fout.write(json.dumps(json_item) + '\n') + + def read_csv_to_list(curr_file: str, header=False, strip_trail=True): """ Reads in a csv file to a list of list. If header is True, it returns a diff --git a/test.py b/test.py index 3161a7368..e3395e0de 100644 --- a/test.py +++ b/test.py @@ -7,5 +7,5 @@ import asyncio from examples.ags.benchmark.humaneval import sample_generate, samples_generate -asyncio.run(sample_generate("HumanEval/id", result_path="result_path", mode="alpha")) +asyncio.run(sample_generate("HumanEval/id", result_path="result_path", mode="alpha_codium")) asyncio.run(samples_generate(mode="alpha_codium", result_path="result_path"))