Update HumanEval benchmark & utils; fix some bugs in action node.

This commit is contained in:
didi 2024-08-01 15:33:55 +08:00
parent d671e6ca6d
commit 96bd3101ec
5 changed files with 35 additions and 13 deletions

View file

@ -15,7 +15,7 @@ from evalplus.data import get_human_eval_plus
from examples.ags.w_action_node.graph import HumanEvalGraph
from examples.ags.w_action_node.operator import GenerateCode, GenerateCodeBlock
from examples.ags.w_action_node.utils import sort_json_by_task_id
from examples.ags.w_action_node.utils import sort_json_by_key
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.utils.common import add_jsonl_file, read_json_file
@ -64,20 +64,18 @@ async def route_generate(mode: ModeType, id: str):
async def sample_generate(id, result_path: str = "samples.jsonl", mode: ModeType = "ags"):
sample_dict = await route_generate(mode, id)
add_jsonl_file(result_path, [sample_dict])
sort_json_by_task_id(result_path, result_path)
sort_json_by_key(result_path, result_path)
async def samples_generate(mode: ModeType, result_path: str = "samples.jsonl"):
ids = list(get_human_eval_plus().keys())
file_lock = asyncio.Lock()
@handle_exception(
exception_type=Exception,
exception_msg="Error in solve_and_write function",
default_return=lambda id, *args, **kwargs: id,
)
async def solve_and_write(id: str, mode: ModeType) -> Optional[str]:
sample_dict = await route_generate(mode, id)
try:
sample_dict = await route_generate(mode, id)
except Exception as e:
return id
async with file_lock:
async with aiofiles.open(result_path, mode="a") as f:
await f.write(json.dumps(sample_dict) + "\n")
@ -96,7 +94,7 @@ async def samples_generate(mode: ModeType, result_path: str = "samples.jsonl"):
except Exception:
logger.error(f"{task_id} fail")
sort_json_by_task_id(result_path, result_path)
sort_json_by_key(result_path, result_path)
if not failed_tasks:
if automatic_evalplus(result_path):

View file

@ -20,7 +20,7 @@ def extract_task_id(task_id: str) -> int:
return int(match.group(1)) if match else 0
def sort_json_by_task_id(input_file: str, output_file: str):
def sort_json_by_key(input_file: str, output_file: str, key: str = "task_id"):
"""
Read a JSONL file, sort the entries by the numeric part of the field named by `key` (default: "task_id"), and write to a new JSONL file.
@ -32,7 +32,7 @@ def sort_json_by_task_id(input_file: str, output_file: str):
data = [json.loads(line) for line in f]
# Sort the data based on the numeric part of the value stored under the sort key (task_id by default)
sorted_data = sorted(data, key=lambda x: extract_task_id(x["task_id"]))
sorted_data = sorted(data, key=lambda x: extract_task_id(x[key]))
# Write the sorted data to a new JSONL file
with open(output_file, "w") as f: