diff --git a/10.txt b/10.txt new file mode 100644 index 000000000..809a0da29 --- /dev/null +++ b/10.txt @@ -0,0 +1,52 @@ +2024-07-01 15:30:33.806 | DEBUG | metagpt.provider.base_llm:aask:151 - [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\n## context\n\nGenerate Code Solution for the following problem: \n\ndef is_palindrome(string: str) -> bool:\n """ Test if given string is a palindrome """\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome(\'\')\n \'\'\n >>> make_palindrome(\'cat\')\n \'catac\'\n >>> make_palindrome(\'cata\')\n \'catac\'\n """\n\n\n\n-----\n\n## format example\n[CONTENT]\n{\n "solution": ""\n}\n[/CONTENT]\n\n## nodes: ": # "\n- solution: # Your Code Solution for this problem\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n'}] +2024-07-01 15:30:42.412 | INFO | metagpt.utils.cost_manager:update_cost:57 - Total running cost: $0.000 | Max budget: $10.000 | Current cost: $0.000, prompt_tokens: 318, completion_tokens: 175 +2024-07-01 15:30:42.413 | DEBUG | metagpt.actions.action_node:_aask_v1:421 - llm raw output: +[CONTENT] +{ + "solution": "def make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n \"\"\"\n if not string:\n return ''\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]\n" +} +[/CONTENT] +2024-07-01 15:30:42.418 | DEBUG | metagpt.actions.action_node:_aask_v1:431 - parsed_data: +{'solution': 'def make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n """\n if not string:\n return \'\'\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]\n'} +2024-07-01 15:30:42.419 | DEBUG | metagpt.provider.base_llm:aask:151 - [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\n## context\n\nFor the question described as \n\ndef is_palindrome(string: str) -> bool:\n """ Test if given string is a palindrome """\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome(\'\')\n \'\'\n >>> make_palindrome(\'cat\')\n \'catac\'\n >>> make_palindrome(\'cata\')\n \'catac\'\n """\n,\nplease review the following solution: {\'solution\': \'def make_palindrome(string: str) -> str:\\n """ Find the shortest palindrome that begins with a supplied string.\\n Algorithm idea is simple:\\n - Find the longest postfix of supplied string that is a palindrome.\\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\\n """\\n if not string:\\n return \\\'\\\'\\n\\n for i in range(len(string)):\\n if string[i:] == string[i:][::-1]:\\n return string + string[:i][::-1]\\n\\n return string + string[:-1][::-1]\\n\'}, and provide a review result in boolean format.\nIf you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your comments\n\n\n-----\n\n## format example\n[CONTENT]\n{\n "review_result": false,\n "feedback": ""\n}\n[/CONTENT]\n\n## nodes: ": # "\n- review_result: # The Review Result (Bool). If you think this solution looks good for you, return \'true\'; If not, return \'false\'\n- feedback: # Your FeedBack for this problem based on the criteria. If the review result is true, you can put it \'nothing here\'.\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n'}] +2024-07-01 15:30:44.222 | INFO | metagpt.utils.cost_manager:update_cost:57 - Total running cost: $0.000 | Max budget: $10.000 | Current cost: $0.000, prompt_tokens: 585, completion_tokens: 29 +2024-07-01 15:30:44.222 | DEBUG | metagpt.actions.action_node:_aask_v1:421 - llm raw output: +[CONTENT] +{ + "review_result": true, + "feedback": "nothing here" +} +[/CONTENT] +2024-07-01 15:30:44.224 | DEBUG | metagpt.actions.action_node:_aask_v1:431 - parsed_data: +{'review_result': True, 'feedback': 'nothing here'} +2024-07-01 15:30:44.224 | DEBUG | metagpt.provider.base_llm:aask:151 - [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\n## context\n\nGenerate Code Solution for the following problem: \n\ndef is_palindrome(string: str) -> bool:\n """ Test if given string is a palindrome """\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome(\'\')\n \'\'\n >>> make_palindrome(\'cat\')\n \'catac\'\n >>> make_palindrome(\'cata\')\n \'catac\'\n """\n\n\n\n-----\n\n## format example\n[CONTENT]\n{\n "solution": ""\n}\n[/CONTENT]\n\n## nodes: ": # "\n- solution: # Your Code Solution for this problem\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n'}] +2024-07-01 15:30:53.135 | INFO | metagpt.utils.cost_manager:update_cost:57 - Total running cost: $0.000 | Max budget: $10.000 | Current cost: $0.000, prompt_tokens: 318, completion_tokens: 175 +2024-07-01 15:30:53.136 | DEBUG | metagpt.actions.action_node:_aask_v1:421 - llm raw output: +[CONTENT] +{ + "solution": "def make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n \"\"\"\n if not string:\n return ''\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]\n" +} +[/CONTENT] +2024-07-01 15:30:53.137 | DEBUG | metagpt.actions.action_node:_aask_v1:431 - parsed_data: +{'solution': 'def make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n """\n if not string:\n return \'\'\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]\n'} +2024-07-01 15:30:53.138 | DEBUG | metagpt.provider.base_llm:aask:151 - [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\n## context\n\nFor the question described as \n\ndef is_palindrome(string: str) -> bool:\n """ Test if given string is a palindrome """\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome(\'\')\n \'\'\n >>> make_palindrome(\'cat\')\n \'catac\'\n >>> make_palindrome(\'cata\')\n \'catac\'\n """\n,\nplease review the following solution: {\'solution\': \'def make_palindrome(string: str) -> str:\\n """ Find the shortest palindrome that begins with a supplied string.\\n Algorithm idea is simple:\\n - Find the longest postfix of supplied string that is a palindrome.\\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\\n """\\n if not string:\\n return \\\'\\\'\\n\\n for i in range(len(string)):\\n if string[i:] == string[i:][::-1]:\\n return string + string[:i][::-1]\\n\\n return string + string[:-1][::-1]\\n\'}, and provide a review result in boolean format.\nIf you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your comments\n\n\n-----\n\n## format example\n[CONTENT]\n{\n "review_result": false,\n "feedback": ""\n}\n[/CONTENT]\n\n## nodes: ": # "\n- review_result: # The Review Result (Bool). If you think this solution looks good for you, return \'true\'; If not, return \'false\'\n- feedback: # Your FeedBack for this problem based on the criteria. If the review result is true, you can put it \'nothing here\'.\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n'}] +2024-07-01 15:30:55.232 | INFO | metagpt.utils.cost_manager:update_cost:57 - Total running cost: $0.000 | Max budget: $10.000 | Current cost: $0.000, prompt_tokens: 585, completion_tokens: 29 +2024-07-01 15:30:55.233 | DEBUG | metagpt.actions.action_node:_aask_v1:421 - llm raw output: +[CONTENT] +{ + "review_result": true, + "feedback": "nothing here" +} +[/CONTENT] +2024-07-01 15:30:55.234 | DEBUG | metagpt.actions.action_node:_aask_v1:431 - parsed_data: +{'review_result': True, 'feedback': 'nothing here'} +2024-07-01 15:30:55.234 | DEBUG | metagpt.provider.base_llm:aask:151 - [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': '\n## context\n\nFor the question described as \n\ndef is_palindrome(string: str) -> bool:\n """ Test if given string is a palindrome """\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n >>> make_palindrome(\'\')\n \'\'\n >>> make_palindrome(\'cat\')\n \'catac\'\n >>> make_palindrome(\'cata\')\n \'catac\'\n """\n, Solutions: def make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n """\n if not string:\n return \'\'\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]\n\ndef make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n """\n if not string:\n return \'\'\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]\n\n\nPlease select the solution that appears most frequently from these options and provide the best solution based on that.\n\n\n-----\n\n## format example\n[CONTENT]\n{\n "solution": ""\n}\n[/CONTENT]\n\n## nodes: ": # "\n- solution: # Final ensemble solution for this problem\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n'}] +2024-07-01 15:31:03.826 | INFO | metagpt.utils.cost_manager:update_cost:57 - Total running cost: $0.001 | Max budget: $10.000 | Current cost: $0.000, prompt_tokens: 635, completion_tokens: 173 +2024-07-01 15:31:03.827 | DEBUG | metagpt.actions.action_node:_aask_v1:421 - llm raw output: +[CONTENT] +{ + "solution": "def make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n \"\"\"\n if not string:\n return ''\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]" +} +[/CONTENT] +2024-07-01 15:31:03.830 | DEBUG | metagpt.actions.action_node:_aask_v1:431 - parsed_data: +{'solution': 'def make_palindrome(string: str) -> str:\n """ Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n """\n if not string:\n return \'\'\n\n for i in range(len(string)):\n if string[i:] == string[i:][::-1]:\n return string + string[:i][::-1]\n\n return string + string[:-1][::-1]'} diff --git a/examples/ags/benchmark/humaneval.py b/examples/ags/benchmark/humaneval.py index 7e639b1ab..c029ad23b 100644 --- a/examples/ags/benchmark/humaneval.py +++ b/examples/ags/benchmark/humaneval.py @@ -3,174 +3,132 @@ # @Author : didi # @Desc : test on human eval graph -# 1. 出效果 -# 2. 代码方面,格式问题,很多格式处理 ->增加效果 -# 3. GSM8k -> -# 4. 我来写一个GSM8k最基础代码,GSM8k实验代码需要你来改写 - -import os +import asyncio import json +import os import subprocess import sys -import asyncio +from typing import Literal, Optional + import aiofiles -from metagpt.llm import LLM -from evalplus.data import get_human_eval_plus, write_jsonl -from examples.ags.w_action_node.utils import jsonl_ranker +from evalplus.data import get_human_eval_plus + from examples.ags.w_action_node.graph import HumanEvalGraph from examples.ags.w_action_node.operator import GenerateCode, GenerateCodeBlock +from examples.ags.w_action_node.utils import sort_json_by_key +from metagpt.llm import LLM +from metagpt.logs import logger +from metagpt.utils.common import add_jsonl_file, read_json_file +from metagpt.utils.exceptions import handle_exception generate_code = GenerateCode(llm=LLM()) generate_code_block = GenerateCodeBlock(llm=LLM()) -solver = HumanEvalGraph(name="solver", llm=LLM(), criteria='correctness, efficiency, readability', vote_count=1) +solver = HumanEvalGraph(name="solver", llm=LLM(), criteria="correctness, efficiency, readability", vote_count=5) -async def sample_generate(id, result_path:str="samples.jsonl",mode:str="ags"): +ModeType = Literal["ags", "alpha_codium", "llm"] + + +async def llm_generate(id): case = get_human_eval_plus()[f"{id}"] + solution_result = await generate_code_block(case["prompt"], case["entry_point"]) + sample_dict = dict(task_id=case["task_id"], solution=solution_result["code_solution"]) + return sample_dict + + +async def ags_generate(id, ensemble_count: int = 5): + case = get_human_eval_plus()[f"{id}"] + solution_result = await solver(case["prompt"], ensemble_count=ensemble_count) + sample_dict = dict(task_id=case["task_id"], solution=solution_result["final_solution"]) + return sample_dict + + +async def alpha_codium_generate(id): + case = get_human_eval_plus()[f"{id}"] + solution_result = await solver.alpha_codium(case["task_id"], case["prompt"], ensemble_count=5) + sample_dict = dict(task_id=case["task_id"], solution=solution_result["final_solution"]) + return sample_dict + + +async def route_generate(mode: ModeType, id: str): if mode == "ags": - solution_result = await solver(case['prompt'],ensemble_count=5) - sample_dict = dict(task_id=case['task_id'], solution=solution_result['final_solution']) - elif mode == "alpha": - solution_result = await solver.alpha_codium(case['task_id'], case['prompt'], ensemble_count=5) - sample_dict = dict(task_id=case['task_id'], solution=solution_result['final_solution']) + sample_dict = await ags_generate(id) + elif mode == "alpha_codium": + sample_dict = await alpha_codium_generate(id) elif mode == "llm": - solution_result = await generate_code_block(case['prompt'],case['entry_point']) - sample_dict = dict(task_id=case['task_id'], solution=solution_result['code_solution']) - print(sample_dict) - with open(result_path, mode='a') as f: - f.write(json.dumps(sample_dict) + '\n') - jsonl_ranker(result_path, result_path) + sample_dict = await llm_generate(id) + else: + raise ValueError(f"Invalid mode: {mode}") + return sample_dict -async def samples_generate(mode:str, result_path:str="samples.jsonl"): - cases = list(get_human_eval_plus().values()) + +async def sample_generate(id, result_path: str = "samples.jsonl", mode: ModeType = "ags"): + sample_dict = await route_generate(mode, id) + add_jsonl_file(result_path, [sample_dict]) + sort_json_by_key(result_path, result_path) + + +async def samples_generate(mode: ModeType, result_path: str = "samples.jsonl"): + ids = list(get_human_eval_plus().keys()) file_lock = asyncio.Lock() - - async def solve_and_write(case, mode): - try: - if mode == 'llm': - solution_result = await generate_code_block(problem_description=case['prompt'], function_name=case['entry_point']) - # solution_result = await generate_code(case['prompt']) - sample_dict = { - 'task_id': case['task_id'], - 'solution': solution_result['code_solution'] - } - elif mode == "ags": - solution_result = await solver(case['prompt'], ensemble_count=5) - sample_dict = { - 'task_id': case['task_id'], - 'solution': solution_result['final_solution'] - } - elif mode == "alpha": - solution_result = await solver.alpha_codium(case['task_id'], case['prompt'], ensemble_count=1) - sample_dict = { - 'task_id': case['task_id'], - 'solution': solution_result['final_solution'] - } - # TODO 解决 final_solution 问题之后就可以开始正式测评了 - async with file_lock: - async with aiofiles.open(result_path, mode='a') as f: - await f.write(json.dumps(sample_dict) + '\n') - return None - except Exception as e: - print(e) - return case['task_id'] + @handle_exception( + exception_type=Exception, + exception_msg="Error in solve_and_write function", + default_return=lambda id, *args, **kwargs: id, + ) + async def solve_and_write(id: str, mode: ModeType) -> Optional[str]: + sample_dict = await route_generate(mode, id) + async with file_lock: + async with aiofiles.open(result_path, mode="a") as f: + await f.write(json.dumps(sample_dict) + "\n") + return None - tasks = [solve_and_write(case, mode) for case in cases] + tasks = [solve_and_write(id, mode) for id in ids] results = await asyncio.gather(*tasks) failed_tasks = [task_id for task_id in results if task_id is not None] if failed_tasks: - print(failed_tasks) - if mode == 'llm': - for task_id in failed_tasks: - case = get_human_eval_plus()[task_id] - for _ in range(3): - try: - solution_result = await generate_code_block(case['prompt'],function_name=case['entry_point']) - task_dict = { - 'task_id': case['task_id'], - 'solution': solution_result['code_solution'] - } - with open(result_path, mode='a') as f: - f.write(json.dumps(task_dict) + '\n') - failed_tasks.remove(task_id) - break - except Exception as e: - print(f"{e} \n failure {task_id}") - elif mode == "ags" or mode == "alpha": - for task_id in failed_tasks: - try: - await sample_generate(task_id,result_path,mode) - except Exception as e: - print(f"failure {task_id}") - - jsonl_ranker(result_path, result_path) - + logger.info(failed_tasks) + for task_id in failed_tasks: + try: + await sample_generate(task_id, result_path, mode) + failed_tasks.remove(task_id) + except Exception: + logger.error(f"{task_id} fail") + + sort_json_by_key(result_path, result_path) + if not failed_tasks: - # 自动 sanitize - # result_path = automatic_sanitize(result_path) if automatic_evalplus(result_path): - eval_path = result_path[:-6]+"_eval_results.json" + eval_path = result_path[:-6] + "_eval_results.json" unpassed_exapmle = extract_failure_tests(eval_path) - print(unpassed_exapmle) + logger.info(unpassed_exapmle) else: - print(failed_tasks) + logger.info(failed_tasks) -async def samples_generate_ags(): - sample_list = [] - cases = list(get_human_eval_plus().values()) - - async def solve_with_id(case): - solution_result = await solver(case['prompt'], ensemble_count=5) - return case['task_id'], solution_result['final_solution'] - - tasks = [solve_with_id(case) for case in cases] - results = await asyncio.gather(*tasks) - - for task_id, solution in results: - sample_dict = dict(task_id=task_id, solution=solution) - sample_list.append(sample_dict) - - write_jsonl("samples.jsonl", sample_list) -async def samples_generate_llm(): - sample_list = [] - cases = list(get_human_eval_plus().values()) - - async def solve_with_id(case): - solution_result = await generate_code_block(case['prompt']) - # solution_result = await generate_code(case['prompt']) - return case['task_id'], solution_result['code_solution'] - - tasks = [solve_with_id(case) for case in cases] - results = await asyncio.gather(*tasks) - - for task_id, solution in results: - sample_dict = dict(task_id=task_id, solution=solution) - sample_list.append(sample_dict) - - write_jsonl("samples.jsonl", sample_list) - -def automatic_sanitize(result_path: str = "samples.jsonl"): +@handle_exception(exception_type=subprocess.CalledProcessError, exception_msg="sanitize error", default_return=None) +def automatic_sanitize(result_path: str = "samples.jsonl") -> Optional[str]: """ 在命令行中自动执行 evalplus.sanitize --samples result_path 返回result_path前缀加上"-sanitized.jsonl" """ command = ["evalplus.sanitize", "--samples", result_path] - - try: - subprocess.run(command, check=True) - except subprocess.CalledProcessError as e: - print(f"执行命令时出错: {e}") - return None - - # 构建sanitized文件路径 + + subprocess.run(command, check=True) + base_name = os.path.splitext(result_path)[0] sanitized_path = f"{base_name}-sanitized.jsonl" - + return sanitized_path -def automatic_evalplus(result_path:str ="samples.jsonl"): + +@handle_exception( + exception_type=subprocess.CalledProcessError, + exception_msg="Error in automatic_evalplus function", + default_return=False, +) +def automatic_evalplus(result_path: str = "samples.jsonl") -> bool: """ 在命令行中自动执行 evalplus.evaluate --dataset humaneval --samples samples.jsonl --parallel 2 --base-only """ @@ -178,41 +136,30 @@ def automatic_evalplus(result_path:str ="samples.jsonl"): sys.executable, # 使用当前 Python 解释器 "-m", "evalplus.evaluate", - "--dataset", "humaneval", - "--samples", result_path, - "--parallel", "2", - "--base-only" + "--dataset", + "humaneval", + "--samples", + result_path, + "--parallel", + "2", + "--base-only", ] - - try: - result = subprocess.run(command, check=True, capture_output=True, text=True) - print("输出:", result.stdout) - return True - except subprocess.CalledProcessError as e: - print("错误输出:", e.stderr) - return False - -def extract_failure_tests(file_path:str = "samples_eval_results.json"): - with open(file_path, 'r') as f: - task_results = json.load(f) + + result = subprocess.run(command, check=True, capture_output=True, text=True) + logger.info(f"ouptput: \n {result.stdout}") + return True + + +def extract_failure_tests(file_path: str = "samples_eval_results.json"): + task_results = read_json_file(file_path) failed_tests = [] - - for task in task_results['eval'].values(): + for task in task_results["eval"].values(): if task[0]["base_status"] == "fail": failed_test = { "task_id": task[0]["task_id"], - # "solution": task["solution"], - # "fail_tests": task["base_fail_tests"] } failed_tests.append(failed_test) - print(len(failed_tests)) - + logger.info(f"length of failed tests: {len(failed_tests)}") + return failed_tests - - -# asyncio.run(sample_generate('HumanEval/101')) -# asyncio.run(samples_generate(mode='ags')) -# jsonl_ranker("samples.jsonl", "samples.jsonl") -# {"task_id": "HumanEval/101", "solution": "def words_string(s):\n import re\n return re.split(r'[,\\s]\\s*', s)"} - diff --git a/examples/ags/benchmark/humaneval_mg.py b/examples/ags/benchmark/humaneval_mg.py deleted file mode 100644 index 67db03bc8..000000000 --- a/examples/ags/benchmark/humaneval_mg.py +++ /dev/null @@ -1,239 +0,0 @@ -# Import necessary libraries and modules -import gzip -import itertools -import json -import os -import subprocess -from typing import Dict, Iterable, List, Union - -import numpy as np -import tqdm -from loguru import logger - -# Define the root directory as the location of the script -ROOT = os.path.dirname(os.path.abspath(__file__)) - -# Define the input data file containing human evaluations -HUMAN_EVAL = r"HumanEval.jsonl.gz" - - -def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]: - """ - Reads a JSONL file containing problem evaluations and returns them as a dictionary. - - Args: - evalset_file (str): Path to the JSONL file. - - Returns: - Dict[str, Dict]: A dictionary where task IDs are keys and problem details are values. - """ - return {task["task_id"]: task for task in stream_jsonl(evalset_file)} - - -def stream_jsonl(filename: str) -> Iterable[Dict]: - """ - Parses a JSONL file and yields each line as a dictionary. - - Args: - filename (str): Path to the JSONL file. - - Yields: - Iterable[Dict]: A generator of dictionaries representing JSONL lines. - """ - if filename.endswith(".gz"): - with open(filename, "rb") as gzfp: - with gzip.open(gzfp, "rt") as fp: - for line in fp: - if any(not x.isspace() for x in line): - yield json.loads(line) - else: - with open(filename, "r") as fp: - for line in fp: - if any(not x.isspace() for x in line): - yield json.loads(line) - - -def _generate_examples(filepath, split, name="sanitized"): - if name == "full": - - def _read_lines(fn, start, end): - data = [] - with open(fn, encoding="utf-8") as f: - for line in f: - sample = json.loads(line) - if start <= sample["task_id"] <= end: - data.append(sample) - elif sample["task_id"] > end: - break - return data - - if split == "test": - data = _read_lines(filepath, 11, 510) - elif split == "train": - data = _read_lines(filepath, 601, 974) - elif split == "validation": - data = _read_lines(filepath, 511, 600) - elif split == "prompt": - data = _read_lines(filepath, 1, 10) - - elif name == "sanitized": - with open(filepath, encoding="utf-8") as f: - data = json.load(f) - if split == "test": - data = [sample for sample in data if 11 <= sample["task_id"] <= 510] - elif split == "train": - data = [sample for sample in data if 601 <= sample["task_id"] <= 974] - elif split == "validation": - data = [sample for sample in data if 511 <= sample["task_id"] <= 600] - elif split == "prompt": - data = [sample for sample in data if 1 <= sample["task_id"] <= 10] - id_ = 0 - for sample in data: - yield id_, sample - id_ += 1 - - -def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False): - """ - Writes an iterable of dictionaries to a JSONL file. - - Args: - filename (str): Path to the output JSONL file. - data (Iterable[Dict]): Data to write as JSONL. - append (bool): If True, appends to an existing file, else creates a new file. - """ - # Determine the file writing mode based on the 'append' flag - if append: - mode = "ab" - else: - mode = "wb" - filename = os.path.expanduser(filename) - - # Handle .gz compression - if filename.endswith(".gz"): - with open(filename, mode) as fp: - with gzip.GzipFile(fileobj=fp, mode="wb") as gzfp: - for x in data: - gzfp.write((json.dumps(x) + "\n").encode("utf-8")) - else: - with open(filename, mode) as fp: - for x in data: - fp.write((json.dumps(x) + "\n").encode("utf-8")) - - -def execution(task_id, check_program): - """ - Executes a Python program and captures its output. - - Args: - task_id: A unique identifier for the task. - check_program: The Python program to execute. - - Returns: - bool: True if the execution was successful, False otherwise. - """ - process = subprocess.Popen(["python", "-c", f"{check_program}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - try: - # Wait for the process to complete, with a timeout - stdout, stderr = process.communicate(timeout=30) - - if len(stderr) == 0: - # logger.info(f"{task_id}: passed") - passed = True - elif b"OK" in stderr: - # logger.info(f"{task_id}: passed, {stderr}") - passed = True - - else: - logger.info(f"{task_id}: error: {stderr}") - passed = False - except subprocess.TimeoutExpired: - logger.info("The command did not complete within the given timeout.") - process.kill() # Kill the process if it times out - logger.info(f"{task_id}: error") - passed = False - return passed - - -def estimate_pass_at_k( - num_samples: Union[int, List[int], np.ndarray], num_correct: Union[List[int], np.ndarray], k: int -) -> np.ndarray: - """ - Estimates pass@k of each problem and returns them in an array. - - Args: - num_samples: Number of total samples (can be an int, list, or NumPy array). - num_correct: Number of correct samples (list or NumPy array). - k (int): The 'k' value for pass@k. - - Returns: - np.ndarray: An array of pass rates for each problem. - """ - - # Define a pass rate estimator function - def estimator(n: int, c: int, k: int) -> float: - if n - c < k: - return 1.0 - return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) - - # Determine the number of samples based on the input type - if isinstance(num_samples, int): - num_samples_it = itertools.repeat(num_samples, len(num_correct)) - else: - assert len(num_samples) == len(num_correct) - num_samples_it = iter(num_samples) - - # Calculate pass rates for each problem - return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]) - - -def evaluate(total: List, correct: List, ks: List = [1, 10]): - """ - Evaluates and logs pass rates at various 'k' values. - - Args: - total (List): List of total samples. - correct (List): List of correct samples. - ks (List): List of 'k' values to evaluate. - - Returns: - dict: A dictionary of pass rates at each 'k' value. - """ - total = np.array(total) - correct = np.array(correct) - - # Calculate and log pass rates at each 'k' value - pass_at_k = {f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() for k in ks if (total >= k).all()} - logger.info(pass_at_k) - return pass_at_k - - -if __name__ == "__main__": - logger.info("Reading samples...") - problems = read_problems(HUMAN_EVAL) - - total, correct = [], [] - passed = [] - - for sample in tqdm.tqdm(stream_jsonl("example_samples.jsonl")): - task_id = sample["task_id"] - completion = sample["completion"] - problem = problems[task_id] - - # Construct a check program - check_program = completion + "\n" + problem["test"] + "\n" + f"check({problem['entry_point']})" - - # Execute the check program and capture the result - passed_flg = execution(task_id, check_program) - - if not passed_flg: - logger.debug("error") - else: - logger.debug("passed") - passed.append(len(passed)) - - total.append(len(passed)) - correct.append(sum(passed)) - - # Evaluate pass rates at various 'k' values - evaluate(total, correct, ks=[1, 5, 10]) diff --git a/examples/ags/demo/claude.py b/examples/ags/demo/claude.py deleted file mode 100644 index b4799939a..000000000 --- a/examples/ags/demo/claude.py +++ /dev/null @@ -1,101 +0,0 @@ - -from typing import Any, Dict, List, Callable -from abc import ABC, abstractmethod - -class LLM: - def ask(self, text: str) -> str: - # Implement LLM query logic here - pass - -class Operator(ABC): - def __init__(self, llm: LLM): - self.llm = llm - - @abstractmethod - def forward(self, *args: Any, **kwargs: Any) -> Any: - pass - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - return self.forward(*args, **kwargs) - -class Generate(Operator): - def __init__(self, llm: LLM, prompt: str): - super().__init__(llm) - self.prompt = prompt - - def forward(self, input_problem: str) -> str: - return self.llm.ask(f"{self.prompt}\n{input_problem}") - -class Review(Operator): - def __init__(self, llm: LLM, criteria: List[str]): - super().__init__(llm) - self.criteria = criteria - - def forward(self, solution: str) -> Dict[str, float]: - review_prompt = f"Review the following solution based on these criteria: {', '.join(self.criteria)}\n\nSolution: {solution}" - review_result = self.llm.ask(review_prompt) - # Parse the review_result to extract scores - return {criteria: float(review_result.split(criteria)[1].split()[0]) for criteria in self.criteria} - -class Module: - def __init__(self, llm: LLM): - self.llm = llm - - def forward(self, x: Any) -> Any: - raise NotImplementedError("Subclasses must implement forward method") - - def __call__(self, x: Any) -> Any: - return self.forward(x) - -class CodeGenerationModule(Module): - def __init__(self, llm: LLM): - super().__init__(llm) - self.generate = Generate(llm, "Generate a Python function for the following problem:") - self.review = Review(llm, ["correctness", "efficiency", "readability"]) - - def forward(self, problem: str) -> Dict[str, Any]: - solution = self.generate(problem) - review = self.review(solution) - return {"solution": solution, "review": review} - -def optimize(module: Module, loss_fn: Callable[[Dict[str, Any]], float], iterations: int = 10): - for _ in range(iterations): - # This is a placeholder for the optimization logic - # In a real implementation, you would: - # 1. Run the module on some input - # 2. Compute the loss - # 3. Use the loss to improve the module (e.g., by adjusting prompts or using LLM feedback) - pass - -# Usage -llm = LLM() -code_gen = CodeGenerationModule(llm) - -# Solve a problem -result = code_gen("Write a function to calculate the factorial of a number") -print(result) - -# Define a loss function -def loss_function(output: Dict[str, Any]) -> float: - # Implement your loss computation here - # For example, you might use the review scores - return 1.0 - output["review"].get("correctness", 0) - -# Optimize the module -optimize(code_gen, loss_function, iterations=10) - -# You can also create custom modules easily -class CustomModule(Module): - def __init__(self, llm: LLM): - super().__init__(llm) - self.op1 = Generate(llm, "Custom prompt 1") - self.op2 = Review(llm, ["custom_criteria"]) - - def forward(self, x: str) -> Dict[str, Any]: - intermediate = self.op1(x) - final = self.op2(intermediate) - return {"result": final} - -custom_module = CustomModule(llm) -custom_result = custom_module("Custom input") -print(custom_result) diff --git a/examples/ags/demo/claude_2.py b/examples/ags/demo/claude_2.py deleted file mode 100644 index 54a85e69b..000000000 --- a/examples/ags/demo/claude_2.py +++ /dev/null @@ -1,82 +0,0 @@ -from metagpt import nn -import metagpt.functional as F - -class Generate(nn.Module): - def __init__(self, model_name): - super(Generate, self).__init__() - self.model = nn.LLM(model_name) - - def forward(self, prompt): - return self.model.generate(prompt) - -class Review(nn.Module): - def __init__(self, criteria): - super(Review, self).__init__() - self.criteria = criteria - - def forward(self, generated_code): - return F.analyze(generated_code, self.criteria) - -class Revise(nn.Module): - def __init__(self, model_name): - super(Revise, self).__init__() - self.model = nn.LLM(model_name) - - def forward(self, original_code, review_feedback): - prompt = f"Original code:\n{original_code}\n\nFeedback:\n{review_feedback}\n\nRevised code:" - return self.model.generate(prompt) - -class Ensemble(nn.Module): - def __init__(self, strategy='majority_vote'): - super(Ensemble, self).__init__() - self.strategy = strategy - - def forward(self, solutions): - return F.ensemble(solutions, strategy=self.strategy) - -class LLMAgent(nn.Module): - def __init__(self, generate_model, review_criteria, revise_model): - super(LLMAgent, self).__init__() - self.generate = Generate(generate_model) - self.review = Review(review_criteria) - self.revise = Revise(revise_model) - self.ensemble = Ensemble() - - def forward(self, problem_description, num_iterations=3): - solutions = [] - for _ in range(num_iterations): - # 生成初始解决方案 - initial_solution = self.generate(problem_description) - - # 审查解决方案 - review_feedback = self.review(initial_solution) - - # 根据反馈修改解决方案 - revised_solution = self.revise(initial_solution, review_feedback) - - solutions.append(revised_solution) - - # 整合多个解决方案 - final_solution = self.ensemble(solutions) - return final_solution - -# 示例使用 -problem = """ -Human: Write a function that takes a list of numbers and returns the sum of the numbers at even indices. - -Function Signature: -def sum_even_indices(numbers: List[int]) -> int: - -Example: ->>> sum_even_indices([1, 2, 3, 4, 5]) -9 # 1 + 3 + 5 = 9 -""" - -agent = LLMAgent( - generate_model="gpt-3.5-turbo", - review_criteria=["correctness", "efficiency", "readability"], - revise_model="gpt-4" -) - -solution = agent(problem) -print(solution) diff --git a/examples/ags/demo/graph.py b/examples/ags/demo/graph.py deleted file mode 100644 index 684497968..000000000 --- a/examples/ags/demo/graph.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -# @Date : 6/26/2024 17:07 PM -# @Author : didi -# @Desc : graph demo of ags - -from examples.ags.demo.operator import Generate, GenerateCode, Review, Revise, Ensemble, LLM - -class Graph: - def __init__(self, name:str, llm:str) -> None: - self.name = name - self.model = llm # TODO 抽象一个逻辑,用不同的model适配不同的算子 - - def __call__(): - NotImplementedError("Subclasses must implement __call__ method") - - -class HumanEvalGraph(Graph): - def __init__(self, name:str, llm: str, criteria:str) -> None: - super().__init__(name, llm) - self.criteria = criteria # TODO 有位置参数的生成逻辑是基于算子的要求 - self.generate_code = GenerateCode(llm=LLM(model=llm)) - self.review = Review(llm=LLM(model=llm), criteria=criteria) - self.revise = Revise(llm=LLM(model=llm)) - self.ensemble = Ensemble(llm=LLM(model=llm)) - - def __call__(self, problem): - # TODO 我先来实现一版不带Ensemble的版本 - solution = self.generate_code(problem) - # review & revise loop - for _ in range(3): - review_feedback = self.review(problem, solution) - if review_feedback['result']: - break - solution = self.revise(solution, review_feedback['feedback']) - return solution - - diff --git a/examples/ags/demo/medprompt.py b/examples/ags/demo/medprompt.py deleted file mode 100644 index 8d4ca732a..000000000 --- a/examples/ags/demo/medprompt.py +++ /dev/null @@ -1,168 +0,0 @@ -# 第一段代码是MedPrompt,一种利用利用LLM产生多种答案,然后进行洗牌投票来选出最优决策的方法 -# 我需要你首先理解这个方法,然后将这个方法与我的代码结合起来 -# 我的代码如下,我们会接收到多个答案,我需要你将这个答案利用MedPrompt的方法进行处理。 -# 在我的代码中,产生llm answer是用 await ActionNode.from_pydantic(ScEnsembleOp).fill(context=prompt, llm=self.llm) 实现的。 - -class ScEnsemble(Ensemble): - - def __init__(self, name:str ="Ensembler", llm: LLM = LLM()): - super().__init__(name, llm) - - async def __call__(self, solutions:List, problem_description): - solution_text = "" - for index, solution in enumerate(solutions): - solution_text += f"Solution{index}: {str(solution)}" + "\n" - - prompt = ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description) - node = await ActionNode.from_pydantic(ScEnsembleOp).fill(context=prompt, llm=self.llm) - response = node.instruct_content.model_dump() - return response - -class Medprompt(QASystem): - def __init__( - self, - agents: list, - num_reasoning_steps: int, - debate_prompts: dict, - verbose: bool = False, - name: Optional[str] = None, - mock: bool = False, # Unused - agent_prompts: Optional[dict] = None, # Unused - ): - super().__init__(verbose=verbose) - - assert len(agents) == 1 - self._num_reasoning_steps = num_reasoning_steps - self._agent = agents[0] - self._agent_names = [type(agent).__name__ for agent in agents] - self.prompts = debate_prompts - - """ - This is an implementation of the Medprompt system take - from https://arxiv.org/abs/2311.16452 - - The system is comprised of a single agent prompted to provide multiple - answers and explainations via temperature sampling and question shuffling. - The final answer is determined by taking the most frequent answer provided - by the agent during the aggregation. - - IMPORTANT: The current implementation only contains the first three steps - of the Medprompt setup. Therefore additional improvements can be made - by including the kNN and Ensemble with choice shuffling as well. - """ - - # Setup debate metrics - def metrics( - self, info: Dict[str, Any], format_solution_fn: Callable, solution: str - ) -> Dict[str, Any]: - return construct_agent_metrics( - info=info, - format_solution_fn=format_solution_fn, - solution=solution, - verbose=self._verbose, - agents=["Agent_0"], - agent_names=self._agent_names, - num_rounds=self._num_reasoning_steps, - ) - - @staticmethod - def shuffle_answers(question: str) -> Tuple[str, Any]: - """ - Takes in a multiple choice question string and shuffles only the answer texts, - keeping the answer labels (A, B, C, etc.) intact. - Also returns a mapping of shuffled choices to original choices. - """ - # Find the start of the answer section (e.g., '\nA:') - answer_section_start = re.search(r"\n[A-Z]:", question).start() # type: ignore - - # Split the question from the answers - main_question = question[:answer_section_start] - answers = question[answer_section_start + 1 :].split("\n") - - # Filter out answers that are not in the correct format - # answers = [answer for answer in answers if ": " == answer[1:3]] - - # Extract answer texts - answer_texts = [answer.split(": ", 1)[1] for answer in answers] - - # assert len(answer_texts) > 0 - - # Shuffle the answer texts and create a mapping to original answers - shuffled_texts = answer_texts.copy() - random.shuffle(shuffled_texts) - answer_mapping = { - chr(65 + i): answers[answer_texts.index(text)][0] - for i, text in enumerate(shuffled_texts) - } - - # Reassemble the shuffled answers with original labels - shuffled_answers = [ - f"{chr(65 + i)}: {text}" for i, text in enumerate(shuffled_texts) - ] - - # Reassemble the question - shuffled_question = main_question + "\n" + "\n".join(shuffled_answers) - return shuffled_question, answer_mapping - - def answer( - self, - question: str, - ) -> Tuple[str, Any]: - - agent_answers: Any = {"Agent_0": {}} - agent_info: Any = {"Agent_0": {}} - agent_responses: Any = {"Agent_0": {}} - if self._verbose: - print("#######################") - print("REASONING STEP") - print("#######################") - - message_history: List[Dict[str, str]] = [] - - for i in range(self._num_reasoning_steps): - - try: - # TODO: Provide the options to the system as well. This would - # make it much easier to shuffle the answers. Furthermore, remove - # all questions without options in load_datasets.py. - shuffled_question, answer_mapping = self.shuffle_answers(question) - except Exception as e: - shuffled_question = question - answer_mapping = {"A": "A", "B": "B", "C": "C", "D": "D", "E": "E"} - print("question: ", question) - print("Shuffling failed, using original question: ", e) - - answer, info = self._agent.answer( - question=shuffled_question, - system_message=self.prompts["system"], - ) - - # Dummy data to check the suffler. - # answer = "A" - # info = {"prompt_tokens": 1234, "response_tokens": 1234, - # "response": "I don't know, A.", - # "cost": 0.0, "num_messages_removed": 0.0, - # "answer_duration": 1.0, "engine": "Diesel"} - - # Map the answer back to the original answer - if answer in answer_mapping: - answer = answer_mapping[answer] - - message_history.append( - {"agent_name": f"Reasoning_{i}", "content": info["response"]} - ) - agent_answers["Agent_0"][f"Reasoning_{i}"] = answer - agent_responses["Agent_0"][f"Reasoning_{i}"] = info["response"] - agent_info["Agent_0"][f"Reasoning_{i}"] = info - - final_answers = [ - agent_answers["Agent_0"][f"Reasoning_{i}"] - for i in range(self._num_reasoning_steps) - ] - answer, _ = most_frequent(final_answers) - - return answer, { - "response": agent_responses, - "agent_answers": agent_answers, - "agent_info": agent_info, - } \ No newline at end of file diff --git a/examples/ags/demo/operator.py b/examples/ags/demo/operator.py deleted file mode 100644 index 9f76f23db..000000000 --- a/examples/ags/demo/operator.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- -# @Date : 6/26/2024 17:07 PM -# @Author : didi -# @Desc : operator demo of ags - -import json -from openai import OpenAI -from examples.ags.demo.prompt import GENERATE_PROMPT, GENERATE_CODE_PROMPT, REVIEW_PROMPT, REVISE_PROMPT, ENSEMBLE_PROMPT - -class LLM(): - def __init__(self, model:str='gpt-4-turbo', timeout:int=60): - self.model = model - self.timeout = timeout - self.api_key = '' - self.base_url = '' - self.client = OpenAI(api_key=self.api_key, base_url=self.base_url) - self.system_prompt = None - - def ask(self, text: str, json_mode: bool = False, temperature: float = 0.7, retries: int = 5): - response_type = "text" if not json_mode else "json_object" - messages = [{"role": "user", "content": text}] if self.system_prompt == None else [ - {"role": "system", "content": self.system_prompt}, {"role": "user", "content": text}] - for i in range(retries): - try: - response = self.client.chat.completions.create( - model=self.model, - messages=messages, - temperature=temperature, - response_format={"type": response_type} - ) - if json_mode: - result = response.choices[0].message.content - result = json.loads(result) - else: - result = response.choices[0].message.content - print(result) - return result - except Exception as e: - print(f"{__name__} occurs: {e}") - - -class Operator: - def __init__(self, name, llm:LLM=None): - self.name = name - self.llm = llm - - def __call__(self, *args, **kwargs): - raise NotImplementedError - -class Generate(Operator): - """ - Generate code & Generate text 应该被分开 - """ - def __init__(self, name:str ="Generator", llm: LLM = LLM()): - super().__init__(name, llm) - - def __call__(self, problem_description): - prompt = GENERATE_PROMPT.format(problem_description=problem_description) - response = self.llm.ask(prompt, json_mode=True) - return {"solution": response.get("solution")} - -class GenerateCode(Operator): - - def __init__(self, name:str ="Coder", llm: LLM = LLM()): - super().__init__(name, llm) - - def __call__(self, problem_description): - prompt = GENERATE_CODE_PROMPT.format(problem_description=problem_description) - response = self.llm.ask(prompt, json_mode=True) - return {"code": response.get("code")} - -class Review(Operator): - - def __init__(self, criteria, name:str ="Reviewer", llm: LLM = LLM()): - self.criteria = criteria - super().__init__(name, llm) - - # TODO 有点搞笑,我忘记加上criteria了 - def __call__(self, problem_description, solution): - prompt = REVIEW_PROMPT.format(problem_description=problem_description, solution=solution) - response = self.llm.ask(prompt, json_mode=True) - if response.get("result") == True: - return {"result": True} - else: - return {"result":False, "feedback":response.get('feedback')} - -class Revise(Operator): - - def __init__(self, name:str ="Reviser", llm: LLM = LLM()): - super().__init__(name, llm) - - def __call__(self, problem_description, solution, feedback): - prompt = REVISE_PROMPT.format(problem_description=problem_description, solution=solution, feedback=feedback) - response = self.llm.ask(prompt, json_mode=True) - return {"revised_solution": response.get("revised_solution")} - -class Ensemble(Operator): - - def __init__(self, name:str ="Ensembler", llm: LLM = LLM()): - super().__init__(name, llm) - - def __call__(self, *args, problem_description): - solutions = "" - for solution in args: - solutions += solution + "\n" - prompt = ENSEMBLE_PROMPT.format(solutions=solutions, problem_description=problem_description) - response = self.llm.ask(prompt, json_mode=True) - return {"ensembled_solution": response.get("ensembled_solution")} - diff --git a/examples/ags/demo/prompt.py b/examples/ags/demo/prompt.py deleted file mode 100644 index b12957128..000000000 --- a/examples/ags/demo/prompt.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -# @Date : 6/26/2024 17:07 PM -# @Author : didi -# @Desc : prompts of operators - - -GENERATE_PROMPT = """ -Generate Solution for the following problem: {problem_description} - -Please structure your response in JSON format as follows: -{{ - "solution": "" -}} -""" - -GENERATE_CODE_PROMPT = """ -Generate Code Solution for the following problem: {problem_description} - -Please structure your response in JSON format as follows: -{{ - "code": "" -}} -""" - -REVIEW_PROMPT = """ -For the question described as {problem_description}, -please review the following solution: {solution}, and provide a review result in boolean format. -If you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your comments - -Please structure your response in JSON format as follows: -{{ - "result": , - "comment": "" -}} -""" - -REVISE_PROMPT = """ -For the question described as {problem_description}, -please evaluate and revise the solution provided: {solution}, taking into account the review comments: {comment}." -Then output the revised solution. - -Please structure your response in JSON format as follows: -{{ - "revised_solution": "" -}} - -""" - -ENSEMBLE_PROMPT = """ -For the question described as {problem_description}, -please ensemble the following solutions: {solutions}, and provide an ensemble result. - -Please structure your response in JSON format as follows: -{{ - "ensembled_solution": "" -}} -""" diff --git a/examples/ags/w_action_node/graph.py b/examples/ags/w_action_node/graph.py index 217371cb9..c0557a1dd 100644 --- a/examples/ags/w_action_node/graph.py +++ b/examples/ags/w_action_node/graph.py @@ -3,26 +3,41 @@ # @Author : didi # @Desc : graph & an instance - humanevalgraph -from metagpt.llm import LLM from typing import List -from examples.ags.w_action_node.operator import Generate, GenerateCode, GenerateCodeBlock, Review, Revise, FuEnsemble, MdEnsemble, DbEnsemble, Rephrase, Test -from examples.ags.w_action_node.utils import extract_test_cases_from_jsonl + from evalplus.data import get_human_eval_plus + +from examples.ags.w_action_node.operator import ( + FuEnsemble, + Generate, + GenerateCode, + GenerateCodeBlock, + MdEnsemble, + Rephrase, + Review, + Revise, + Test, +) +from examples.ags.w_action_node.utils import extract_test_cases_from_jsonl +from metagpt.llm import LLM + + class Graph: - def __init__(self, name:str, llm:LLM) -> None: + def __init__(self, name: str, llm: LLM) -> None: self.name = name - self.model = llm + self.model = llm def __call__(): NotImplementedError("Subclasses must implement __call__ method") - def optimize(dataset:List): + def optimize(dataset: List): pass + class HumanEvalGraph(Graph): - def __init__(self, name:str, llm: LLM, criteria:str, vote_count:int =5) -> None: + def __init__(self, name: str, llm: LLM, criteria: str, vote_count: int = 5) -> None: super().__init__(name, llm) - self.criteria = criteria # TODO 自动构建图时,图的初始参数与图所使用的算子要求的外部参数相匹配 + self.criteria = criteria # TODO 自动构建图时,图的初始参数与图所使用的算子要求的外部参数相匹配 self.generate_code = GenerateCode(llm=llm) self.generate_code_block = GenerateCodeBlock(llm=llm) self.review = Review(llm=llm, criteria=criteria) @@ -32,82 +47,82 @@ class HumanEvalGraph(Graph): self.fuensemble = FuEnsemble(llm=llm) self.mdensemble = MdEnsemble(llm=llm, vote_count=vote_count) - async def __call__(self, problem:str, ensemble_count:int = 3): + async def __call__(self, problem: str, ensemble_count: int = 3): solution_list = [] for _ in range(ensemble_count): - for retry_count in range(5): - try: - # solution = await self.generate_code(problem) - solution = await self.generate_code_block(problem) - solution = solution.get('code_solution') - solution_list.append(solution) - break - except Exception as e: - print(e) + solution = await self.generate_code_block(problem) + solution = solution.get("code_solution") + solution_list.append(solution) solution = await self.mdensemble("code", solution_list, problem) return solution - - async def alpha_codium(self, problem_id:str, problem:str, ensemble_count:int = 3): - # async def __call__(self,problem_id, problem:str, ensemble_count:int = 3): + + async def alpha_codium(self, problem_id: str, problem: str, ensemble_count: int = 3): + """ + Paper: Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering + Link: https://arxiv.org/abs/2404.14963 + Flow: An incomplete version of alpha codium, implementing the basic process of rephrase -> code ensemble -> tes + """ test_cases = extract_test_cases_from_jsonl(problem_id) - entry_point = get_human_eval_plus()[problem_id]['entry_point'] - rephrase_problem = await self.rephrase(problem) # 在rephrase 中拼接原始的问题描述 + entry_point = get_human_eval_plus()[problem_id]["entry_point"] + rephrase_problem = await self.rephrase(problem) # 在rephrase 中拼接原始的问题描述 solution_list = [] for _ in range(ensemble_count): - for retry_count in range(5): - try: - solution = await self.generate_code_block.rephrase_generate(problem, rephrase_problem, function_name=entry_point) - solution = solution.get('code_solution') - solution_list.append(solution) - break - except Exception as e: - print(e) + solution = await self.generate_code_block.rephrase_generate( + problem, rephrase_problem, function_name=entry_point + ) + solution = solution.get("code_solution") + solution_list.append(solution) solution = await self.mdensemble("code", solution_list, problem) solution = await self.tester(problem_id, problem, rephrase_problem, solution, test_cases) return solution - async def review_revise_ensemble(self, problem:str, ensemble_count:int = 2): + async def review_revise_ensemble(self, problem: str, ensemble_count: int = 2, revise_round: int = 3): solution_list = [] for _ in range(ensemble_count): - solution = await self.single_solve(problem, 3) + solution = await self.single_solve(problem, revise_round) solution_list.append(solution) solution = await self.ensemble(solution_list, problem) return solution - async def simple_ensemble(self, problem:str, ensemble_count:int = 3): - # async def __call__(self, problem:str, ensemble_count:int = 3): + async def simple_ensemble(self, problem: str, ensemble_count: int = 3): solution_list = [] for _ in range(ensemble_count): solution = await self.generate_code(problem) # solution = await self.generate_code_block(problem) - solution = solution.get('code_solution') + solution = solution.get("code_solution") solution_list.append(solution) solution = await self.fuensemble(solution_list, problem) return solution - - async def single_solve(self, problem:str, max_loop:int): + + async def single_solve(self, problem: str, max_loop: int): solution = await self.generate_code(problem) - solution = solution.get('code_solution') + solution = solution.get("code_solution") for _ in range(max_loop): review_feedback = await self.review(problem, solution) - if review_feedback['review_result']: + if review_feedback["review_result"]: break - solution = await self.revise(problem, solution, review_feedback['feedback']) - solution = solution.get('revised_solution') + solution = await self.revise(problem, solution, review_feedback["feedback"]) + solution = solution.get("revised_solution") return solution - + + class Gsm8kGraph(Graph): - def __init__(self, name:str, llm: LLM) -> None: + def __init__(self, name: str, llm: LLM) -> None: super().__init__(name, llm) self.generate = Generate(llm=llm) self.rephrase = Rephrase(llm=llm) - - async def __call__(self, problem:str): + + async def __call__(self, problem: str): + solution = self.generate(problem) + return solution + + +class HotpotQAGraph(Graph): + def __init__(self, name: str, llm: LLM) -> None: + super().__init__(name, llm) + self.generate = Generate(llm=llm) + self.rephrase = Rephrase(llm=llm) + + async def __call__(self, problem: str): solution = self.generate(problem) return solution - - # async def __call__(self, problem:str): - # 这个地方没有修改对应的prompt,可以对应着humaneval改一下 - # problem = await self.rephrase(problem) - # solution = self.generate(problem) - # return solution \ No newline at end of file diff --git a/examples/ags/w_action_node/operator.py b/examples/ags/w_action_node/operator.py index 1069b73e5..72c0b30fc 100644 --- a/examples/ags/w_action_node/operator.py +++ b/examples/ags/w_action_node/operator.py @@ -3,30 +3,60 @@ # @Author : didi # @Desc : operator demo of ags import ast +import random import sys import traceback -import random -from typing import List, Tuple, Any, Dict from collections import Counter +from typing import Dict, List, Tuple -from metagpt.actions.action_node import ActionNode -from metagpt.llm import LLM +from tenacity import retry, stop_after_attempt -from examples.ags.w_action_node.operator_an import GenerateOp, GenerateCodeOp, GenerateCodeBlockOp ,ReviewOp, ReviseOp, FuEnsembleOp, MdEnsembleOp, ReflectionTestOp, RephraseOp -from examples.ags.w_action_node.prompt import GENERATE_PROMPT, GENERATE_CODE_PROMPT, GENERATE_CODEBLOCK_PROMPT, REVIEW_PROMPT, REVISE_PROMPT, FU_ENSEMBLE_PROMPT, MD_ENSEMBLE_PROMPT, REFLECTION_ON_PUBILIC_TEST_PROMPT, REPHRASE_ON_PROBLEM_PROMPT, GENERATE_CODEBLOCK_REPHRASE_PROMPT -from examples.ags.w_action_node.prompt import DE_ENSEMBLE_CODE_FORMAT_PROMPT, DE_ENSEMBLE_TXT_FORMAT_PROMPT, DE_ENSEMBLE_ANGEL_PROMPT, DE_ENSEMBLE_DEVIL_PROMPT, DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT, DE_ENSEMBLE_JUDGE_FINAL_PROMPT +from examples.ags.w_action_node.operator_an import ( + FuEnsembleOp, + GenerateCodeBlockOp, + GenerateCodeOp, + GenerateOp, + MdEnsembleOp, + ReflectionTestOp, + RephraseOp, + ReviewOp, + ReviseOp, +) +from examples.ags.w_action_node.prompt import ( + DE_ENSEMBLE_ANGEL_PROMPT, + DE_ENSEMBLE_CODE_FORMAT_PROMPT, + DE_ENSEMBLE_DEVIL_PROMPT, + DE_ENSEMBLE_JUDGE_FINAL_PROMPT, + DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT, + DE_ENSEMBLE_TXT_FORMAT_PROMPT, + FU_ENSEMBLE_PROMPT, + GENERATE_CODE_PROMPT, + GENERATE_CODEBLOCK_PROMPT, + GENERATE_CODEBLOCK_REPHRASE_PROMPT, + GENERATE_PROMPT, + MD_ENSEMBLE_PROMPT, + REFLECTION_ON_PUBLIC_TEST_PROMPT, + REPHRASE_ON_PROBLEM_PROMPT, + REVIEW_PROMPT, + REVISE_PROMPT, +) from examples.ags.w_action_node.utils import test_cases_2_test_functions +from metagpt.actions.action_node import ActionNode +from metagpt.llm import LLM +from metagpt.logs import logger + class Operator: - def __init__(self, name, llm:LLM): + def __init__(self, name, llm: LLM): self.name = name self.llm = llm def __call__(self, *args, **kwargs): raise NotImplementedError + class Generate(Operator): - def __init__(self, name:str ="Generator", llm: LLM = LLM()): + def __init__(self, name: str = "Generate", llm: LLM = LLM()): super().__init__(name, llm) async def __call__(self, problem_description): @@ -34,10 +64,10 @@ class Generate(Operator): node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() return response - -class GenerateCode(Operator): - def __init__(self, name:str ="Coder", llm: LLM = LLM()): + +class GenerateCode(Operator): + def __init__(self, name: str = "GenerateCode", llm: LLM = LLM()): super().__init__(name, llm) async def __call__(self, problem_description): @@ -45,39 +75,49 @@ class GenerateCode(Operator): node = await ActionNode.from_pydantic(GenerateCodeOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() return response - -class GenerateCodeBlock(Operator): - def __init__(self, name:str ="Coder", llm: LLM = LLM()): + +class GenerateCodeBlock(Operator): + def __init__(self, name: str = "GenerateCodeBlock", llm: LLM = LLM()): super().__init__(name, llm) + @retry(stop=stop_after_attempt(3)) async def __call__(self, problem_description, function_name): prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem_description) - node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(context=prompt, llm=self.llm, mode='code_fill',function_name=function_name) + node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill( + context=prompt, llm=self.llm, mode="code_fill", function_name=function_name + ) response = node.instruct_content.model_dump() return response + @retry(stop=stop_after_attempt(3)) async def rephrase_generate(self, problem_description, rephrase_problem, function_name): - prompt = GENERATE_CODEBLOCK_REPHRASE_PROMPT.format(problem_description=problem_description,rephrase_problem=rephrase_problem) - node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(context=prompt, llm=self.llm, mode='code_fill', function_name=function_name) + prompt = GENERATE_CODEBLOCK_REPHRASE_PROMPT.format( + problem_description=problem_description, rephrase_problem=rephrase_problem + ) + node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill( + context=prompt, llm=self.llm, mode="code_fill", function_name=function_name + ) response = node.instruct_content.model_dump() return response - + + class Review(Operator): - - def __init__(self, criteria, name:str ="Reviewer", llm: LLM = LLM()): + def __init__(self, criteria, name: str = "Review", llm: LLM = LLM()): self.criteria = criteria super().__init__(name, llm) async def __call__(self, problem_description, solution): - prompt = REVIEW_PROMPT.format(problem_description=problem_description, solution=solution, criteria=self.criteria) + prompt = REVIEW_PROMPT.format( + problem_description=problem_description, solution=solution, criteria=self.criteria + ) node = await ActionNode.from_pydantic(ReviewOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() return response -class Revise(Operator): - def __init__(self, name:str ="Reviser", llm: LLM = LLM()): +class Revise(Operator): + def __init__(self, name: str = "Revise", llm: LLM = LLM()): super().__init__(name, llm) async def __call__(self, problem_description, solution, feedback): @@ -86,12 +126,16 @@ class Revise(Operator): response = node.instruct_content.model_dump() return response -class FuEnsemble(Operator): - def __init__(self, name:str ="FuseEnsembler", llm: LLM = LLM()): +class FuEnsemble(Operator): + """ + Function: Critically evaluating multiple solution candidates, synthesizing their strengths, and developing an enhanced, integrated solution. + """ + + def __init__(self, name: str = "FuEnsemble", llm: LLM = LLM()): super().__init__(name, llm) - async def __call__(self, solutions:List, problem_description): + async def __call__(self, solutions: List, problem_description): solution_text = "" for solution in solutions: solution_text += str(solution) + "\n" @@ -99,16 +143,18 @@ class FuEnsemble(Operator): node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() return response - + + class MdEnsemble(Operator): """ - MedPrompt - + Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine + Link: https://arxiv.org/abs/2311.16452 """ - def __init__(self, name:str ="MedEnsembler", llm: LLM = LLM(), vote_count:int=3): + + def __init__(self, name: str = "MdEnsemble", llm: LLM = LLM(), vote_count: int = 3): super().__init__(name, llm) self.vote_count = vote_count - + @staticmethod def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, str]]: shuffled_solutions = solutions.copy() @@ -116,12 +162,10 @@ class MdEnsemble(Operator): answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)} return shuffled_solutions, answer_mapping - async def __call__(self, solution_type:str ,solutions:List[str], problem_description:str): - print(solutions) + async def __call__(self, solution_type: str, solutions: List[str], problem_description: str): all_responses = [] - # 如果Solution方案是Code,我们利用AST去重 + # 当Ensmeble方案是Code类型时,我们使用AST进行去重 if solution_type == "code": - original_length = len(solutions) unique_structures = {} updated_solutions = [] @@ -129,72 +173,63 @@ class MdEnsemble(Operator): try: tree = ast.parse(solution) structure_key = ast.dump(tree, annotate_fields=False, include_attributes=False) - + if structure_key not in unique_structures: unique_structures[structure_key] = solution updated_solutions.append(solution) except SyntaxError: # If the solution has a syntax error, we'll skip it - print("here",solution) continue solutions = updated_solutions updated_length = len(solutions) - # print(f"Original number of solutions: {original_length}") - # print(f"Updated number of solutions: {updated_length}") if updated_length == 1: return {"final_solution": solutions[0]} + for _ in range(self.vote_count): shuffled_solutions, answer_mapping = self.shuffle_answers(solutions) - + solution_text = "" for index, solution in enumerate(shuffled_solutions): solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n" - + prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description) node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() - - answer = response.get('solution_letter', '') + + answer = response.get("solution_letter", "") answer = answer.strip().upper() - + if answer in answer_mapping: original_index = answer_mapping[answer] - print(f"original index: {original_index}") + # print(f"original index: {original_index}") all_responses.append(original_index) - + most_frequent_index = Counter(all_responses).most_common(1)[0][0] - print(f"most frequent_index: {most_frequent_index}") final_answer = solutions[most_frequent_index] - print(f"final answer: \n{final_answer}") - # final_answer, frequency = self.most_frequent(all_responses) return {"final_solution": final_answer} + class ScEnsemble(Operator): """ - self consistency ensemble + Paper: Self-Consistency Improves Chain of Thought Reasoning in Language Models + Link: https://arxiv.org/abs/2203.11171 """ - # ScEnsemble 的构建相对好做一点 30分钟左右 pass -class DbEnsemble(Operator): + +class MADEnsemble(Operator): """ - (Should we be going MAD? A Look at Multi-Agent Debate Strategies for LLMs) - The system is a multi-round debate system where each agent is given the - question and responses generated by all agents. For each round, a judge - analyzes the responses provided determines whether to terminate the - debate or keep going. At the end of the debate the judge is also responsible - for determining the final answer. + Paper: Should we be going MAD? A Look at Multi-Agent Debate Strategies for LLMs + Link: https://arxiv.org/abs/2311.17371 """ - def __init__(self, name:str ="DebateEnsemble", llm: LLM = LLM()): + + def __init__(self, name: str = "DebateEnsemble", llm: LLM = LLM()): super().__init__(name, llm) - self.agents = ["angel","devil","judge"] - self.format_requirements = { - "txt":DE_ENSEMBLE_TXT_FORMAT_PROMPT, - "code":DE_ENSEMBLE_CODE_FORMAT_PROMPT - } - - def get_system_prompt(self, name:str, mode:str='txt'): + self.agents = ["angel", "devil", "judge"] + self.format_requirements = {"txt": DE_ENSEMBLE_TXT_FORMAT_PROMPT, "code": DE_ENSEMBLE_CODE_FORMAT_PROMPT} + + def get_system_prompt(self, name: str, mode: str = "txt"): if name == "angel": if mode == "code": return DE_ENSEMBLE_ANGEL_PROMPT + "\n" + DE_ENSEMBLE_CODE_FORMAT_PROMPT @@ -205,10 +240,10 @@ class DbEnsemble(Operator): return DE_ENSEMBLE_DEVIL_PROMPT + "\n" + DE_ENSEMBLE_TXT_FORMAT_PROMPT elif name == "judge": if mode == "final": - return DE_ENSEMBLE_JUDGE_FINAL_PROMPT + return DE_ENSEMBLE_JUDGE_FINAL_PROMPT return DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT - - def construct_messages(self, message_history_with_name, name, mode:str="txt", phase:str="universal"): + + def construct_messages(self, message_history_with_name, name, mode: str = "txt", phase: str = "universal"): """ 基于name与mode来构建system message. 基于name来构建messages @@ -221,67 +256,63 @@ class DbEnsemble(Operator): elif name == "judge": messages = self._construct_judge(message_history_with_name, mode, messages) return messages - + def _construct_debate(self, message_history_with_name, name, messages): user_message = "" - + for message in message_history_with_name: if message["name"] == "Judge": continue elif message["name"] == name: if user_message: - messages.append({ - "role": "user", - "name": "user", - "content": user_message.strip("\n"), - }) - messages.append({ - "role": "assistant", - "name": name, - "content": message["content"], - }) + messages.append( + { + "role": "user", + "name": "user", + "content": user_message.strip("\n"), + } + ) + messages.append( + { + "role": "assistant", + "name": name, + "content": message["content"], + } + ) user_message = "" else: user_message += message["content"] - + if user_message: - messages.append({ - "role": "user", - "name": "user", - "content": user_message.strip("\n"), - }) - + messages.append( + { + "role": "user", + "name": "user", + "content": user_message.strip("\n"), + } + ) + return messages def _construct_judge(self, message_history_with_name, mode, messages): pass - async def debate_answer(self, message_history:List, role:str="angel"): + async def debate_answer(self, message_history: List, role: str = "angel"): messages = self.construct_messages(message_history, role) response = await self.llm.acompletion_text(messages=messages) - message_history.append({ - "role":"user", - "name":role, - "content":response} - ) + message_history.append({"role": "user", "name": role, "content": response}) return message_history, response - async def judge_answer(self, message_history:List, phase:str="universal"): + async def judge_answer(self, message_history: List, phase: str = "universal"): messages = self.construct_messages(message_history, "judge", phase=phase) response = await self.llm.acompletion_text(messages=messages) - message_history.append({ - "role": "user", - "name": "judge", - "content": response} - ) + message_history.append({"role": "user", "name": "judge", "content": response}) return message_history, response - async def __call__(self, origin_solution:str, problem_description:str, max_round:int = 3, mode:str='txt'): + async def __call__(self, origin_solution: str, problem_description: str, max_round: int = 3, mode: str = "txt"): # 思路,输入一个原始答案,构建一个agent代表这个答案进行辩论;另一个agent(devil)使用debate llm的内容进行辩论;法官在每一轮次做出决定是否终止,到了maxround还没终止就由法官进行总结。 - message_history_with_name = [ - {"role":"user", "name":"angel", "content":origin_solution} - ] - + message_history_with_name = [{"role": "user", "name": "angel", "content": origin_solution}] + for index in range(max_round): for agent in self.agents: if agent == "angel": @@ -291,89 +322,108 @@ class DbEnsemble(Operator): elif agent == "devil": message_history_with_name, rsp = self.debate_answer(message_history_with_name, role="devil") elif agent == "judge": - message_history_with_name, judge_result = self.judge_answer(message_history_with_name, phase="universal") + message_history_with_name, judge_result = self.judge_answer( + message_history_with_name, phase="universal" + ) if not judge_result["is_debating"]: """ 这里需要在 self.judge_answer 中设置一个自动给出solution的地方 """ - return {"final_solution":judge_result["final_solution"]} - - message_history_with_name.pop(-1) - message_history_with_name, judge_answer = self.judge_answer(message_history_with_name, phase="final") + return {"final_solution": judge_result["final_solution"]} + + message_history_with_name.pop(-1) + message_history_with_name, judge_answer = self.judge_answer(message_history_with_name, phase="final") + + return {"final_solution": judge_answer["debate_answer"]} - return {"final_solution":judge_answer["debate_answer"]} class Rephrase(Operator): """ - 1. AlphaCodium - 2. https://arxiv.org/abs/2404.14963 + Paper: Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering + Link: https://arxiv.org/abs/2404.14963 + Paper: Achieving >97% on GSM8K: Deeply Understanding the Problems Makes LLMs Better Solvers for Math Word Problems + Link: https://arxiv.org/abs/2404.14963 """ - def __init__(self, name:str ="Rephraser", llm: LLM = LLM()): + + def __init__(self, name: str = "Rephrase", llm: LLM = LLM()): super().__init__(name, llm) - async def __call__(self, problem_description:str)->str: + async def __call__(self, problem_description: str) -> str: prompt = REPHRASE_ON_PROBLEM_PROMPT.format(problem_description=problem_description) node = await ActionNode.from_pydantic(RephraseOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() return response["rephrased_problem"] - + + class Test(Operator): - def __init__(self, name:str ="Tester", llm: LLM = LLM()): + def __init__(self, name: str = "Test", llm: LLM = LLM()): super().__init__(name, llm) - + def exec_code(self, solution, test_cases, problem_id): - # TODO 未来还要做修改,最好能做到一个样例一测 + # TODO + # 1. 获取更加详细的Test error信息 + # 2. 更换Public Test数据集,当前使用的数据存在Label Leak(使用的Reflexion的数据集) -> 这个问题使用LLM抽取解决,直接生成为assert代码串 + # 3. 实现单独测试每一个test case -> 1 solution = solution["final_solution"] test_code = test_cases_2_test_functions(solution, test_cases) - print("test_code", test_code) try: exec(test_code, globals()) except AssertionError as e: exc_type, exc_value, exc_traceback = sys.exc_info() tb_str = traceback.format_exception(exc_type, exc_value, exc_traceback) with open("tester.txt", "a") as f: - f.write("test_error" +problem_id + "\n") - error_infomation = {"test_fail_case": { - "error_type": "AssertionError", - "error_message": str(e), - "traceback": tb_str - }} - print("error here", error_infomation) + f.write("test_error" + problem_id + "\n") + error_infomation = { + "test_fail_case": {"error_type": "AssertionError", "error_message": str(e), "traceback": tb_str} + } + logger.info(f"test error: {error_infomation}") return error_infomation except Exception as e: with open("tester.txt", "a") as f: f.write(problem_id + "\n") - return {"exec_fail_case":str(e)} + return {"exec_fail_case": str(e)} return [] async def __call__(self, problem_id, problem, rephrase_problem, solution, test_cases): result = self.exec_code(solution, test_cases, problem_id) - print("result here", result) if result == []: return solution - # 处理代码执行失败的代码 elif "exec_fail_case" in result: result = result["exec_fail_case"] - prompt = REFLECTION_ON_PUBILIC_TEST_PROMPT.format(problem_description=problem, rephrase_problem=rephrase_problem, code_solution=solution, exec_pass=f"executed unsuccessfully, error: \n {result}", test_fail="executed unsucessfully") + prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format( + problem_description=problem, + rephrase_problem=rephrase_problem, + code_solution=solution, + exec_pass=f"executed unsuccessfully, error: \n {result}", + test_fail="executed unsucessfully", + ) node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() - return {"final_solution":response["refined_solution"]} + return {"final_solution": response["refined_solution"]} else: result = result["test_fail_case"] - prompt = REFLECTION_ON_PUBILIC_TEST_PROMPT.format(problem_description=problem, rephrase_problem=rephrase_problem, code_solution=solution, exec_pass="executed successfully", test_fail=result) + prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format( + problem_description=problem, + rephrase_problem=rephrase_problem, + code_solution=solution, + exec_pass="executed successfully", + test_fail=result, + ) node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm) response = node.instruct_content.model_dump() - return {"final_solution":response["refined_solution"]} - + return {"final_solution": response["refined_solution"]} + + class FindFact(Operator): - pass + def __init__(self, name: str = "FindFact", llm: LLM = LLM()): + super().__init__(name, llm) + class SelfAsk(Operator): - pass + def __init__(self, name: str = "SelfAsk", llm: LLM = LLM()): + super().__init__(name, llm) + class Verify(Operator): - """ - ? 还没有想好 - """ - pass - + def __init__(self, name: str = "Verify", llm: LLM = LLM()): + super().__init__(name, llm) diff --git a/examples/ags/w_action_node/operator_an.py b/examples/ags/w_action_node/operator_an.py index 2cad6b9fc..9008742fa 100644 --- a/examples/ags/w_action_node/operator_an.py +++ b/examples/ags/w_action_node/operator_an.py @@ -5,26 +5,42 @@ from pydantic import BaseModel, Field + class GenerateOp(BaseModel): solution: str = Field(default="", description="Your Solution for this problem") + class GenerateCodeOp(BaseModel): code_solution: str = Field(default="", description="Complete and correct code here.") + class GenerateCodeBlockOp(BaseModel): code_solution: str = Field(default="", description="Your complete code solution for this problem") + class ReviewOp(BaseModel): - review_result: bool = Field(default=False, description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'") - feedback: str = Field(default="", description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.") + review_result: bool = Field( + default=False, + description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'", + ) + feedback: str = Field( + default="", + description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.", + ) + class ReviseOp(BaseModel): revised_solution: str = Field(default="", description="Based on the feedback, revised solution for this problem") + class FuEnsembleOp(BaseModel): - thought: str = Field(default="", description="Analyze the solutions and think how to combine the advantages of various solutions to form the best possible solution.") + thought: str = Field( + default="", + description="Analyze the solutions and think how to combine the advantages of various solutions to form the best possible solution.", + ) final_solution: str = Field(default="", description="Output the final solution after analysis and integration") + class MdEnsembleOp(BaseModel): thought: str = Field( default="""Example thought process: @@ -35,22 +51,30 @@ class MdEnsembleOp(BaseModel): 5. The use of 'isinstance' for type checking is a good practice. 6. The function handles decimal separators well by replacing ',' with '.'. Overall, this solution effectively solves the problem of comparing two values, with good error handling and flexibility. It could be improved by specifying behavior for equal values, but it's a strong solution as is.""", - description="Step-by-step analysis of the solutions to determine the best one." - ) - solution_letter: str = Field( - default="", - description="The letter of the chosen best solution (only one letter)." + description="Step-by-step analysis of the solutions to determine the best one.", ) + solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).") + class TestCaseExtractOp(BaseModel): - test_cases: list = Field(default=[('', [5, 8, 7, 1], 12), ('', [3, 3, 3, 3, 3], 9)], - description="Extracted test cases from the problem description") - + test_cases: list = Field( + default=[ + "assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True", + "assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False", + "", + ], + description="Extracted test cases from the problem description", + ) + + class RephraseOp(BaseModel): rephrased_problem: str = Field(default="", description="Rephrased problem description for this problem") + class ReflectionTestOp(BaseModel): - reflection: str = Field(default="", description="对关于代码执行错误或者测试用例失败step by step的思考") - refined_solution: str = Field(default="", description="对于代码执行错误或者测试用例失败的修正方案") - - \ No newline at end of file + reflection: str = Field( + default="", description="Step-by-step reflection on code execution errors or test case failures" + ) + refined_solution: str = Field( + default="", description="Corrective solution for code execution errors or test case failures" + ) diff --git a/examples/ags/w_action_node/prompt.py b/examples/ags/w_action_node/prompt.py index 5d82d1a3c..f7c68e3ed 100644 --- a/examples/ags/w_action_node/prompt.py +++ b/examples/ags/w_action_node/prompt.py @@ -7,45 +7,22 @@ GENERATE_PROMPT = """ Generate Solution for the following problem: {problem_description} """ -# GENERATE_CODE_PROMPT = """ -# Below is an instruction that describes a task, paired with an input that provides further context. -# Write a response that appropriately completes the request. - -# ### Instruction: -# Write a program to perform the given task. - -# Input: -# {problem_description} - -# ### Response: -# """ - GENERATE_CODE_PROMPT = """ You are an expert programmer tasked with solving a coding problem. -### Problem Description: +### Problem Description {problem_description} -### Instructions: +### Instructions The above is an incomplete Python code fragment. Return the complete and correct code with no additional text. Please maintain the JSON format in your response. -### Your Response: +### Your Response """ -# GENERATE_CODEBLOCK_PROMPT = """ -# You are an expert programmer tasked with solving a coding problem. - -# ### Problem Description: -# {problem_description} - -# ### Instructions: -# The above is an incomplete Python code fragment. Return the complete and correct code with no additional text. -# """ - GENERATE_CODEBLOCK_REPHRASE_PROMPT = """ Please provide a self-contained Python script that solves the following problem in a markdown code block: -### Problem Description: +### Problem Description {problem_description} ### self reflection on the problem @@ -58,12 +35,7 @@ When creating your solution: 4. Avoid adding additional test cases beyond those provided in the problem description. """ -# GENERATE_CODEBLOCK_PROMPT = """ -# Please provide a self-contained Python script that solves the following problem in a markdown code block: -# {problem_description} -# """ - -GENERATE_CODEBLOCK_PROMPT =""" +GENERATE_CODEBLOCK_PROMPT = """ Please provide a self-contained Python script that solves the following problem in a markdown code block: {problem_description} @@ -127,10 +99,10 @@ Please strictly output in JSON format, do not output irrelevant content. """ DE_ENSEMBLE_CODE_FORMAT_PROMPT = """ Now please output your answer in json format, with the format as follows: -{{ - "reason":"<为什么要这样做>", - "code_solution":"<你觉得合适的solution,用代码表示出来>" -}} +{ + "reason":"", + "code_solution":"" +} Please strictly output in JSON format, do not output irrelevant content. """ DE_ENSEMBLE_ANGEL_PROMPT = """ @@ -159,18 +131,6 @@ You, as the moderator, will evaluate both sides' answers and determine if there Please strictly output in JSON format, do not output irrelevant content """ -EXTRACT_CASE_PROMPT = """ -You are given a coding problem, and you need to extract the test cases from the problem description. -{problem_description} - -一个problem中会有多个测试用例,每个测试用例包含三个部分: -1. 函数名 -2. 输入 -3. 期望输出 -每个测试用例包裹在一个三元组之中,三元组之间用逗号分隔,整体用列表包裹。 -由于结果需要被解析到JSON中,True与False请表示为true, false; -""" - REPHRASE_ON_PROBLEM_PROMPT = """ You are given a code contest problem: @@ -183,26 +143,26 @@ Reflect on the problem, and describe it in your own words, in bullet points. Pay """ -REFLECTION_ON_PUBILIC_TEST_PROMPT = """ - +REFLECTION_ON_PUBLIC_TEST_PROMPT = """ You are given a code contest problem, and a self-reflection on the problem: ### problem {problem_description} + ### self reflection on the problem {rephrase_problem} -======================= + A Python code solution was generated for the problem: ### Code Solution {code_solution} -======================= + This section of the code execution result is ### Execution Result {exec_pass} -======================= + However, when running the following input example, the code solution above failed to produce the expected output: #### Failed Test Case {test_fail} @@ -210,4 +170,31 @@ However, when running the following input example, the code solution above faile Your goal is to analyze the code solution and the error, and propose a fixed code which will produce the expected output for the provided test input. The fixed code should keep the solution robust, and work for all other input examples as well. Make sure the fixed code has a reasonable runtime - less than three seconds on a modern computer, given the problem constraints for large input. -""" \ No newline at end of file +""" + +EXTRACT_CASE_PROMPT = """ +You are given a coding problem, and you need to extract the test cases from the problem description. + +## Problem Description +{problem_description} + +Your task is to extract test cases from the above description and convert them into Python assert statements (as strings). These statements should be returned in a list for testing purposes. + +Example: +Input: +>>> has_close_elements([1.0, 2.0, 3.0], 0.5) +False +>>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3) +True + +Output: +[ + "assert candidate([1.0, 2.0, 3.0], 0.5) == False", + "assert candidate([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3) == True" +] + +Please ensure that: +1. Each test case is converted to a separate assert statement. +2. The function name in the original example (e.g., 'has_close_elements') is replaced with 'candidate'. +3. The assert statements are returned as strings in a list. +""" diff --git a/examples/ags/w_action_node/utils.py b/examples/ags/w_action_node/utils.py index 366cbb13e..fd3341cca 100644 --- a/examples/ags/w_action_node/utils.py +++ b/examples/ags/w_action_node/utils.py @@ -3,67 +3,42 @@ # @Author : didi # @Desc : utils for experiment +import ast import json import re -from typing import List, Dict, Any, Tuple -from metagpt.llm import LLM -from metagpt.actions.action_node import ActionNode +from typing import Any, List, Tuple + from examples.ags.w_action_node.operator_an import TestCaseExtractOp from examples.ags.w_action_node.prompt import EXTRACT_CASE_PROMPT +from metagpt.actions.action_node import ActionNode +from metagpt.llm import LLM + def extract_task_id(task_id: str) -> int: """Extract the numeric part of the task_id.""" - match = re.search(r'/(\d+)', task_id) + match = re.search(r"/(\d+)", task_id) return int(match.group(1)) if match else 0 -def jsonl_ranker(input_file: str, output_file: str): + +def sort_json_by_key(input_file: str, output_file: str, key: str = "task_id"): """ Read a JSONL file, sort the entries based on task_id, and write to a new JSONL file. - + :param input_file: Path to the input JSONL file :param output_file: Path to the output JSONL file """ # Read and parse the JSONL file - with open(input_file, 'r') as f: + with open(input_file, "r") as f: data = [json.loads(line) for line in f] - + # Sort the data based on the numeric part of task_id - sorted_data = sorted(data, key=lambda x: extract_task_id(x['task_id'])) - + sorted_data = sorted(data, key=lambda x: extract_task_id(x[key])) + # Write the sorted data to a new JSONL file - with open(output_file, 'w') as f: + with open(output_file, "w") as f: for item in sorted_data: - f.write(json.dumps(item) + '\n') + f.write(json.dumps(item) + "\n") -# def extract_test_cases_from_jsonl(problem_id:str, file_path:str="public_test.jsonl"): -# # TODO 这个JSONL效率有点神经病 -# if problem_id == "Humaneval/87": -# return [ ["get_row", [[[1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 1, 6], [1, 2, 3, 4, 5, 1]], 1], [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]], ["get_row", [[], 1], []], ["get_row", [[[], [1], [1, 2, 3]], 3], [(2, 2)]] ] -# elif problem_id == "Humaneval/95": -# return [ ["check_dict_case", [{"a": "apple", "b": "banana"}], True], ["check_dict_case", [{"a": "apple", "A": "banana", "B": "banana"}], False], ["check_dict_case", [{"a": "apple", "8": "banana", "a": "apple"}], False], ["check_dict_case", [{"Name": "John", "Age": "36", "City": "Houston"}], False], ["check_dict_case", [{"STATE": "NC", "ZIP": "12345"}], True] ] -# elif problem_id == "Humaneval/107": -# return [ ["even_odd_palindrome", [3], (1, 2)], ["even_odd_palindrome", [12], (4, 6)] ] -# elif problem_id == "Humaneval/112": -# return [ ["reverse_delete", ["abcde", "ae"], ("bcd", False)], ["reverse_delete", ["abcdef", "b"], ("acdef", False)], ["reverse_delete", ["abcdedcba", "ab"], ("cdedc", True)] ] -# elif problem_id == "Humaneval/127": -# return [ ["intersection", [(1, 2), (2, 3)], "NO"], ["intersection", [(-1, 1), (0, 4)], "NO"], ["intersection", [(-3, -1), (-5, 5)], "YES"] ] -# elif problem_id == "Humaneval/136": -# return [ ["largest_smallest_integers", [2, 4, 1, 3, 5, 7], (None, 1)], ["largest_smallest_integers", [], (None, None)], ["largest_smallest_integers", [0], (None, None)] ] -# elif problem_id == "Humaneval/148": -# return [ ["bf", ["Jupiter", "Neptune"], ("Saturn", "Uranus")], ["bf", ["Earth", "Mercury"], ("Venus",)], ["bf", ["Mercury", "Uranus"], ("Venus", "Earth", "Mars", "Jupiter", "Saturn")], ["bf", ["InvalidPlanet", "Neptune"], ()], ["bf", ["Jupiter", "InvalidPlanet"], ()], ["bf", ["Mercury", "Mercury"], ()] ] -# elif problem_id == "Humaneval/155": -# return [ ["even_odd_count", [-12], (1, 1)], ["even_odd_count", [123], (1, 2)] ] - -# with open(file_path, 'r') as file: -# for line in file: -# data = json.loads(line) -# if problem_id in data: -# return data[problem_id] - -# return None - -import json -import ast def parse_python_literal(s): try: @@ -71,7 +46,8 @@ def parse_python_literal(s): except (ValueError, SyntaxError): return s -def extract_test_cases_from_jsonl(problem_id:str, file_path:str="public_test_reflexion.jsonl"): + +def extract_test_cases_from_jsonl(problem_id: str, file_path: str = "public_test_reflexion.jsonl"): # 保留原有的硬编码测试用例 hardcoded_cases = { "HumanEval/32": "", @@ -84,7 +60,7 @@ def extract_test_cases_from_jsonl(problem_id:str, file_path:str="public_test_ref return hardcoded_cases[problem_id] # 如果没有硬编码的测试用例,从文件中读取 - with open(file_path, 'r') as file: + with open(file_path, "r") as file: for line in file: data = json.loads(line) if data.get("id") == problem_id: @@ -92,106 +68,63 @@ def extract_test_cases_from_jsonl(problem_id:str, file_path:str="public_test_ref return None # 如果没有找到问题,返回 None + def extract_test_cases(docstring: str) -> List[Tuple[str, List[Any], Any]]: # 使用正则表达式匹配测试用例,现在捕获函数名和任意输出 - pattern = r'>>> (\w+)\((.*?)\)\n\s*(.*?)(?=\n|$)' + pattern = r">>> (\w+)\((.*?)\)\n\s*(.*?)(?=\n|$)" matches = re.findall(pattern, docstring, re.DOTALL) - + test_cases = [] for match in matches: func_name, input_str, expected_output = match - + # 处理输入 input_list = [] - for item in input_str.split(','): + for item in input_str.split(","): item = item.strip() try: # 尝试将输入转换为数值类型 - if '.' in item: + if "." in item: input_list.append(float(item)) else: input_list.append(int(item)) except ValueError: # 如果无法转换为数值,则保留为字符串 input_list.append(item.strip("'\"")) - + # 处理输出 try: # 尝试将输出转换为数值或布尔值 - if expected_output.lower() == 'true': + if expected_output.lower() == "true": expected_output = True - elif expected_output.lower() == 'false': + elif expected_output.lower() == "false": expected_output = False - elif '.' in expected_output: + elif "." in expected_output: expected_output = float(expected_output) else: expected_output = int(expected_output) except ValueError: # 如果无法转换,则保留为字符串 expected_output = expected_output.strip("'\"") - + test_cases.append([func_name, input_list, expected_output]) - + return test_cases -async def llm_extract_test_case(id, problem_description: str, file_path:str="public_test.jsonl"): +async def llm_extract_test_case(id, problem_description: str, file_path: str = "public_test.jsonl"): prompt = EXTRACT_CASE_PROMPT.format(problem_description=problem_description) node = await ActionNode.from_pydantic(TestCaseExtractOp).fill(context=prompt, llm=LLM()) result = node.instruct_content.model_dump() - with open(file_path,"a") as f: - f.write(json.dumps({id:result["test_cases"]}) + '\n') - return {id:result["test_cases"]} + with open(file_path, "a") as f: + f.write(json.dumps({id: result["test_cases"]}) + "\n") + return {id: result["test_cases"]} -import json - -# def test_cases_2_test_functions(solution: str, test_case: List): -# print("test_case", test_case) -# function_name = test_case[0] - -# def format_param(param): -# if isinstance(param, str): -# return repr(param) -# elif isinstance(param, (int, float, bool)): -# return str(param) -# elif isinstance(param, list): -# return '[' + ', '.join(format_param(item) for item in param) + ']' -# elif isinstance(param, tuple): -# return '(' + ', '.join(format_param(item) for item in param) + ')' -# elif isinstance(param, dict): -# return '{' + ', '.join(f'{format_param(k)}: {format_param(v)}' for k, v in param.items()) + '}' -# elif isinstance(param, type(None)): -# return 'None' -# else: -# raise ValueError(f"Unsupported parameter type: {type(param)}") - -# parameters = ', '.join(format_param(item) for item in test_case[1]) -# print(test_case[1], parameters) - -# expected_output = format_param(test_case[2]) -# print(type(test_case[2]), test_case[2], expected_output) - -# tester_function = f""" -# {solution} - -# def check(candidate): -# assert candidate({parameters}) == {expected_output} - -# check({function_name}) -# """ - -# print(f""" -# Generated test function: -# {tester_function} -# """) - -# return tester_function - def test_cases_2_test_functions(solution: str, test_cases: str): tester_function = f""" {solution} {test_cases} -""" - return tester_function \ No newline at end of file +""" + return tester_function diff --git a/he_test.py b/he_test.py index a8d750d68..93fa72c66 100644 --- a/he_test.py +++ b/he_test.py @@ -1,21 +1,19 @@ import asyncio -import json -from metagpt.llm import LLM -from evalplus.data import get_human_eval_plus, write_jsonl -from examples.ags.benchmark.humaneval import sample_generate, samples_generate, extract_failure_tests, automatic_evalplus -from examples.ags.w_action_node.utils import jsonl_ranker, llm_extract_test_case -from examples.ags.w_action_node.graph import HumanEvalGraph -from examples.ags.w_action_node.utils import extract_test_cases_from_jsonl -# 132 141 136 80 73 -# asyncio.run(sample_generate('HumanEval/140',result_path="llm_based_1000.jsonl",mode="llm")) -# asyncio.run(sample_generate('HumanEval/140',result_path="llm_based_1000.jsonl",mode="llm")) -# asyncio.run(sample_generate('HumanEval/140',result_path="llm_based_1000.jsonl",mode="llm")) + +from examples.ags.benchmark.humaneval import sample_generate, samples_generate + +asyncio.run(sample_generate("HumanEval/0", result_path="llm_based_1000.jsonl", mode="llm")) +asyncio.run(samples_generate(mode="alpha_codium", result_path="alpha_based_1000.jsonl")) + +# asyncio.run(sample_generate('HumanEval/140',result_path="llm_based_1000.jsonl",mode="llm")) +# asyncio.run(sample_generate('HumanEval/140',result_path="llm_based_1000.jsonl",mode="llm")) # asyncio.run(sample_generate('HumanEval/67',result_path="llm_based_1000.jsonl",mode="llm")) # asyncio.run(sample_generate('HumanEval/108',result_path="llm_based_1000.jsonl",mode="llm")) # asyncio.run(sample_generate('HumanEval/110',result_path="llm_based_1000.jsonl",mode="llm")) -asyncio.run(samples_generate(mode='alpha',result_path="alpha_based_104.jsonl")) -# jsonl_ranker("llm_based_137.jsonl", "llm_based_137.jsonl") +# asyncio.run(samples_generate(mode='alpha',result_path="alpha_based_108.jsonl")) +# sort_json_by_key("alpha_based_108.jsonl", "alpha_based_108.jsonl") +# 64 84 160 148 109 # result_path = "ags_based_6.jsonl" # if automatic_evalplus(result_path): # unpassed_exapmle = extract_failure_tests(result_path[:-6]+"_eval_results.json") @@ -27,9 +25,6 @@ asyncio.run(samples_generate(mode='alpha',result_path="alpha_based_104.jsonl")) # for example in failure_list: # asyncio.run(sample_generate(example)) -# TODO 抽取Public Test没搞完,先用几个测试跑一下流程 -# from evalplus.data import get_human_eval_plus - # id_list = [87, 95, 107, 112, 127, 136, 148, 155] # id_list = [155] # cases_id = [f"HumanEval/{case_id}" for case_id in id_list] @@ -52,6 +47,6 @@ asyncio.run(samples_generate(mode='alpha',result_path="alpha_based_104.jsonl")) # solver = HumanEvalGraph(name="solver", llm=LLM(), criteria='correctness, efficiency, readability', vote_count=1) # result = asyncio.run(solver.alpha_codium(problem_id="HumanEval/140", problem=case_prompt, ensemble_count=1)) -# 1. Public Test 数据集不对 +# 1. Public Test 数据集不对 # 2. 修改两个Prompt的具体内容 -# 3. 尝试增加Test错误之后的修改能力 \ No newline at end of file +# 3. 尝试增加Test错误之后的修改能力 diff --git a/humaneval_analysis.ipynb b/humaneval_analysis.ipynb index fcbde3a9f..d2b1eec83 100644 --- a/humaneval_analysis.ipynb +++ b/humaneval_analysis.ipynb @@ -1,5 +1,2584 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "def check(candidate):\n", + " assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\n", + " assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\n", + " assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\n", + " assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\n", + " assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\n", + " assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\n", + " assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\n", + "\n", + "def test_check():\n", + " check(has_close_elements)\n", + "\n", + "test_check()\n", + "\n" + ] + } + ], + "source": [ + "file_path = \"public_test_reflexion.jsonl\"\n", + "\n", + "import json\n", + "\n", + "with open(file_path, 'r') as file:\n", + " for line in file:\n", + " data = json.loads(line)\n", + " break\n", + "\n", + "print(data['test'])\n", + "\n", + "# assert_code = tests_case[0]\n", + "# f\"\"\"\n", + "# def check(candidate):\n", + "# {assert_code}\n", + "# \"\"\"\n", + "\n", + "\n", + "# f\"\"\"\n", + "# def test_check():\n", + "# check({entry_point})\n", + "# test_check()\n", + "# \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "find_zero\n", + "from typing import List\n", + "\n", + "\n", + "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n", + " \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n", + " given threshold.\n", + " >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n", + " False\n", + " >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n", + " True\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n", + " \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n", + " given threshold.\n", + " >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n", + " False\n", + " >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n", + " True\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def separate_paren_groups(paren_string: str) -> List[str]:\n", + " \"\"\" Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n", + " separate those group into separate strings and return the list of those.\n", + " Separate groups are balanced (each open brace is properly closed) and not nested within each other\n", + " Ignore any spaces in the input string.\n", + " >>> separate_paren_groups('( ) (( )) (( )( ))')\n", + " ['()', '(())', '(()())']\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def truncate_number(number: float) -> float:\n", + " \"\"\" Given a positive floating point number, it can be decomposed into\n", + " and integer part (largest integer smaller than given number) and decimals\n", + " (leftover part always smaller than 1).\n", + "\n", + " Return the decimal part of the number.\n", + " >>> truncate_number(3.5)\n", + " 0.5\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def below_zero(operations: List[int]) -> bool:\n", + " \"\"\" You're given a list of deposit and withdrawal operations on a bank account that starts with\n", + " zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\n", + " at that point function should return True. Otherwise it should return False.\n", + " >>> below_zero([1, 2, 3])\n", + " False\n", + " >>> below_zero([1, 2, -4, 5])\n", + " True\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def mean_absolute_deviation(numbers: List[float]) -> float:\n", + " \"\"\" For a given list of input numbers, calculate Mean Absolute Deviation\n", + " around the mean of this dataset.\n", + " Mean Absolute Deviation is the average absolute difference between each\n", + " element and a centerpoint (mean in this case):\n", + " MAD = average | x - x_mean |\n", + " >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n", + " 1.0\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def intersperse(numbers: List[int], delimeter: int) -> List[int]:\n", + " \"\"\" Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n", + " >>> intersperse([], 4)\n", + " []\n", + " >>> intersperse([1, 2, 3], 4)\n", + " [1, 4, 2, 4, 3]\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def parse_nested_parens(paren_string: str) -> List[int]:\n", + " \"\"\" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n", + " For each of the group, output the deepest level of nesting of parentheses.\n", + " E.g. (()()) has maximum two levels of nesting while ((())) has three.\n", + "\n", + " >>> parse_nested_parens('(()()) ((())) () ((())()())')\n", + " [2, 3, 1, 3]\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def filter_by_substring(strings: List[str], substring: str) -> List[str]:\n", + " \"\"\" Filter an input list of strings only for ones that contain given substring\n", + " >>> filter_by_substring([], 'a')\n", + " []\n", + " >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\n", + " ['abc', 'bacd', 'array']\n", + " \"\"\"\n", + "\n", + "from typing import List, Tuple\n", + "\n", + "\n", + "def sum_product(numbers: List[int]) -> Tuple[int, int]:\n", + " \"\"\" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n", + " Empty sum should be equal to 0 and empty product should be equal to 1.\n", + " >>> sum_product([])\n", + " (0, 1)\n", + " >>> sum_product([1, 2, 3, 4])\n", + " (10, 24)\n", + " \"\"\"\n", + "\n", + "from typing import List, Tuple\n", + "\n", + "\n", + "def rolling_max(numbers: List[int]) -> List[int]:\n", + " \"\"\" From a given list of integers, generate a list of rolling maximum element found until given moment\n", + " in the sequence.\n", + " >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n", + " [1, 2, 3, 3, 3, 4, 4]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def is_palindrome(string: str) -> bool:\n", + " \"\"\" Test if given string is a palindrome \"\"\"\n", + " return string == string[::-1]\n", + "\n", + "\n", + "def make_palindrome(string: str) -> str:\n", + " \"\"\" Find the shortest palindrome that begins with a supplied string.\n", + " Algorithm idea is simple:\n", + " - Find the longest postfix of supplied string that is a palindrome.\n", + " - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n", + " >>> make_palindrome('')\n", + " ''\n", + " >>> make_palindrome('cat')\n", + " 'catac'\n", + " >>> make_palindrome('cata')\n", + " 'catac'\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def string_xor(a: str, b: str) -> str:\n", + " \"\"\" Input are two strings a and b consisting only of 1s and 0s.\n", + " Perform binary XOR on these inputs and return result also as a string.\n", + " >>> string_xor('010', '110')\n", + " '100'\n", + " \"\"\"\n", + "\n", + "from typing import List, Optional\n", + "\n", + "\n", + "def longest(strings: List[str]) -> Optional[str]:\n", + " \"\"\" Out of list of strings, return the longest one. Return the first one in case of multiple\n", + " strings of the same length. Return None in case the input list is empty.\n", + " >>> longest([])\n", + "\n", + " >>> longest(['a', 'b', 'c'])\n", + " 'a'\n", + " >>> longest(['a', 'bb', 'ccc'])\n", + " 'ccc'\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def greatest_common_divisor(a: int, b: int) -> int:\n", + " \"\"\" Return a greatest common divisor of two integers a and b\n", + " >>> greatest_common_divisor(3, 5)\n", + " 1\n", + " >>> greatest_common_divisor(25, 15)\n", + " 5\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def all_prefixes(string: str) -> List[str]:\n", + " \"\"\" Return list of all prefixes from shortest to longest of the input string\n", + " >>> all_prefixes('abc')\n", + " ['a', 'ab', 'abc']\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def string_sequence(n: int) -> str:\n", + " \"\"\" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n", + " >>> string_sequence(0)\n", + " '0'\n", + " >>> string_sequence(5)\n", + " '0 1 2 3 4 5'\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def count_distinct_characters(string: str) -> int:\n", + " \"\"\" Given a string, find out how many distinct characters (regardless of case) does it consist of\n", + " >>> count_distinct_characters('xyzXYZ')\n", + " 3\n", + " >>> count_distinct_characters('Jerry')\n", + " 4\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def parse_music(music_string: str) -> List[int]:\n", + " \"\"\" Input to this function is a string representing musical notes in a special ASCII format.\n", + " Your task is to parse this string and return list of integers corresponding to how many beats does each\n", + " not last.\n", + "\n", + " Here is a legend:\n", + " 'o' - whole note, lasts four beats\n", + " 'o|' - half note, lasts two beats\n", + " '.|' - quater note, lasts one beat\n", + "\n", + " >>> parse_music('o o| .| o| o| .| .| .| .| o o')\n", + " [4, 2, 1, 2, 2, 1, 1, 1, 1, 4, 4]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def how_many_times(string: str, substring: str) -> int:\n", + " \"\"\" Find how many times a given substring can be found in the original string. Count overlaping cases.\n", + " >>> how_many_times('', 'a')\n", + " 0\n", + " >>> how_many_times('aaa', 'a')\n", + " 3\n", + " >>> how_many_times('aaaa', 'aa')\n", + " 3\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def sort_numbers(numbers: str) -> str:\n", + " \"\"\" Input is a space-delimited string of numberals from 'zero' to 'nine'.\n", + " Valid choices are 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight' and 'nine'.\n", + " Return the string with numbers sorted from smallest to largest\n", + " >>> sort_numbers('three one five')\n", + " 'one three five'\n", + " \"\"\"\n", + "\n", + "from typing import List, Tuple\n", + "\n", + "\n", + "def find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n", + " \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n", + " other and return them in order (smaller number, larger number).\n", + " >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.2])\n", + " (2.0, 2.2)\n", + " >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0])\n", + " (2.0, 2.0)\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def rescale_to_unit(numbers: List[float]) -> List[float]:\n", + " \"\"\" Given list of numbers (of at least two elements), apply a linear transform to that list,\n", + " such that the smallest number will become 0 and the largest will become 1\n", + " >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])\n", + " [0.0, 0.25, 0.5, 0.75, 1.0]\n", + " \"\"\"\n", + "\n", + "from typing import List, Any\n", + "\n", + "\n", + "def filter_integers(values: List[Any]) -> List[int]:\n", + " \"\"\" Filter given list of any python values only for integers\n", + " >>> filter_integers(['a', 3.14, 5])\n", + " [5]\n", + " >>> filter_integers([1, 2, 3, 'abc', {}, []])\n", + " [1, 2, 3]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def strlen(string: str) -> int:\n", + " \"\"\" Return length of given string\n", + " >>> strlen('')\n", + " 0\n", + " >>> strlen('abc')\n", + " 3\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def largest_divisor(n: int) -> int:\n", + " \"\"\" For a given number n, find the largest number that divides n evenly, smaller than n\n", + " >>> largest_divisor(15)\n", + " 5\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def factorize(n: int) -> List[int]:\n", + " \"\"\" Return list of prime factors of given integer in the order from smallest to largest.\n", + " Each of the factors should be listed number of times corresponding to how many times it appeares in factorization.\n", + " Input number should be equal to the product of all factors\n", + " >>> factorize(8)\n", + " [2, 2, 2]\n", + " >>> factorize(25)\n", + " [5, 5]\n", + " >>> factorize(70)\n", + " [2, 5, 7]\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def remove_duplicates(numbers: List[int]) -> List[int]:\n", + " \"\"\" From a list of integers, remove all elements that occur more than once.\n", + " Keep order of elements left the same as in the input.\n", + " >>> remove_duplicates([1, 2, 3, 2, 4])\n", + " [1, 3, 4]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def flip_case(string: str) -> str:\n", + " \"\"\" For a given string, flip lowercase characters to uppercase and uppercase to lowercase.\n", + " >>> flip_case('Hello')\n", + " 'hELLO'\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def concatenate(strings: List[str]) -> str:\n", + " \"\"\" Concatenate list of strings into a single string\n", + " >>> concatenate([])\n", + " ''\n", + " >>> concatenate(['a', 'b', 'c'])\n", + " 'abc'\n", + " \"\"\"\n", + "\n", + "from typing import List\n", + "\n", + "\n", + "def filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n", + " \"\"\" Filter an input list of strings only for ones that start with a given prefix.\n", + " >>> filter_by_prefix([], 'a')\n", + " []\n", + " >>> filter_by_prefix(['abc', 'bcd', 'cde', 'array'], 'a')\n", + " ['abc', 'array']\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def get_positive(l: list):\n", + " \"\"\"Return only positive numbers in the list.\n", + " >>> get_positive([-1, 2, -4, 5, 6])\n", + " [2, 5, 6]\n", + " >>> get_positive([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n", + " [5, 3, 2, 3, 9, 123, 1]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def is_prime(n):\n", + " \"\"\"Return true if a given number is prime, and false otherwise.\n", + " >>> is_prime(6)\n", + " False\n", + " >>> is_prime(101)\n", + " True\n", + " >>> is_prime(11)\n", + " True\n", + " >>> is_prime(13441)\n", + " True\n", + " >>> is_prime(61)\n", + " True\n", + " >>> is_prime(4)\n", + " False\n", + " >>> is_prime(1)\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def sort_third(l: list):\n", + " \"\"\"This function takes a list l and returns a list l' such that\n", + " l' is identical to l in the indicies that are not divisible by three, while its values at the indicies that are divisible by three are equal\n", + " to the values of the corresponding indicies of l, but sorted.\n", + " >>> sort_third([1, 2, 3])\n", + " [1, 2, 3]\n", + " >>> sort_third([5, 6, 3, 4, 8, 9, 2])\n", + " [2, 6, 3, 4, 8, 9, 5]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def unique(l: list):\n", + " \"\"\"Return sorted unique elements in a list\n", + " >>> unique([5, 3, 5, 2, 3, 3, 9, 0, 123])\n", + " [0, 2, 3, 5, 9, 123]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def max_element(l: list):\n", + " \"\"\"Return maximum element in the list.\n", + " >>> max_element([1, 2, 3])\n", + " 3\n", + " >>> max_element([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])\n", + " 123\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def fizz_buzz(n: int):\n", + " \"\"\"Return the number of times the digit 7 appears in integers less than n which are divisible by 11 or 13.\n", + " >>> fizz_buzz(50)\n", + " 0\n", + " >>> fizz_buzz(78)\n", + " 2\n", + " >>> fizz_buzz(79)\n", + " 3\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def sort_even(l: list):\n", + " \"\"\"This function takes a list l and returns a list l' such that\n", + " l' is identical to l in the odd indicies, while its values at the even indicies are equal\n", + " to the values of the even indicies of l, but sorted.\n", + " >>> sort_even([1, 2, 3])\n", + " [1, 2, 3]\n", + " >>> sort_even([5, 6, 3, 4])\n", + " [3, 6, 5, 4]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def prime_fib(n: int):\n", + " \"\"\"\n", + " prime_fib returns n-th number that is a Fibonacci number and it's also prime.\n", + " >>> prime_fib(1)\n", + " 2\n", + " >>> prime_fib(2)\n", + " 3\n", + " >>> prime_fib(3)\n", + " 5\n", + " >>> prime_fib(4)\n", + " 13\n", + " >>> prime_fib(5)\n", + " 89\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def triples_sum_to_zero(l: list):\n", + " \"\"\"\n", + " triples_sum_to_zero takes a list of integers as an input.\n", + " it returns True if there are three distinct elements in the list that\n", + " sum to zero, and False otherwise.\n", + "\n", + " >>> triples_sum_to_zero([1, 3, 5, 0])\n", + " False\n", + " >>> triples_sum_to_zero([1, 3, -2, 1])\n", + " True\n", + " >>> triples_sum_to_zero([1, 2, 3, 7])\n", + " False\n", + " >>> triples_sum_to_zero([2, 4, -5, 3, 9, 7])\n", + " True\n", + " >>> triples_sum_to_zero([1])\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def car_race_collision(n: int):\n", + " \"\"\"\n", + " Imagine a road that's a perfectly straight infinitely long line.\n", + " n cars are driving left to right; simultaneously, a different set of n cars\n", + " are driving right to left. The two sets of cars start out being very far from\n", + " each other. All cars move in the same speed. Two cars are said to collide\n", + " when a car that's moving left to right hits a car that's moving right to left.\n", + " However, the cars are infinitely sturdy and strong; as a result, they continue moving\n", + " in their trajectory as if they did not collide.\n", + "\n", + " This function outputs the number of such collisions.\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def incr_list(l: list):\n", + " \"\"\"Return list with elements incremented by 1.\n", + " >>> incr_list([1, 2, 3])\n", + " [2, 3, 4]\n", + " >>> incr_list([5, 3, 5, 2, 3, 3, 9, 0, 123])\n", + " [6, 4, 6, 3, 4, 4, 10, 1, 124]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def pairs_sum_to_zero(l):\n", + " \"\"\"\n", + " pairs_sum_to_zero takes a list of integers as an input.\n", + " it returns True if there are two distinct elements in the list that\n", + " sum to zero, and False otherwise.\n", + " >>> pairs_sum_to_zero([1, 3, 5, 0])\n", + " False\n", + " >>> pairs_sum_to_zero([1, 3, -2, 1])\n", + " False\n", + " >>> pairs_sum_to_zero([1, 2, 3, 7])\n", + " False\n", + " >>> pairs_sum_to_zero([2, 4, -5, 3, 5, 7])\n", + " True\n", + " >>> pairs_sum_to_zero([1])\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def change_base(x: int, base: int):\n", + " \"\"\"Change numerical base of input number x to base.\n", + " return string representation after the conversion.\n", + " base numbers are less than 10.\n", + " >>> change_base(8, 3)\n", + " '22'\n", + " >>> change_base(8, 2)\n", + " '1000'\n", + " >>> change_base(7, 2)\n", + " '111'\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def triangle_area(a, h):\n", + " \"\"\"Given length of a side and high return area for a triangle.\n", + " >>> triangle_area(5, 3)\n", + " 7.5\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def fib4(n: int):\n", + " \"\"\"The Fib4 number sequence is a sequence similar to the Fibbonacci sequnece that's defined as follows:\n", + " fib4(0) -> 0\n", + " fib4(1) -> 0\n", + " fib4(2) -> 2\n", + " fib4(3) -> 0\n", + " fib4(n) -> fib4(n-1) + fib4(n-2) + fib4(n-3) + fib4(n-4).\n", + " Please write a function to efficiently compute the n-th element of the fib4 number sequence. Do not use recursion.\n", + " >>> fib4(5)\n", + " 4\n", + " >>> fib4(6)\n", + " 8\n", + " >>> fib4(7)\n", + " 14\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def median(l: list):\n", + " \"\"\"Return median of elements in the list l.\n", + " >>> median([3, 1, 2, 4, 5])\n", + " 3\n", + " >>> median([-10, 4, 6, 1000, 10, 20])\n", + " 15.0\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def is_palindrome(text: str):\n", + " \"\"\"\n", + " Checks if given string is a palindrome\n", + " >>> is_palindrome('')\n", + " True\n", + " >>> is_palindrome('aba')\n", + " True\n", + " >>> is_palindrome('aaaaa')\n", + " True\n", + " >>> is_palindrome('zbcd')\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def modp(n: int, p: int):\n", + " \"\"\"Return 2^n modulo p (be aware of numerics).\n", + " >>> modp(3, 5)\n", + " 3\n", + " >>> modp(1101, 101)\n", + " 2\n", + " >>> modp(0, 101)\n", + " 1\n", + " >>> modp(3, 11)\n", + " 8\n", + " >>> modp(100, 101)\n", + " 1\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def remove_vowels(text):\n", + " \"\"\"\n", + " remove_vowels is a function that takes string and returns string without vowels.\n", + " >>> remove_vowels('')\n", + " ''\n", + " >>> remove_vowels(\"abcdef\\nghijklm\")\n", + " 'bcdf\\nghjklm'\n", + " >>> remove_vowels('abcdef')\n", + " 'bcdf'\n", + " >>> remove_vowels('aaaaa')\n", + " ''\n", + " >>> remove_vowels('aaBAA')\n", + " 'B'\n", + " >>> remove_vowels('zbcd')\n", + " 'zbcd'\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def below_threshold(l: list, t: int):\n", + " \"\"\"Return True if all numbers in the list l are below threshold t.\n", + " >>> below_threshold([1, 2, 4, 10], 100)\n", + " True\n", + " >>> below_threshold([1, 20, 4, 10], 5)\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def add(x: int, y: int):\n", + " \"\"\"Add two numbers x and y\n", + " >>> add(2, 3)\n", + " 5\n", + " >>> add(5, 7)\n", + " 12\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def same_chars(s0: str, s1: str):\n", + " \"\"\"\n", + " Check if two words have the same characters.\n", + " >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')\n", + " True\n", + " >>> same_chars('abcd', 'dddddddabc')\n", + " True\n", + " >>> same_chars('dddddddabc', 'abcd')\n", + " True\n", + " >>> same_chars('eabcd', 'dddddddabc')\n", + " False\n", + " >>> same_chars('abcd', 'dddddddabce')\n", + " False\n", + " >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def fib(n: int):\n", + " \"\"\"Return n-th Fibonacci number.\n", + " >>> fib(10)\n", + " 55\n", + " >>> fib(1)\n", + " 1\n", + " >>> fib(8)\n", + " 21\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def correct_bracketing(brackets: str):\n", + " \"\"\" brackets is a string of \"<\" and \">\".\n", + " return True if every opening bracket has a corresponding closing bracket.\n", + "\n", + " >>> correct_bracketing(\"<\")\n", + " False\n", + " >>> correct_bracketing(\"<>\")\n", + " True\n", + " >>> correct_bracketing(\"<<><>>\")\n", + " True\n", + " >>> correct_bracketing(\"><<>\")\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def monotonic(l: list):\n", + " \"\"\"Return True is list elements are monotonically increasing or decreasing.\n", + " >>> monotonic([1, 2, 4, 20])\n", + " True\n", + " >>> monotonic([1, 20, 4, 10])\n", + " False\n", + " >>> monotonic([4, 1, 0, -10])\n", + " True\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def common(l1: list, l2: list):\n", + " \"\"\"Return sorted unique common elements for two lists.\n", + " >>> common([1, 4, 3, 34, 653, 2, 5], [5, 7, 1, 5, 9, 653, 121])\n", + " [1, 5, 653]\n", + " >>> common([5, 3, 2, 8], [3, 2])\n", + " [2, 3]\n", + "\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def largest_prime_factor(n: int):\n", + " \"\"\"Return the largest prime factor of n. Assume n > 1 and is not a prime.\n", + " >>> largest_prime_factor(13195)\n", + " 29\n", + " >>> largest_prime_factor(2048)\n", + " 2\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def sum_to_n(n: int):\n", + " \"\"\"sum_to_n is a function that sums numbers from 1 to n.\n", + " >>> sum_to_n(30)\n", + " 465\n", + " >>> sum_to_n(100)\n", + " 5050\n", + " >>> sum_to_n(5)\n", + " 15\n", + " >>> sum_to_n(10)\n", + " 55\n", + " >>> sum_to_n(1)\n", + " 1\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def correct_bracketing(brackets: str):\n", + " \"\"\" brackets is a string of \"(\" and \")\".\n", + " return True if every opening bracket has a corresponding closing bracket.\n", + "\n", + " >>> correct_bracketing(\"(\")\n", + " False\n", + " >>> correct_bracketing(\"()\")\n", + " True\n", + " >>> correct_bracketing(\"(()())\")\n", + " True\n", + " >>> correct_bracketing(\")(()\")\n", + " False\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def derivative(xs: list):\n", + " \"\"\" xs represent coefficients of a polynomial.\n", + " xs[0] + xs[1] * x + xs[2] * x^2 + ....\n", + " Return derivative of this polynomial in the same form.\n", + " >>> derivative([3, 1, 2, 4, 5])\n", + " [1, 4, 12, 20]\n", + " >>> derivative([1, 2, 3])\n", + " [2, 6]\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def fibfib(n: int):\n", + " \"\"\"The FibFib number sequence is a sequence similar to the Fibbonacci sequnece that's defined as follows:\n", + " fibfib(0) == 0\n", + " fibfib(1) == 0\n", + " fibfib(2) == 1\n", + " fibfib(n) == fibfib(n-1) + fibfib(n-2) + fibfib(n-3).\n", + " Please write a function to efficiently compute the n-th element of the fibfib number sequence.\n", + " >>> fibfib(1)\n", + " 0\n", + " >>> fibfib(5)\n", + " 4\n", + " >>> fibfib(8)\n", + " 24\n", + " \"\"\"\n", + "\n", + "\n", + "FIX = \"\"\"\n", + "Add more test cases.\n", + "\"\"\"\n", + "\n", + "def vowels_count(s):\n", + " \"\"\"Write a function vowels_count which takes a string representing\n", + " a word as input and returns the number of vowels in the string.\n", + " Vowels in this case are 'a', 'e', 'i', 'o', 'u'. Here, 'y' is also a\n", + " vowel, but only when it is at the end of the given word.\n", + "\n", + " Example:\n", + " >>> vowels_count(\"abcde\")\n", + " 2\n", + " >>> vowels_count(\"ACEDY\")\n", + " 3\n", + " \"\"\"\n", + "\n", + "\n", + "def circular_shift(x, shift):\n", + " \"\"\"Circular shift the digits of the integer x, shift the digits right by shift\n", + " and return the result as a string.\n", + " If shift > number of digits, return digits reversed.\n", + " >>> circular_shift(12, 1)\n", + " \"21\"\n", + " >>> circular_shift(12, 2)\n", + " \"12\"\n", + " \"\"\"\n", + "\n", + "\n", + "def digitSum(s):\n", + " \"\"\"Task\n", + " Write a function that takes a string as input and returns the sum of the upper characters only'\n", + " ASCII codes.\n", + "\n", + " Examples:\n", + " digitSum(\"\") => 0\n", + " digitSum(\"abAB\") => 131\n", + " digitSum(\"abcCd\") => 67\n", + " digitSum(\"helloE\") => 69\n", + " digitSum(\"woArBld\") => 131\n", + " digitSum(\"aAaaaXa\") => 153\n", + " \"\"\"\n", + "\n", + "\n", + "def fruit_distribution(s,n):\n", + " \"\"\"\n", + " In this task, you will be given a string that represents a number of apples and oranges \n", + " that are distributed in a basket of fruit this basket contains \n", + " apples, oranges, and mango fruits. Given the string that represents the total number of \n", + " the oranges and apples and an integer that represent the total number of the fruits \n", + " in the basket return the number of the mango fruits in the basket.\n", + " for examble:\n", + " fruit_distribution(\"5 apples and 6 oranges\", 19) ->19 - 5 - 6 = 8\n", + " fruit_distribution(\"0 apples and 1 oranges\",3) -> 3 - 0 - 1 = 2\n", + " fruit_distribution(\"2 apples and 3 oranges\", 100) -> 100 - 2 - 3 = 95\n", + " fruit_distribution(\"100 apples and 1 oranges\",120) -> 120 - 100 - 1 = 19\n", + " \"\"\"\n", + "\n", + "\n", + "def pluck(arr):\n", + " \"\"\"\n", + " \"Given an array representing a branch of a tree that has non-negative integer nodes\n", + " your task is to pluck one of the nodes and return it.\n", + " The plucked node should be the node with the smallest even value.\n", + " If multiple nodes with the same smallest even value are found return the node that has smallest index.\n", + "\n", + " The plucked node should be returned in a list, [ smalest_value, its index ],\n", + " If there are no even values or the given array is empty, return [].\n", + "\n", + " Example 1:\n", + " Input: [4,2,3]\n", + " Output: [2, 1]\n", + " Explanation: 2 has the smallest even value, and 2 has the smallest index.\n", + "\n", + " Example 2:\n", + " Input: [1,2,3]\n", + " Output: [2, 1]\n", + " Explanation: 2 has the smallest even value, and 2 has the smallest index. \n", + "\n", + " Example 3:\n", + " Input: []\n", + " Output: []\n", + " \n", + " Example 4:\n", + " Input: [5, 0, 3, 0, 4, 2]\n", + " Output: [0, 1]\n", + " Explanation: 0 is the smallest value, but there are two zeros,\n", + " so we will choose the first zero, which has the smallest index.\n", + "\n", + " Constraints:\n", + " * 1 <= nodes.length <= 10000\n", + " * 0 <= node.value\n", + " \"\"\"\n", + "\n", + "\n", + "def search(lst):\n", + " '''\n", + " You are given a non-empty list of positive integers. Return the greatest integer that is greater than \n", + " zero, and has a frequency greater than or equal to the value of the integer itself. \n", + " The frequency of an integer is the number of times it appears in the list.\n", + " If no such a value exist, return -1.\n", + " Examples:\n", + " search([4, 1, 2, 2, 3, 1]) == 2\n", + " search([1, 2, 2, 3, 3, 3, 4, 4, 4]) == 3\n", + " search([5, 5, 4, 4, 4]) == -1\n", + " '''\n", + "\n", + "\n", + "def strange_sort_list(lst):\n", + " '''\n", + " Given list of integers, return list in strange order.\n", + " Strange sorting, is when you start with the minimum value,\n", + " then maximum of the remaining integers, then minimum and so on.\n", + "\n", + " Examples:\n", + " strange_sort_list([1, 2, 3, 4]) == [1, 4, 2, 3]\n", + " strange_sort_list([5, 5, 5, 5]) == [5, 5, 5, 5]\n", + " strange_sort_list([]) == []\n", + " '''\n", + "\n", + "\n", + "def triangle_area(a, b, c):\n", + " '''\n", + " Given the lengths of the three sides of a triangle. Return the area of\n", + " the triangle rounded to 2 decimal points if the three sides form a valid triangle. \n", + " Otherwise return -1\n", + " Three sides make a valid triangle when the sum of any two sides is greater \n", + " than the third side.\n", + " Example:\n", + " triangle_area(3, 4, 5) == 6.00\n", + " triangle_area(1, 2, 10) == -1\n", + " '''\n", + "\n", + "\n", + "def will_it_fly(q,w):\n", + " '''\n", + " Write a function that returns True if the object q will fly, and False otherwise.\n", + " The object q will fly if it's balanced (it is a palindromic list) and the sum of its elements is less than or equal the maximum possible weight w.\n", + "\n", + " Example:\n", + " will_it_fly([1, 2], 5) ➞ False \n", + " # 1+2 is less than the maximum possible weight, but it's unbalanced.\n", + "\n", + " will_it_fly([3, 2, 3], 1) ➞ False\n", + " # it's balanced, but 3+2+3 is more than the maximum possible weight.\n", + "\n", + " will_it_fly([3, 2, 3], 9) ➞ True\n", + " # 3+2+3 is less than the maximum possible weight, and it's balanced.\n", + "\n", + " will_it_fly([3], 5) ➞ True\n", + " # 3 is less than the maximum possible weight, and it's balanced.\n", + " '''\n", + "\n", + "\n", + "def smallest_change(arr):\n", + " \"\"\"\n", + " Given an array arr of integers, find the minimum number of elements that\n", + " need to be changed to make the array palindromic. A palindromic array is an array that\n", + " is read the same backwards and forwards. In one change, you can change one element to any other element.\n", + "\n", + " For example:\n", + " smallest_change([1,2,3,5,4,7,9,6]) == 4\n", + " smallest_change([1, 2, 3, 4, 3, 2, 2]) == 1\n", + " smallest_change([1, 2, 3, 2, 1]) == 0\n", + " \"\"\"\n", + "\n", + "\n", + "def total_match(lst1, lst2):\n", + " '''\n", + " Write a function that accepts two lists of strings and returns the list that has \n", + " total number of chars in the all strings of the list less than the other list.\n", + "\n", + " if the two lists have the same number of chars, return the first list.\n", + "\n", + " Examples\n", + " total_match([], []) ➞ []\n", + " total_match(['hi', 'admin'], ['hI', 'Hi']) ➞ ['hI', 'Hi']\n", + " total_match(['hi', 'admin'], ['hi', 'hi', 'admin', 'project']) ➞ ['hi', 'admin']\n", + " total_match(['hi', 'admin'], ['hI', 'hi', 'hi']) ➞ ['hI', 'hi', 'hi']\n", + " total_match(['4'], ['1', '2', '3', '4', '5']) ➞ ['4']\n", + " '''\n", + "\n", + "\n", + "def is_multiply_prime(a):\n", + " \"\"\"Write a function that returns true if the given number is the multiplication of 3 prime numbers\n", + " and false otherwise.\n", + " Knowing that (a) is less then 100. \n", + " Example:\n", + " is_multiply_prime(30) == True\n", + " 30 = 2 * 3 * 5\n", + " \"\"\"\n", + "\n", + "\n", + "def is_simple_power(x, n):\n", + " \"\"\"Your task is to write a function that returns true if a number x is a simple\n", + " power of n and false in other cases.\n", + " x is a simple power of n if n**int=x\n", + " For example:\n", + " is_simple_power(1, 4) => true\n", + " is_simple_power(2, 2) => true\n", + " is_simple_power(8, 2) => true\n", + " is_simple_power(3, 2) => false\n", + " is_simple_power(3, 1) => false\n", + " is_simple_power(5, 3) => false\n", + " \"\"\"\n", + "\n", + "\n", + "def iscube(a):\n", + " '''\n", + " Write a function that takes an integer a and returns True \n", + " if this ingeger is a cube of some integer number.\n", + " Note: you may assume the input is always valid.\n", + " Examples:\n", + " iscube(1) ==> True\n", + " iscube(2) ==> False\n", + " iscube(-1) ==> True\n", + " iscube(64) ==> True\n", + " iscube(0) ==> True\n", + " iscube(180) ==> False\n", + " '''\n", + "\n", + "\n", + "def hex_key(num):\n", + " \"\"\"You have been tasked to write a function that receives \n", + " a hexadecimal number as a string and counts the number of hexadecimal \n", + " digits that are primes (prime number, or a prime, is a natural number \n", + " greater than 1 that is not a product of two smaller natural numbers).\n", + " Hexadecimal digits are 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F.\n", + " Prime numbers are 2, 3, 5, 7, 11, 13, 17,...\n", + " So you have to determine a number of the following digits: 2, 3, 5, 7, \n", + " B (=decimal 11), D (=decimal 13).\n", + " Note: you may assume the input is always correct or empty string, \n", + " and symbols A,B,C,D,E,F are always uppercase.\n", + " Examples:\n", + " For num = \"AB\" the output should be 1.\n", + " For num = \"1077E\" the output should be 2.\n", + " For num = \"ABED1A33\" the output should be 4.\n", + " For num = \"123456789ABCDEF0\" the output should be 6.\n", + " For num = \"2020\" the output should be 2.\n", + " \"\"\"\n", + "\n", + "\n", + "def decimal_to_binary(decimal):\n", + " \"\"\"You will be given a number in decimal form and your task is to convert it to\n", + " binary format. The function should return a string, with each character representing a binary\n", + " number. Each character in the string will be '0' or '1'.\n", + "\n", + " There will be an extra couple of characters 'db' at the beginning and at the end of the string.\n", + " The extra characters are there to help with the format.\n", + "\n", + " Examples:\n", + " decimal_to_binary(15) # returns \"db1111db\"\n", + " decimal_to_binary(32) # returns \"db100000db\"\n", + " \"\"\"\n", + "\n", + "\n", + "def is_happy(s):\n", + " \"\"\"You are given a string s.\n", + " Your task is to check if the string is happy or not.\n", + " A string is happy if its length is at least 3 and every 3 consecutive letters are distinct\n", + " For example:\n", + " is_happy(a) => False\n", + " is_happy(aa) => False\n", + " is_happy(abcd) => True\n", + " is_happy(aabb) => False\n", + " is_happy(adb) => True\n", + " is_happy(xyy) => False\n", + " \"\"\"\n", + "\n", + "\n", + "def numerical_letter_grade(grades):\n", + " \"\"\"It is the last week of the semester and the teacher has to give the grades\n", + " to students. The teacher has been making her own algorithm for grading.\n", + " The only problem is, she has lost the code she used for grading.\n", + " She has given you a list of GPAs for some students and you have to write \n", + " a function that can output a list of letter grades using the following table:\n", + " GPA | Letter grade\n", + " 4.0 A+\n", + " > 3.7 A \n", + " > 3.3 A- \n", + " > 3.0 B+\n", + " > 2.7 B \n", + " > 2.3 B-\n", + " > 2.0 C+\n", + " > 1.7 C\n", + " > 1.3 C-\n", + " > 1.0 D+ \n", + " > 0.7 D \n", + " > 0.0 D-\n", + " 0.0 E\n", + " \n", + "\n", + " Example:\n", + " grade_equation([4.0, 3, 1.7, 2, 3.5]) ==> ['A+', 'B', 'C-', 'C', 'A-']\n", + " \"\"\"\n", + "\n", + "\n", + "def prime_length(string):\n", + " \"\"\"Write a function that takes a string and returns True if the string\n", + " length is a prime number or False otherwise\n", + " Examples\n", + " prime_length('Hello') == True\n", + " prime_length('abcdcba') == True\n", + " prime_length('kittens') == True\n", + " prime_length('orange') == False\n", + " \"\"\"\n", + "\n", + "\n", + "def starts_one_ends(n):\n", + " \"\"\"\n", + " Given a positive integer n, return the count of the numbers of n-digit\n", + " positive integers that start or end with 1.\n", + " \"\"\"\n", + "\n", + "\n", + "def solve(N):\n", + " \"\"\"Given a positive integer N, return the total sum of its digits in binary.\n", + " \n", + " Example\n", + " For N = 1000, the sum of digits will be 1 the output should be \"1\".\n", + " For N = 150, the sum of digits will be 6 the output should be \"110\".\n", + " For N = 147, the sum of digits will be 12 the output should be \"1100\".\n", + " \n", + " Variables:\n", + " @N integer\n", + " Constraints: 0 ≤ N ≤ 10000.\n", + " Output:\n", + " a string of binary number\n", + " \"\"\"\n", + "\n", + "\n", + "def add(lst):\n", + " \"\"\"Given a non-empty list of integers lst. add the even elements that are at odd indices..\n", + "\n", + "\n", + " Examples:\n", + " add([4, 2, 6, 7]) ==> 2 \n", + " \"\"\"\n", + "\n", + "\n", + "def anti_shuffle(s):\n", + " \"\"\"\n", + " Write a function that takes a string and returns an ordered version of it.\n", + " Ordered version of string, is a string where all words (separated by space)\n", + " are replaced by a new word where all the characters arranged in\n", + " ascending order based on ascii value.\n", + " Note: You should keep the order of words and blank spaces in the sentence.\n", + "\n", + " For example:\n", + " anti_shuffle('Hi') returns 'Hi'\n", + " anti_shuffle('hello') returns 'ehllo'\n", + " anti_shuffle('Hello World!!!') returns 'Hello !!!Wdlor'\n", + " \"\"\"\n", + "\n", + "\n", + "def get_row(lst, x):\n", + " \"\"\"\n", + " You are given a 2 dimensional data, as a nested lists,\n", + " which is similar to matrix, however, unlike matrices,\n", + " each row may contain a different number of columns.\n", + " Given lst, and integer x, find integers x in the list,\n", + " and return list of tuples, [(x1, y1), (x2, y2) ...] such that\n", + " each tuple is a coordinate - (row, columns), starting with 0.\n", + " Sort coordinates initially by rows in ascending order.\n", + " Also, sort coordinates of the row by columns in descending order.\n", + " \n", + " Examples:\n", + " get_row([\n", + " [1,2,3,4,5,6],\n", + " [1,2,3,4,1,6],\n", + " [1,2,3,4,5,1]\n", + " ], 1) == [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]\n", + " get_row([], 1) == []\n", + " get_row([[], [1], [1, 2, 3]], 3) == [(2, 2)]\n", + " \"\"\"\n", + "\n", + "\n", + "def sort_array(array):\n", + " \"\"\"\n", + " Given an array of non-negative integers, return a copy of the given array after sorting,\n", + " you will sort the given array in ascending order if the sum( first index value, last index value) is odd,\n", + " or sort it in descending order if the sum( first index value, last index value) is even.\n", + "\n", + " Note:\n", + " * don't change the given array.\n", + "\n", + " Examples:\n", + " * sort_array([]) => []\n", + " * sort_array([5]) => [5]\n", + " * sort_array([2, 4, 3, 0, 1, 5]) => [0, 1, 2, 3, 4, 5]\n", + " * sort_array([2, 4, 3, 0, 1, 5, 6]) => [6, 5, 4, 3, 2, 1, 0]\n", + " \"\"\"\n", + "\n", + "\n", + "def encrypt(s):\n", + " \"\"\"Create a function encrypt that takes a string as an argument and\n", + " returns a string encrypted with the alphabet being rotated. \n", + " The alphabet should be rotated in a manner such that the letters \n", + " shift down by two multiplied to two places.\n", + " For example:\n", + " encrypt('hi') returns 'lm'\n", + " encrypt('asdfghjkl') returns 'ewhjklnop'\n", + " encrypt('gf') returns 'kj'\n", + " encrypt('et') returns 'ix'\n", + " \"\"\"\n", + "\n", + "\n", + "def next_smallest(lst):\n", + " \"\"\"\n", + " You are given a list of integers.\n", + " Write a function next_smallest() that returns the 2nd smallest element of the list.\n", + " Return None if there is no such element.\n", + " \n", + " next_smallest([1, 2, 3, 4, 5]) == 2\n", + " next_smallest([5, 1, 4, 3, 2]) == 2\n", + " next_smallest([]) == None\n", + " next_smallest([1, 1]) == None\n", + " \"\"\"\n", + "\n", + "\n", + "def is_bored(S):\n", + " \"\"\"\n", + " You'll be given a string of words, and your task is to count the number\n", + " of boredoms. A boredom is a sentence that starts with the word \"I\".\n", + " Sentences are delimited by '.', '?' or '!'.\n", + " \n", + " For example:\n", + " >>> is_bored(\"Hello world\")\n", + " 0\n", + " >>> is_bored(\"The sky is blue. The sun is shining. I love this weather\")\n", + " 1\n", + " \"\"\"\n", + "\n", + "\n", + "def any_int(x, y, z):\n", + " '''\n", + " Create a function that takes 3 numbers.\n", + " Returns true if one of the numbers is equal to the sum of the other two, and all numbers are integers.\n", + " Returns false in any other cases.\n", + " \n", + " Examples\n", + " any_int(5, 2, 7) ➞ True\n", + " \n", + " any_int(3, 2, 2) ➞ False\n", + "\n", + " any_int(3, -2, 1) ➞ True\n", + " \n", + " any_int(3.6, -2.2, 2) ➞ False\n", + " \n", + "\n", + " \n", + " '''\n", + "\n", + "\n", + "def encode(message):\n", + " \"\"\"\n", + " Write a function that takes a message, and encodes in such a \n", + " way that it swaps case of all letters, replaces all vowels in \n", + " the message with the letter that appears 2 places ahead of that \n", + " vowel in the english alphabet. \n", + " Assume only letters. \n", + " \n", + " Examples:\n", + " >>> encode('test')\n", + " 'TGST'\n", + " >>> encode('This is a message')\n", + " 'tHKS KS C MGSSCGG'\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "def skjkasdkd(lst):\n", + " \"\"\"You are given a list of integers.\n", + " You need to find the largest prime value and return the sum of its digits.\n", + "\n", + " Examples:\n", + " For lst = [0,3,2,1,3,5,7,4,5,5,5,2,181,32,4,32,3,2,32,324,4,3] the output should be 10\n", + " For lst = [1,0,1,8,2,4597,2,1,3,40,1,2,1,2,4,2,5,1] the output should be 25\n", + " For lst = [1,3,1,32,5107,34,83278,109,163,23,2323,32,30,1,9,3] the output should be 13\n", + " For lst = [0,724,32,71,99,32,6,0,5,91,83,0,5,6] the output should be 11\n", + " For lst = [0,81,12,3,1,21] the output should be 3\n", + " For lst = [0,8,1,2,1,7] the output should be 7\n", + " \"\"\"\n", + "\n", + "\n", + "def check_dict_case(dict):\n", + " \"\"\"\n", + " Given a dictionary, return True if all keys are strings in lower \n", + " case or all keys are strings in upper case, else return False.\n", + " The function should return False is the given dictionary is empty.\n", + " Examples:\n", + " check_dict_case({\"a\":\"apple\", \"b\":\"banana\"}) should return True.\n", + " check_dict_case({\"a\":\"apple\", \"A\":\"banana\", \"B\":\"banana\"}) should return False.\n", + " check_dict_case({\"a\":\"apple\", 8:\"banana\", \"a\":\"apple\"}) should return False.\n", + " check_dict_case({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) should return False.\n", + " check_dict_case({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) should return True.\n", + " \"\"\"\n", + "\n", + "\n", + "def count_up_to(n):\n", + " \"\"\"Implement a function that takes an non-negative integer and returns an array of the first n\n", + " integers that are prime numbers and less than n.\n", + " for example:\n", + " count_up_to(5) => [2,3]\n", + " count_up_to(11) => [2,3,5,7]\n", + " count_up_to(0) => []\n", + " count_up_to(20) => [2,3,5,7,11,13,17,19]\n", + " count_up_to(1) => []\n", + " count_up_to(18) => [2,3,5,7,11,13,17]\n", + " \"\"\"\n", + "\n", + "\n", + "def multiply(a, b):\n", + " \"\"\"Complete the function that takes two integers and returns \n", + " the product of their unit digits.\n", + " Assume the input is always valid.\n", + " Examples:\n", + " multiply(148, 412) should return 16.\n", + " multiply(19, 28) should return 72.\n", + " multiply(2020, 1851) should return 0.\n", + " multiply(14,-15) should return 20.\n", + " \"\"\"\n", + "\n", + "\n", + "def count_upper(s):\n", + " \"\"\"\n", + " Given a string s, count the number of uppercase vowels in even indices.\n", + " \n", + " For example:\n", + " count_upper('aBCdEf') returns 1\n", + " count_upper('abcdefg') returns 0\n", + " count_upper('dBBE') returns 0\n", + " \"\"\"\n", + "\n", + "\n", + "def closest_integer(value):\n", + " '''\n", + " Create a function that takes a value (string) representing a number\n", + " and returns the closest integer to it. If the number is equidistant\n", + " from two integers, round it away from zero.\n", + "\n", + " Examples\n", + " >>> closest_integer(\"10\")\n", + " 10\n", + " >>> closest_integer(\"15.3\")\n", + " 15\n", + "\n", + " Note:\n", + " Rounding away from zero means that if the given number is equidistant\n", + " from two integers, the one you should return is the one that is the\n", + " farthest from zero. For example closest_integer(\"14.5\") should\n", + " return 15 and closest_integer(\"-14.5\") should return -15.\n", + " '''\n", + "\n", + "\n", + "def make_a_pile(n):\n", + " \"\"\"\n", + " Given a positive integer n, you have to make a pile of n levels of stones.\n", + " The first level has n stones.\n", + " The number of stones in the next level is:\n", + " - the next odd number if n is odd.\n", + " - the next even number if n is even.\n", + " Return the number of stones in each level in a list, where element at index\n", + " i represents the number of stones in the level (i+1).\n", + "\n", + " Examples:\n", + " >>> make_a_pile(3)\n", + " [3, 5, 7]\n", + " \"\"\"\n", + "\n", + "\n", + "def words_string(s):\n", + " \"\"\"\n", + " You will be given a string of words separated by commas or spaces. Your task is\n", + " to split the string into words and return an array of the words.\n", + " \n", + " For example:\n", + " words_string(\"Hi, my name is John\") == [\"Hi\", \"my\", \"name\", \"is\", \"John\"]\n", + " words_string(\"One, two, three, four, five, six\") == [\"One\", \"two\", \"three\", \"four\", \"five\", \"six\"]\n", + " \"\"\"\n", + "\n", + "\n", + "def choose_num(x, y):\n", + " \"\"\"This function takes two positive numbers x and y and returns the\n", + " biggest even integer number that is in the range [x, y] inclusive. If \n", + " there's no such number, then the function should return -1.\n", + "\n", + " For example:\n", + " choose_num(12, 15) = 14\n", + " choose_num(13, 12) = -1\n", + " \"\"\"\n", + "\n", + "\n", + "def rounded_avg(n, m):\n", + " \"\"\"You are given two positive integers n and m, and your task is to compute the\n", + " average of the integers from n through m (including n and m). \n", + " Round the answer to the nearest integer and convert that to binary.\n", + " If n is greater than m, return -1.\n", + " Example:\n", + " rounded_avg(1, 5) => \"0b11\"\n", + " rounded_avg(7, 5) => -1\n", + " rounded_avg(10, 20) => \"0b1111\"\n", + " rounded_avg(20, 33) => \"0b11010\"\n", + " \"\"\"\n", + "\n", + "\n", + "def unique_digits(x):\n", + " \"\"\"Given a list of positive integers x. return a sorted list of all \n", + " elements that hasn't any even digit.\n", + "\n", + " Note: Returned list should be sorted in increasing order.\n", + " \n", + " For example:\n", + " >>> unique_digits([15, 33, 1422, 1])\n", + " [1, 15, 33]\n", + " >>> unique_digits([152, 323, 1422, 10])\n", + " []\n", + " \"\"\"\n", + "\n", + "\n", + "def by_length(arr):\n", + " \"\"\"\n", + " Given an array of integers, sort the integers that are between 1 and 9 inclusive,\n", + " reverse the resulting array, and then replace each digit by its corresponding name from\n", + " \"One\", \"Two\", \"Three\", \"Four\", \"Five\", \"Six\", \"Seven\", \"Eight\", \"Nine\".\n", + "\n", + " For example:\n", + " arr = [2, 1, 1, 4, 5, 8, 2, 3] \n", + " -> sort arr -> [1, 1, 2, 2, 3, 4, 5, 8] \n", + " -> reverse arr -> [8, 5, 4, 3, 2, 2, 1, 1]\n", + " return [\"Eight\", \"Five\", \"Four\", \"Three\", \"Two\", \"Two\", \"One\", \"One\"]\n", + " \n", + " If the array is empty, return an empty array:\n", + " arr = []\n", + " return []\n", + " \n", + " If the array has any strange number ignore it:\n", + " arr = [1, -1 , 55] \n", + " -> sort arr -> [-1, 1, 55]\n", + " -> reverse arr -> [55, 1, -1]\n", + " return = ['One']\n", + " \"\"\"\n", + "\n", + "\n", + "def f(n):\n", + " \"\"\" Implement the function f that takes n as a parameter,\n", + " and returns a list of size n, such that the value of the element at index i is the factorial of i if i is even\n", + " or the sum of numbers from 1 to i otherwise.\n", + " i starts from 1.\n", + " the factorial of i is the multiplication of the numbers from 1 to i (1 * 2 * ... * i).\n", + " Example:\n", + " f(5) == [1, 2, 6, 24, 15]\n", + " \"\"\"\n", + "\n", + "\n", + "def even_odd_palindrome(n):\n", + " \"\"\"\n", + " Given a positive integer n, return a tuple that has the number of even and odd\n", + " integer palindromes that fall within the range(1, n), inclusive.\n", + "\n", + " Example 1:\n", + "\n", + " Input: 3\n", + " Output: (1, 2)\n", + " Explanation:\n", + " Integer palindrome are 1, 2, 3. one of them is even, and two of them are odd.\n", + "\n", + " Example 2:\n", + "\n", + " Input: 12\n", + " Output: (4, 6)\n", + " Explanation:\n", + " Integer palindrome are 1, 2, 3, 4, 5, 6, 7, 8, 9, 11. four of them are even, and 6 of them are odd.\n", + "\n", + " Note:\n", + " 1. 1 <= n <= 10^3\n", + " 2. returned tuple has the number of even and odd integer palindromes respectively.\n", + " \"\"\"\n", + "\n", + "\n", + "def count_nums(arr):\n", + " \"\"\"\n", + " Write a function count_nums which takes an array of integers and returns\n", + " the number of elements which has a sum of digits > 0.\n", + " If a number is negative, then its first signed digit will be negative:\n", + " e.g. -123 has signed digits -1, 2, and 3.\n", + " >>> count_nums([]) == 0\n", + " >>> count_nums([-1, 11, -11]) == 1\n", + " >>> count_nums([1, 1, 2]) == 3\n", + " \"\"\"\n", + "\n", + "\n", + "def move_one_ball(arr):\n", + " \"\"\"We have an array 'arr' of N integers arr[1], arr[2], ..., arr[N].The\n", + " numbers in the array will be randomly ordered. Your task is to determine if\n", + " it is possible to get an array sorted in non-decreasing order by performing \n", + " the following operation on the given array:\n", + " You are allowed to perform right shift operation any number of times.\n", + " \n", + " One right shift operation means shifting all elements of the array by one\n", + " position in the right direction. The last element of the array will be moved to\n", + " the starting position in the array i.e. 0th index. \n", + "\n", + " If it is possible to obtain the sorted array by performing the above operation\n", + " then return True else return False.\n", + " If the given array is empty then return True.\n", + "\n", + " Note: The given list is guaranteed to have unique elements.\n", + "\n", + " For Example:\n", + " \n", + " move_one_ball([3, 4, 5, 1, 2])==>True\n", + " Explanation: By performin 2 right shift operations, non-decreasing order can\n", + " be achieved for the given array.\n", + " move_one_ball([3, 5, 4, 1, 2])==>False\n", + " Explanation:It is not possible to get non-decreasing order for the given\n", + " array by performing any number of right shift operations.\n", + " \n", + " \"\"\"\n", + "\n", + "\n", + "def exchange(lst1, lst2):\n", + " \"\"\"In this problem, you will implement a function that takes two lists of numbers,\n", + " and determines whether it is possible to perform an exchange of elements\n", + " between them to make lst1 a list of only even numbers.\n", + " There is no limit on the number of exchanged elements between lst1 and lst2.\n", + " If it is possible to exchange elements between the lst1 and lst2 to make\n", + " all the elements of lst1 to be even, return \"YES\".\n", + " Otherwise, return \"NO\".\n", + " For example:\n", + " exchange([1, 2, 3, 4], [1, 2, 3, 4]) => \"YES\"\n", + " exchange([1, 2, 3, 4], [1, 5, 3, 4]) => \"NO\"\n", + " It is assumed that the input lists will be non-empty.\n", + " \"\"\"\n", + "\n", + "\n", + "def histogram(test):\n", + " \"\"\"Given a string representing a space separated lowercase letters, return a dictionary\n", + " of the letter with the most repetition and containing the corresponding count.\n", + " If several letters have the same occurrence, return all of them.\n", + " \n", + " Example:\n", + " histogram('a b c') == {'a': 1, 'b': 1, 'c': 1}\n", + " histogram('a b b a') == {'a': 2, 'b': 2}\n", + " histogram('a b c a b') == {'a': 2, 'b': 2}\n", + " histogram('b b b b a') == {'b': 4}\n", + " histogram('') == {}\n", + "\n", + " \"\"\"\n", + "\n", + "\n", + "def reverse_delete(s,c):\n", + " \"\"\"Task\n", + " We are given two strings s and c, you have to deleted all the characters in s that are equal to any character in c\n", + " then check if the result string is palindrome.\n", + " A string is called palindrome if it reads the same backward as forward.\n", + " You should return a tuple containing the result string and True/False for the check.\n", + " Example\n", + " For s = \"abcde\", c = \"ae\", the result should be ('bcd',False)\n", + " For s = \"abcdef\", c = \"b\" the result should be ('acdef',False)\n", + " For s = \"abcdedcba\", c = \"ab\", the result should be ('cdedc',True)\n", + " \"\"\"\n", + "\n", + "\n", + "def odd_count(lst):\n", + " \"\"\"Given a list of strings, where each string consists of only digits, return a list.\n", + " Each element i of the output should be \"the number of odd elements in the\n", + " string i of the input.\" where all the i's should be replaced by the number\n", + " of odd digits in the i'th string of the input.\n", + "\n", + " >>> odd_count(['1234567'])\n", + " [\"the number of odd elements 4n the str4ng 4 of the 4nput.\"]\n", + " >>> odd_count(['3',\"11111111\"])\n", + " [\"the number of odd elements 1n the str1ng 1 of the 1nput.\",\n", + " \"the number of odd elements 8n the str8ng 8 of the 8nput.\"]\n", + " \"\"\"\n", + "\n", + "\n", + "def minSubArraySum(nums):\n", + " \"\"\"\n", + " Given an array of integers nums, find the minimum sum of any non-empty sub-array\n", + " of nums.\n", + " Example\n", + " minSubArraySum([2, 3, 4, 1, 2, 4]) == 1\n", + " minSubArraySum([-1, -2, -3]) == -6\n", + " \"\"\"\n", + "\n", + "import math\n", + "\n", + "def max_fill(grid, capacity):\n", + " \"\"\"\n", + " You are given a rectangular grid of wells. Each row represents a single well,\n", + " and each 1 in a row represents a single unit of water.\n", + " Each well has a corresponding bucket that can be used to extract water from it, \n", + " and all buckets have the same capacity.\n", + " Your task is to use the buckets to empty the wells.\n", + " Output the number of times you need to lower the buckets.\n", + "\n", + " Example 1:\n", + " Input: \n", + " grid : [[0,0,1,0], [0,1,0,0], [1,1,1,1]]\n", + " bucket_capacity : 1\n", + " Output: 6\n", + "\n", + " Example 2:\n", + " Input: \n", + " grid : [[0,0,1,1], [0,0,0,0], [1,1,1,1], [0,1,1,1]]\n", + " bucket_capacity : 2\n", + " Output: 5\n", + " \n", + " Example 3:\n", + " Input: \n", + " grid : [[0,0,0], [0,0,0]]\n", + " bucket_capacity : 5\n", + " Output: 0\n", + "\n", + " Constraints:\n", + " * all wells have the same length\n", + " * 1 <= grid.length <= 10^2\n", + " * 1 <= grid[:,1].length <= 10^2\n", + " * grid[i][j] -> 0 | 1\n", + " * 1 <= capacity <= 10\n", + " \"\"\"\n", + "\n", + "\n", + "def sort_array(arr):\n", + " \"\"\"\n", + " In this Kata, you have to sort an array of non-negative integers according to\n", + " number of ones in their binary representation in ascending order.\n", + " For similar number of ones, sort based on decimal value.\n", + "\n", + " It must be implemented like this:\n", + " >>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]\n", + " >>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]\n", + " >>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]\n", + " \"\"\"\n", + "\n", + "\n", + "def select_words(s, n):\n", + " \"\"\"Given a string s and a natural number n, you have been tasked to implement \n", + " a function that returns a list of all words from string s that contain exactly \n", + " n consonants, in order these words appear in the string s.\n", + " If the string s is empty then the function should return an empty list.\n", + " Note: you may assume the input string contains only letters and spaces.\n", + " Examples:\n", + " select_words(\"Mary had a little lamb\", 4) ==> [\"little\"]\n", + " select_words(\"Mary had a little lamb\", 3) ==> [\"Mary\", \"lamb\"]\n", + " select_words(\"simple white space\", 2) ==> []\n", + " select_words(\"Hello world\", 4) ==> [\"world\"]\n", + " select_words(\"Uncle sam\", 3) ==> [\"Uncle\"]\n", + " \"\"\"\n", + "\n", + "\n", + "def get_closest_vowel(word):\n", + " \"\"\"You are given a word. Your task is to find the closest vowel that stands between \n", + " two consonants from the right side of the word (case sensitive).\n", + " \n", + " Vowels in the beginning and ending doesn't count. Return empty string if you didn't\n", + " find any vowel met the above condition. \n", + "\n", + " You may assume that the given string contains English letter only.\n", + "\n", + " Example:\n", + " get_closest_vowel(\"yogurt\") ==> \"u\"\n", + " get_closest_vowel(\"FULL\") ==> \"U\"\n", + " get_closest_vowel(\"quick\") ==> \"\"\n", + " get_closest_vowel(\"ab\") ==> \"\"\n", + " \"\"\"\n", + "\n", + "\n", + "def match_parens(lst):\n", + " '''\n", + " You are given a list of two strings, both strings consist of open\n", + " parentheses '(' or close parentheses ')' only.\n", + " Your job is to check if it is possible to concatenate the two strings in\n", + " some order, that the resulting string will be good.\n", + " A string S is considered to be good if and only if all parentheses in S\n", + " are balanced. For example: the string '(())()' is good, while the string\n", + " '())' is not.\n", + " Return 'Yes' if there's a way to make a good string, and return 'No' otherwise.\n", + "\n", + " Examples:\n", + " match_parens(['()(', ')']) == 'Yes'\n", + " match_parens([')', ')']) == 'No'\n", + " '''\n", + "\n", + "\n", + "def maximum(arr, k):\n", + " \"\"\"\n", + " Given an array arr of integers and a positive integer k, return a sorted list \n", + " of length k with the maximum k numbers in arr.\n", + "\n", + " Example 1:\n", + "\n", + " Input: arr = [-3, -4, 5], k = 3\n", + " Output: [-4, -3, 5]\n", + "\n", + " Example 2:\n", + "\n", + " Input: arr = [4, -4, 4], k = 2\n", + " Output: [4, 4]\n", + "\n", + " Example 3:\n", + "\n", + " Input: arr = [-3, 2, 1, 2, -1, -2, 1], k = 1\n", + " Output: [2]\n", + "\n", + " Note:\n", + " 1. The length of the array will be in the range of [1, 1000].\n", + " 2. The elements in the array will be in the range of [-1000, 1000].\n", + " 3. 0 <= k <= len(arr)\n", + " \"\"\"\n", + "\n", + "\n", + "def solution(lst):\n", + " \"\"\"Given a non-empty list of integers, return the sum of all of the odd elements that are in even positions.\n", + " \n", + "\n", + " Examples\n", + " solution([5, 8, 7, 1]) ==> 12\n", + " solution([3, 3, 3, 3, 3]) ==> 9\n", + " solution([30, 13, 24, 321]) ==>0\n", + " \"\"\"\n", + "\n", + "\n", + "def add_elements(arr, k):\n", + " \"\"\"\n", + " Given a non-empty array of integers arr and an integer k, return\n", + " the sum of the elements with at most two digits from the first k elements of arr.\n", + "\n", + " Example:\n", + "\n", + " Input: arr = [111,21,3,4000,5,6,7,8,9], k = 4\n", + " Output: 24 # sum of 21 + 3\n", + "\n", + " Constraints:\n", + " 1. 1 <= len(arr) <= 100\n", + " 2. 1 <= k <= len(arr)\n", + " \"\"\"\n", + "\n", + "\n", + "def get_odd_collatz(n):\n", + " \"\"\"\n", + " Given a positive integer n, return a sorted list that has the odd numbers in collatz sequence.\n", + "\n", + " The Collatz conjecture is a conjecture in mathematics that concerns a sequence defined\n", + " as follows: start with any positive integer n. Then each term is obtained from the \n", + " previous term as follows: if the previous term is even, the next term is one half of \n", + " the previous term. If the previous term is odd, the next term is 3 times the previous\n", + " term plus 1. The conjecture is that no matter what value of n, the sequence will always reach 1.\n", + "\n", + " Note: \n", + " 1. Collatz(1) is [1].\n", + " 2. returned list sorted in increasing order.\n", + "\n", + " For example:\n", + " get_odd_collatz(5) returns [1, 5] # The collatz sequence for 5 is [5, 16, 8, 4, 2, 1], so the odd numbers are only 1, and 5.\n", + " \"\"\"\n", + "\n", + "\n", + "def valid_date(date):\n", + " \"\"\"You have to write a function which validates a given date string and\n", + " returns True if the date is valid otherwise False.\n", + " The date is valid if all of the following rules are satisfied:\n", + " 1. The date string is not empty.\n", + " 2. The number of days is not less than 1 or higher than 31 days for months 1,3,5,7,8,10,12. And the number of days is not less than 1 or higher than 30 days for months 4,6,9,11. And, the number of days is not less than 1 or higher than 29 for the month 2.\n", + " 3. The months should not be less than 1 or higher than 12.\n", + " 4. The date should be in the format: mm-dd-yyyy\n", + "\n", + " for example: \n", + " valid_date('03-11-2000') => True\n", + "\n", + " valid_date('15-01-2012') => False\n", + "\n", + " valid_date('04-0-2040') => False\n", + "\n", + " valid_date('06-04-2020') => True\n", + "\n", + " valid_date('06/04/2020') => False\n", + " \"\"\"\n", + "\n", + "\n", + "def split_words(txt):\n", + " '''\n", + " Given a string of words, return a list of words split on whitespace, if no whitespaces exists in the text you\n", + " should split on commas ',' if no commas exists you should return the number of lower-case letters with odd order in the\n", + " alphabet, ord('a') = 0, ord('b') = 1, ... ord('z') = 25\n", + " Examples\n", + " split_words(\"Hello world!\") ➞ [\"Hello\", \"world!\"]\n", + " split_words(\"Hello,world!\") ➞ [\"Hello\", \"world!\"]\n", + " split_words(\"abcdef\") == 3 \n", + " '''\n", + "\n", + "\n", + "def is_sorted(lst):\n", + " '''\n", + " Given a list of numbers, return whether or not they are sorted\n", + " in ascending order. If list has more than 1 duplicate of the same\n", + " number, return False. Assume no negative numbers and only integers.\n", + "\n", + " Examples\n", + " is_sorted([5]) ➞ True\n", + " is_sorted([1, 2, 3, 4, 5]) ➞ True\n", + " is_sorted([1, 3, 2, 4, 5]) ➞ False\n", + " is_sorted([1, 2, 3, 4, 5, 6]) ➞ True\n", + " is_sorted([1, 2, 3, 4, 5, 6, 7]) ➞ True\n", + " is_sorted([1, 3, 2, 4, 5, 6, 7]) ➞ False\n", + " is_sorted([1, 2, 2, 3, 3, 4]) ➞ True\n", + " is_sorted([1, 2, 2, 2, 3, 4]) ➞ False\n", + " '''\n", + "\n", + "\n", + "def intersection(interval1, interval2):\n", + " \"\"\"You are given two intervals,\n", + " where each interval is a pair of integers. For example, interval = (start, end) = (1, 2).\n", + " The given intervals are closed which means that the interval (start, end)\n", + " includes both start and end.\n", + " For each given interval, it is assumed that its start is less or equal its end.\n", + " Your task is to determine whether the length of intersection of these two \n", + " intervals is a prime number.\n", + " Example, the intersection of the intervals (1, 3), (2, 4) is (2, 3)\n", + " which its length is 1, which not a prime number.\n", + " If the length of the intersection is a prime number, return \"YES\",\n", + " otherwise, return \"NO\".\n", + " If the two intervals don't intersect, return \"NO\".\n", + "\n", + "\n", + " [input/output] samples:\n", + " intersection((1, 2), (2, 3)) ==> \"NO\"\n", + " intersection((-1, 1), (0, 4)) ==> \"NO\"\n", + " intersection((-3, -1), (-5, 5)) ==> \"YES\"\n", + " \"\"\"\n", + "\n", + "\n", + "def prod_signs(arr):\n", + " \"\"\"\n", + " You are given an array arr of integers and you need to return\n", + " sum of magnitudes of integers multiplied by product of all signs\n", + " of each number in the array, represented by 1, -1 or 0.\n", + " Note: return None for empty arr.\n", + "\n", + " Example:\n", + " >>> prod_signs([1, 2, 2, -4]) == -9\n", + " >>> prod_signs([0, 1]) == 0\n", + " >>> prod_signs([]) == None\n", + " \"\"\"\n", + "\n", + "\n", + "def minPath(grid, k):\n", + " \"\"\"\n", + " Given a grid with N rows and N columns (N >= 2) and a positive integer k, \n", + " each cell of the grid contains a value. Every integer in the range [1, N * N]\n", + " inclusive appears exactly once on the cells of the grid.\n", + "\n", + " You have to find the minimum path of length k in the grid. You can start\n", + " from any cell, and in each step you can move to any of the neighbor cells,\n", + " in other words, you can go to cells which share an edge with you current\n", + " cell.\n", + " Please note that a path of length k means visiting exactly k cells (not\n", + " necessarily distinct).\n", + " You CANNOT go off the grid.\n", + " A path A (of length k) is considered less than a path B (of length k) if\n", + " after making the ordered lists of the values on the cells that A and B go\n", + " through (let's call them lst_A and lst_B), lst_A is lexicographically less\n", + " than lst_B, in other words, there exist an integer index i (1 <= i <= k)\n", + " such that lst_A[i] < lst_B[i] and for any j (1 <= j < i) we have\n", + " lst_A[j] = lst_B[j].\n", + " It is guaranteed that the answer is unique.\n", + " Return an ordered list of the values on the cells that the minimum path go through.\n", + "\n", + " Examples:\n", + "\n", + " Input: grid = [ [1,2,3], [4,5,6], [7,8,9]], k = 3\n", + " Output: [1, 2, 1]\n", + "\n", + " Input: grid = [ [5,9,3], [4,1,6], [7,8,2]], k = 1\n", + " Output: [1]\n", + " \"\"\"\n", + "\n", + "\n", + "def tri(n):\n", + " \"\"\"Everyone knows Fibonacci sequence, it was studied deeply by mathematicians in \n", + " the last couple centuries. However, what people don't know is Tribonacci sequence.\n", + " Tribonacci sequence is defined by the recurrence:\n", + " tri(1) = 3\n", + " tri(n) = 1 + n / 2, if n is even.\n", + " tri(n) = tri(n - 1) + tri(n - 2) + tri(n + 1), if n is odd.\n", + " For example:\n", + " tri(2) = 1 + (2 / 2) = 2\n", + " tri(4) = 3\n", + " tri(3) = tri(2) + tri(1) + tri(4)\n", + " = 2 + 3 + 3 = 8 \n", + " You are given a non-negative integer number n, you have to a return a list of the \n", + " first n + 1 numbers of the Tribonacci sequence.\n", + " Examples:\n", + " tri(3) = [1, 3, 2, 8]\n", + " \"\"\"\n", + "\n", + "\n", + "def digits(n):\n", + " \"\"\"Given a positive integer n, return the product of the odd digits.\n", + " Return 0 if all digits are even.\n", + " For example:\n", + " digits(1) == 1\n", + " digits(4) == 0\n", + " digits(235) == 15\n", + " \"\"\"\n", + "\n", + "\n", + "def is_nested(string):\n", + " '''\n", + " Create a function that takes a string as input which contains only square brackets.\n", + " The function should return True if and only if there is a valid subsequence of brackets \n", + " where at least one bracket in the subsequence is nested.\n", + "\n", + " is_nested('[[]]') ➞ True\n", + " is_nested('[]]]]]]][[[[[]') ➞ False\n", + " is_nested('[][]') ➞ False\n", + " is_nested('[]') ➞ False\n", + " is_nested('[[][]]') ➞ True\n", + " is_nested('[[]][[') ➞ True\n", + " '''\n", + "\n", + "\n", + "\n", + "def sum_squares(lst):\n", + " \"\"\"You are given a list of numbers.\n", + " You need to return the sum of squared numbers in the given list,\n", + " round each element in the list to the upper int(Ceiling) first.\n", + " Examples:\n", + " For lst = [1,2,3] the output should be 14\n", + " For lst = [1,4,9] the output should be 98\n", + " For lst = [1,3,5,7] the output should be 84\n", + " For lst = [1.4,4.2,0] the output should be 29\n", + " For lst = [-2.4,1,1] the output should be 6\n", + " \n", + "\n", + " \"\"\"\n", + "\n", + "\n", + "def check_if_last_char_is_a_letter(txt):\n", + " '''\n", + " Create a function that returns True if the last character\n", + " of a given string is an alphabetical character and is not\n", + " a part of a word, and False otherwise.\n", + " Note: \"word\" is a group of characters separated by space.\n", + "\n", + " Examples:\n", + " check_if_last_char_is_a_letter(\"apple pie\") ➞ False\n", + " check_if_last_char_is_a_letter(\"apple pi e\") ➞ True\n", + " check_if_last_char_is_a_letter(\"apple pi e \") ➞ False\n", + " check_if_last_char_is_a_letter(\"\") ➞ False \n", + " '''\n", + "\n", + "\n", + "def can_arrange(arr):\n", + " \"\"\"Create a function which returns the largest index of an element which\n", + " is not greater than or equal to the element immediately preceding it. If\n", + " no such element exists then return -1. The given array will not contain\n", + " duplicate values.\n", + "\n", + " Examples:\n", + " can_arrange([1,2,4,3,5]) = 3\n", + " can_arrange([1,2,3]) = -1\n", + " \"\"\"\n", + "\n", + "\n", + "def largest_smallest_integers(lst):\n", + " '''\n", + " Create a function that returns a tuple (a, b), where 'a' is\n", + " the largest of negative integers, and 'b' is the smallest\n", + " of positive integers in a list.\n", + " If there is no negative or positive integers, return them as None.\n", + "\n", + " Examples:\n", + " largest_smallest_integers([2, 4, 1, 3, 5, 7]) == (None, 1)\n", + " largest_smallest_integers([]) == (None, None)\n", + " largest_smallest_integers([0]) == (None, None)\n", + " '''\n", + "\n", + "\n", + "def compare_one(a, b):\n", + " \"\"\"\n", + " Create a function that takes integers, floats, or strings representing\n", + " real numbers, and returns the larger variable in its given variable type.\n", + " Return None if the values are equal.\n", + " Note: If a real number is represented as a string, the floating point might be . or ,\n", + "\n", + " compare_one(1, 2.5) ➞ 2.5\n", + " compare_one(1, \"2,3\") ➞ \"2,3\"\n", + " compare_one(\"5,1\", \"6\") ➞ \"6\"\n", + " compare_one(\"1\", 1) ➞ None\n", + " \"\"\"\n", + "\n", + "\n", + "def is_equal_to_sum_even(n):\n", + " \"\"\"Evaluate whether the given number n can be written as the sum of exactly 4 positive even numbers\n", + " Example\n", + " is_equal_to_sum_even(4) == False\n", + " is_equal_to_sum_even(6) == False\n", + " is_equal_to_sum_even(8) == True\n", + " \"\"\"\n", + "\n", + "\n", + "def special_factorial(n):\n", + " \"\"\"The Brazilian factorial is defined as:\n", + " brazilian_factorial(n) = n! * (n-1)! * (n-2)! * ... * 1!\n", + " where n > 0\n", + "\n", + " For example:\n", + " >>> special_factorial(4)\n", + " 288\n", + "\n", + " The function will receive an integer as input and should return the special\n", + " factorial of this integer.\n", + " \"\"\"\n", + "\n", + "\n", + "def fix_spaces(text):\n", + " \"\"\"\n", + " Given a string text, replace all spaces in it with underscores, \n", + " and if a string has more than 2 consecutive spaces, \n", + " then replace all consecutive spaces with - \n", + " \n", + " fix_spaces(\"Example\") == \"Example\"\n", + " fix_spaces(\"Example 1\") == \"Example_1\"\n", + " fix_spaces(\" Example 2\") == \"_Example_2\"\n", + " fix_spaces(\" Example 3\") == \"_Example-3\"\n", + " \"\"\"\n", + "\n", + "\n", + "def file_name_check(file_name):\n", + " \"\"\"Create a function which takes a string representing a file's name, and returns\n", + " 'Yes' if the the file's name is valid, and returns 'No' otherwise.\n", + " A file's name is considered to be valid if and only if all the following conditions \n", + " are met:\n", + " - There should not be more than three digits ('0'-'9') in the file's name.\n", + " - The file's name contains exactly one dot '.'\n", + " - The substring before the dot should not be empty, and it starts with a letter from \n", + " the latin alphapet ('a'-'z' and 'A'-'Z').\n", + " - The substring after the dot should be one of these: ['txt', 'exe', 'dll']\n", + " Examples:\n", + " file_name_check(\"example.txt\") # => 'Yes'\n", + " file_name_check(\"1example.dll\") # => 'No' (the name should start with a latin alphapet letter)\n", + " \"\"\"\n", + "\n", + "\n", + "\n", + "\n", + "def sum_squares(lst):\n", + " \"\"\"\"\n", + " This function will take a list of integers. For all entries in the list, the function shall square the integer entry if its index is a \n", + " multiple of 3 and will cube the integer entry if its index is a multiple of 4 and not a multiple of 3. The function will not \n", + " change the entries in the list whose indexes are not a multiple of 3 or 4. The function shall then return the sum of all entries. \n", + " \n", + " Examples:\n", + " For lst = [1,2,3] the output should be 6\n", + " For lst = [] the output should be 0\n", + " For lst = [-1,-5,2,-1,-5] the output should be -126\n", + " \"\"\"\n", + "\n", + "\n", + "def words_in_sentence(sentence):\n", + " \"\"\"\n", + " You are given a string representing a sentence,\n", + " the sentence contains some words separated by a space,\n", + " and you have to return a string that contains the words from the original sentence,\n", + " whose lengths are prime numbers,\n", + " the order of the words in the new string should be the same as the original one.\n", + "\n", + " Example 1:\n", + " Input: sentence = \"This is a test\"\n", + " Output: \"is\"\n", + "\n", + " Example 2:\n", + " Input: sentence = \"lets go for swimming\"\n", + " Output: \"go for\"\n", + "\n", + " Constraints:\n", + " * 1 <= len(sentence) <= 100\n", + " * sentence contains only letters\n", + " \"\"\"\n", + "\n", + "\n", + "def simplify(x, n):\n", + " \"\"\"Your task is to implement a function that will simplify the expression\n", + " x * n. The function returns True if x * n evaluates to a whole number and False\n", + " otherwise. Both x and n, are string representation of a fraction, and have the following format,\n", + " / where both numerator and denominator are positive whole numbers.\n", + "\n", + " You can assume that x, and n are valid fractions, and do not have zero as denominator.\n", + "\n", + " simplify(\"1/5\", \"5/1\") = True\n", + " simplify(\"1/6\", \"2/1\") = False\n", + " simplify(\"7/10\", \"10/2\") = False\n", + " \"\"\"\n", + "\n", + "\n", + "def order_by_points(nums):\n", + " \"\"\"\n", + " Write a function which sorts the given list of integers\n", + " in ascending order according to the sum of their digits.\n", + " Note: if there are several items with similar sum of their digits,\n", + " order them based on their index in original list.\n", + "\n", + " For example:\n", + " >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]\n", + " >>> order_by_points([]) == []\n", + " \"\"\"\n", + "\n", + "\n", + "def specialFilter(nums):\n", + " \"\"\"Write a function that takes an array of numbers as input and returns \n", + " the number of elements in the array that are greater than 10 and both \n", + " first and last digits of a number are odd (1, 3, 5, 7, 9).\n", + " For example:\n", + " specialFilter([15, -73, 14, -15]) => 1 \n", + " specialFilter([33, -2, -3, 45, 21, 109]) => 2\n", + " \"\"\"\n", + "\n", + "\n", + "def get_max_triples(n):\n", + " \"\"\"\n", + " You are given a positive integer n. You have to create an integer array a of length n.\n", + " For each i (1 ≤ i ≤ n), the value of a[i] = i * i - i + 1.\n", + " Return the number of triples (a[i], a[j], a[k]) of a where i < j < k, \n", + " and a[i] + a[j] + a[k] is a multiple of 3.\n", + "\n", + " Example :\n", + " Input: n = 5\n", + " Output: 1\n", + " Explanation: \n", + " a = [1, 3, 7, 13, 21]\n", + " The only valid triple is (1, 7, 13).\n", + " \"\"\"\n", + "\n", + "\n", + "def bf(planet1, planet2):\n", + " '''\n", + " There are eight planets in our solar system: the closerst to the Sun \n", + " is Mercury, the next one is Venus, then Earth, Mars, Jupiter, Saturn, \n", + " Uranus, Neptune.\n", + " Write a function that takes two planet names as strings planet1 and planet2. \n", + " The function should return a tuple containing all planets whose orbits are \n", + " located between the orbit of planet1 and the orbit of planet2, sorted by \n", + " the proximity to the sun. \n", + " The function should return an empty tuple if planet1 or planet2\n", + " are not correct planet names. \n", + " Examples\n", + " bf(\"Jupiter\", \"Neptune\") ==> (\"Saturn\", \"Uranus\")\n", + " bf(\"Earth\", \"Mercury\") ==> (\"Venus\")\n", + " bf(\"Mercury\", \"Uranus\") ==> (\"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\")\n", + " '''\n", + "\n", + "\n", + "def sorted_list_sum(lst):\n", + " \"\"\"Write a function that accepts a list of strings as a parameter,\n", + " deletes the strings that have odd lengths from it,\n", + " and returns the resulted list with a sorted order,\n", + " The list is always a list of strings and never an array of numbers,\n", + " and it may contain duplicates.\n", + " The order of the list should be ascending by length of each word, and you\n", + " should return the list sorted by that rule.\n", + " If two words have the same length, sort the list alphabetically.\n", + " The function should return a list of strings in sorted order.\n", + " You may assume that all words will have the same length.\n", + " For example:\n", + " assert list_sort([\"aa\", \"a\", \"aaa\"]) => [\"aa\"]\n", + " assert list_sort([\"ab\", \"a\", \"aaa\", \"cd\"]) => [\"ab\", \"cd\"]\n", + " \"\"\"\n", + "\n", + "\n", + "def x_or_y(n, x, y):\n", + " \"\"\"A simple program which should return the value of x if n is \n", + " a prime number and should return the value of y otherwise.\n", + "\n", + " Examples:\n", + " for x_or_y(7, 34, 12) == 34\n", + " for x_or_y(15, 8, 5) == 5\n", + " \n", + " \"\"\"\n", + "\n", + "\n", + "def double_the_difference(lst):\n", + " '''\n", + " Given a list of numbers, return the sum of squares of the numbers\n", + " in the list that are odd. Ignore numbers that are negative or not integers.\n", + " \n", + " double_the_difference([1, 3, 2, 0]) == 1 + 9 + 0 + 0 = 10\n", + " double_the_difference([-1, -2, 0]) == 0\n", + " double_the_difference([9, -2]) == 81\n", + " double_the_difference([0]) == 0 \n", + " \n", + " If the input list is empty, return 0.\n", + " '''\n", + "\n", + "\n", + "def compare(game,guess):\n", + " \"\"\"I think we all remember that feeling when the result of some long-awaited\n", + " event is finally known. The feelings and thoughts you have at that moment are\n", + " definitely worth noting down and comparing.\n", + " Your task is to determine if a person correctly guessed the results of a number of matches.\n", + " You are given two arrays of scores and guesses of equal length, where each index shows a match. \n", + " Return an array of the same length denoting how far off each guess was. If they have guessed correctly,\n", + " the value is 0, and if not, the value is the absolute difference between the guess and the score.\n", + " \n", + " \n", + " example:\n", + "\n", + " compare([1,2,3,4,5,1],[1,2,3,4,2,-2]) -> [0,0,0,0,3,3]\n", + " compare([0,5,0,0,0,4],[4,1,1,0,0,-2]) -> [4,4,1,0,0,6]\n", + " \"\"\"\n", + "\n", + "\n", + "def Strongest_Extension(class_name, extensions):\n", + " \"\"\"You will be given the name of a class (a string) and a list of extensions.\n", + " The extensions are to be used to load additional classes to the class. The\n", + " strength of the extension is as follows: Let CAP be the number of the uppercase\n", + " letters in the extension's name, and let SM be the number of lowercase letters \n", + " in the extension's name, the strength is given by the fraction CAP - SM. \n", + " You should find the strongest extension and return a string in this \n", + " format: ClassName.StrongestExtensionName.\n", + " If there are two or more extensions with the same strength, you should\n", + " choose the one that comes first in the list.\n", + " For example, if you are given \"Slices\" as the class and a list of the\n", + " extensions: ['SErviNGSliCes', 'Cheese', 'StuFfed'] then you should\n", + " return 'Slices.SErviNGSliCes' since 'SErviNGSliCes' is the strongest extension \n", + " (its strength is -1).\n", + " Example:\n", + " for Strongest_Extension('my_class', ['AA', 'Be', 'CC']) == 'my_class.AA'\n", + " \"\"\"\n", + "\n", + "\n", + "def cycpattern_check(a , b):\n", + " \"\"\"You are given 2 words. You need to return True if the second word or any of its rotations is a substring in the first word\n", + " cycpattern_check(\"abcd\",\"abd\") => False\n", + " cycpattern_check(\"hello\",\"ell\") => True\n", + " cycpattern_check(\"whassup\",\"psus\") => False\n", + " cycpattern_check(\"abab\",\"baa\") => True\n", + " cycpattern_check(\"efef\",\"eeff\") => False\n", + " cycpattern_check(\"himenss\",\"simen\") => True\n", + "\n", + " \"\"\"\n", + "\n", + "\n", + "def even_odd_count(num):\n", + " \"\"\"Given an integer. return a tuple that has the number of even and odd digits respectively.\n", + "\n", + " Example:\n", + " even_odd_count(-12) ==> (1, 1)\n", + " even_odd_count(123) ==> (1, 2)\n", + " \"\"\"\n", + "\n", + "\n", + "def int_to_mini_roman(number):\n", + " \"\"\"\n", + " Given a positive integer, obtain its roman numeral equivalent as a string,\n", + " and return it in lowercase.\n", + " Restrictions: 1 <= num <= 1000\n", + "\n", + " Examples:\n", + " >>> int_to_mini_roman(19) == 'xix'\n", + " >>> int_to_mini_roman(152) == 'clii'\n", + " >>> int_to_mini_roman(426) == 'cdxxvi'\n", + " \"\"\"\n", + "\n", + "\n", + "def right_angle_triangle(a, b, c):\n", + " '''\n", + " Given the lengths of the three sides of a triangle. Return True if the three\n", + " sides form a right-angled triangle, False otherwise.\n", + " A right-angled triangle is a triangle in which one angle is right angle or \n", + " 90 degree.\n", + " Example:\n", + " right_angle_triangle(3, 4, 5) == True\n", + " right_angle_triangle(1, 2, 3) == False\n", + " '''\n", + "\n", + "\n", + "def find_max(words):\n", + " \"\"\"Write a function that accepts a list of strings.\n", + " The list contains different words. Return the word with maximum number\n", + " of unique characters. If multiple strings have maximum number of unique\n", + " characters, return the one which comes first in lexicographical order.\n", + "\n", + " find_max([\"name\", \"of\", \"string\"]) == \"string\"\n", + " find_max([\"name\", \"enam\", \"game\"]) == \"enam\"\n", + " find_max([\"aaaaaaa\", \"bb\" ,\"cc\"]) == \"\"aaaaaaa\"\n", + " \"\"\"\n", + "\n", + "\n", + "def eat(number, need, remaining):\n", + " \"\"\"\n", + " You're a hungry rabbit, and you already have eaten a certain number of carrots,\n", + " but now you need to eat more carrots to complete the day's meals.\n", + " you should return an array of [ total number of eaten carrots after your meals,\n", + " the number of carrots left after your meals ]\n", + " if there are not enough remaining carrots, you will eat all remaining carrots, but will still be hungry.\n", + " \n", + " Example:\n", + " * eat(5, 6, 10) -> [11, 4]\n", + " * eat(4, 8, 9) -> [12, 1]\n", + " * eat(1, 10, 10) -> [11, 0]\n", + " * eat(2, 11, 5) -> [7, 0]\n", + " \n", + " Variables:\n", + " @number : integer\n", + " the number of carrots that you have eaten.\n", + " @need : integer\n", + " the number of carrots that you need to eat.\n", + " @remaining : integer\n", + " the number of remaining carrots thet exist in stock\n", + " \n", + " Constrain:\n", + " * 0 <= number <= 1000\n", + " * 0 <= need <= 1000\n", + " * 0 <= remaining <= 1000\n", + "\n", + " Have fun :)\n", + " \"\"\"\n", + "\n", + "\n", + "def do_algebra(operator, operand):\n", + " \"\"\"\n", + " Given two lists operator, and operand. The first list has basic algebra operations, and \n", + " the second list is a list of integers. Use the two given lists to build the algebric \n", + " expression and return the evaluation of this expression.\n", + "\n", + " The basic algebra operations:\n", + " Addition ( + ) \n", + " Subtraction ( - ) \n", + " Multiplication ( * ) \n", + " Floor division ( // ) \n", + " Exponentiation ( ** ) \n", + "\n", + " Example:\n", + " operator['+', '*', '-']\n", + " array = [2, 3, 4, 5]\n", + " result = 2 + 3 * 4 - 5\n", + " => result = 9\n", + "\n", + " Note:\n", + " The length of operator list is equal to the length of operand list minus one.\n", + " Operand is a list of of non-negative integers.\n", + " Operator list has at least one operator, and operand list has at least two operands.\n", + "\n", + " \"\"\"\n", + "\n", + "\n", + "def solve(s):\n", + " \"\"\"You are given a string s.\n", + " if s[i] is a letter, reverse its case from lower to upper or vise versa, \n", + " otherwise keep it as it is.\n", + " If the string contains no letters, reverse the string.\n", + " The function should return the resulted string.\n", + " Examples\n", + " solve(\"1234\") = \"4321\"\n", + " solve(\"ab\") = \"AB\"\n", + " solve(\"#a@C\") = \"#A@c\"\n", + " \"\"\"\n", + "\n", + "\n", + "def string_to_md5(text):\n", + " \"\"\"\n", + " Given a string 'text', return its md5 hash equivalent string.\n", + " If 'text' is an empty string, return None.\n", + "\n", + " >>> string_to_md5('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'\n", + " \"\"\"\n", + "\n", + "\n", + "def generate_integers(a, b):\n", + " \"\"\"\n", + " Given two positive integers a and b, return the even digits between a\n", + " and b, in ascending order.\n", + "\n", + " For example:\n", + " generate_integers(2, 8) => [2, 4, 6, 8]\n", + " generate_integers(8, 2) => [2, 4, 6, 8]\n", + " generate_integers(10, 14) => []\n", + " \"\"\"\n", + "\n" + ] + } + ], + "source": [ + "from evalplus.data import get_human_eval_plus\n", + "\n", + "humaneval = get_human_eval_plus()\n", + "print(humaneval['HumanEval/32'][\"entry_point\"])\n", + "print(humaneval['HumanEval/0'][\"prompt\"])\n", + "\n", + "for key, value in humaneval.items():\n", + " if key == \"HumanEval/32\" or key == \"HumanEval/38\" or key == \"HumanEval/50\":\n", + " pass\n", + " else:\n", + " entry_point = value[\"entry_point\"]\n", + " prompt = value[\"prompt\"]\n", + " print(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "find_zero\n", + "import math\n", + "\n", + "\n", + "def poly(xs: list, x: float):\n", + " \"\"\"\n", + " Evaluates polynomial with coefficients xs at point x.\n", + " return xs[0] + xs[1] * x + xs[1] * x^2 + .... xs[n] * x^n\n", + " \"\"\"\n", + " return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n", + "\n", + "\n", + "def find_zero(xs: list):\n", + " \"\"\" xs are coefficients of a polynomial.\n", + " find_zero find x such that poly(x) = 0.\n", + " find_zero returns only only zero point, even if there are many.\n", + " Moreover, find_zero only takes list xs having even number of coefficients\n", + " and largest non zero coefficient as it guarantees\n", + " a solution.\n", + " >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n", + " -0.5\n", + " >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n", + " 1.0\n", + " \"\"\"\n", + "\n", + "import math\n", + "def poly(xs: list, x: float):\n", + " \"\"\"\n", + " Evaluates polynomial with coefficients xs at point x.\n", + " return xs[0] + xs[1] * x + xs[2] * x^2 + .... xs[n] * x^n\n", + " \"\"\"\n", + " return sum(coeff * math.pow(x, i) for i, coeff in enumerate(xs))\n", + "def find_zero(xs: list):\n", + " \"\"\" \n", + " xs are coefficients of a polynomial.\n", + " find_zero finds x such that poly(x) = 0.\n", + " find_zero returns only one zero point, even if there are many.\n", + " Moreover, find_zero only takes list xs having an even number of coefficients\n", + " and the largest non-zero coefficient as it guarantees a solution.\n", + " \n", + " >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\n", + " -0.5\n", + " >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\n", + " 1.0\n", + " \"\"\"\n", + " if len(xs) % 2 != 0:\n", + " raise ValueError(\"The number of coefficients must be even.\")\n", + " \n", + " # Initial guess for the root\n", + " x0 = 0.0\n", + " tolerance = 1e-7\n", + " max_iterations = 1000\n", + " \n", + " for _ in range(max_iterations):\n", + " f_x0 = poly(xs, x0)\n", + " f_prime_x0 = sum(i * coeff * math.pow(x0, i - 1) for i, coeff in enumerate(xs) if i > 0)\n", + " \n", + " if f_prime_x0 == 0: # Avoid division by zero\n", + " raise ValueError(\"Derivative is zero. No solution found.\")\n", + " \n", + " x1 = x0 - f_x0 / f_prime_x0\n", + " \n", + " if abs(x1 - x0) < tolerance:\n", + " return x1\n", + " \n", + " x0 = x1\n", + " \n", + " raise ValueError(\"Maximum iterations reached. No solution found.\")\n" + ] + } + ], + "source": [ + "\n", + "from evalplus.data import get_human_eval_plus\n", + "\n", + "humaneval = get_human_eval_plus()\n", + "print(humaneval['HumanEval/32'][\"entry_point\"])\n", + "print(humaneval['HumanEval/32'][\"prompt\"])\n", + "result = {\"task_id\": \"HumanEval/32\", \"solution\": \"import math\\ndef poly(xs: list, x: float):\\n \\\"\\\"\\\"\\n Evaluates polynomial with coefficients xs at point x.\\n return xs[0] + xs[1] * x + xs[2] * x^2 + .... xs[n] * x^n\\n \\\"\\\"\\\"\\n return sum(coeff * math.pow(x, i) for i, coeff in enumerate(xs))\\ndef find_zero(xs: list):\\n \\\"\\\"\\\" \\n xs are coefficients of a polynomial.\\n find_zero finds x such that poly(x) = 0.\\n find_zero returns only one zero point, even if there are many.\\n Moreover, find_zero only takes list xs having an even number of coefficients\\n and the largest non-zero coefficient as it guarantees a solution.\\n \\n >>> round(find_zero([1, 2]), 2) # f(x) = 1 + 2x\\n -0.5\\n >>> round(find_zero([-6, 11, -6, 1]), 2) # (x - 1) * (x - 2) * (x - 3) = -6 + 11x - 6x^2 + x^3\\n 1.0\\n \\\"\\\"\\\"\\n if len(xs) % 2 != 0:\\n raise ValueError(\\\"The number of coefficients must be even.\\\")\\n \\n # Initial guess for the root\\n x0 = 0.0\\n tolerance = 1e-7\\n max_iterations = 1000\\n \\n for _ in range(max_iterations):\\n f_x0 = poly(xs, x0)\\n f_prime_x0 = sum(i * coeff * math.pow(x0, i - 1) for i, coeff in enumerate(xs) if i > 0)\\n \\n if f_prime_x0 == 0: # Avoid division by zero\\n raise ValueError(\\\"Derivative is zero. No solution found.\\\")\\n \\n x1 = x0 - f_x0 / f_prime_x0\\n \\n if abs(x1 - x0) < tolerance:\\n return x1\\n \\n x0 = x1\\n \\n raise ValueError(\\\"Maximum iterations reached. No solution found.\\\")\"}\n", + "print(result[\"solution\"])\n", + "# print(\"--------------------\")\n", + "# print(humaneval['HumanEval/38'][\"entry_point\"])\n", + "# print(humaneval['HumanEval/38'][\"prompt\"])\n", + "# print(\"--------------------\")\n", + "# print(humaneval['HumanEval/50'][\"entry_point\"])\n", + "# print(humaneval['HumanEval/50'][\"prompt\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-28 12:17:06.501 | INFO | metagpt.const:get_metagpt_package_root:29 - Package root set to /Users/trl/Github_project/MetaGPT-MathAI\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/39'}, {'task_id': 'HumanEval/65'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/83'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/93'}, {'task_id': 'HumanEval/99'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/126'}, {'task_id': 'HumanEval/127'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/145'}, {'task_id': 'HumanEval/160'}, {'task_id': 'HumanEval/163'}]\n", + "22\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/40'}, {'task_id': 'HumanEval/39'}, {'task_id': 'HumanEval/67'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/76'}, {'task_id': 'HumanEval/83'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/93'}, {'task_id': 'HumanEval/99'}, {'task_id': 'HumanEval/110'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/126'}, {'task_id': 'HumanEval/127'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/140'}, {'task_id': 'HumanEval/145'}, {'task_id': 'HumanEval/160'}, {'task_id': 'HumanEval/163'}]\n" + ] + } + ], + "source": [ + "from examples.ags.benchmark.humaneval import extract_failure_tests\n", + "\n", + "file_path_list = [\"alpha_based_101_eval_results.json\",\"alpha_based_102_eval_results.json\"]\n", + "\n", + "for file_path in file_path_list:\n", + " unpassed_exapmle = extract_failure_tests(file_path)\n", + " print(unpassed_exapmle)" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -134,6 +2713,38 @@ "\n", "for file_path in file_path_list:\n", " unpassed_exapmle = extract_failure_tests(file_path)\n", + " print(unpassed_exapmle)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/40'}, {'task_id': 'HumanEval/65'}, {'task_id': 'HumanEval/67'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/110'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/126'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/140'}, {'task_id': 'HumanEval/145'}]\n", + "14\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/83'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/93'}, {'task_id': 'HumanEval/99'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/126'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/140'}, {'task_id': 'HumanEval/145'}]\n", + "18\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/40'}, {'task_id': 'HumanEval/65'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/93'}, {'task_id': 'HumanEval/99'}, {'task_id': 'HumanEval/102'}, {'task_id': 'HumanEval/110'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/126'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/140'}, {'task_id': 'HumanEval/145'}, {'task_id': 'HumanEval/160'}]\n", + "16\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/65'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/83'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/93'}, {'task_id': 'HumanEval/99'}, {'task_id': 'HumanEval/110'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/127'}, {'task_id': 'HumanEval/126'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/145'}]\n", + "17\n", + "[{'task_id': 'HumanEval/32'}, {'task_id': 'HumanEval/40'}, {'task_id': 'HumanEval/65'}, {'task_id': 'HumanEval/74'}, {'task_id': 'HumanEval/75'}, {'task_id': 'HumanEval/91'}, {'task_id': 'HumanEval/93'}, {'task_id': 'HumanEval/99'}, {'task_id': 'HumanEval/110'}, {'task_id': 'HumanEval/115'}, {'task_id': 'HumanEval/127'}, {'task_id': 'HumanEval/130'}, {'task_id': 'HumanEval/132'}, {'task_id': 'HumanEval/134'}, {'task_id': 'HumanEval/129'}, {'task_id': 'HumanEval/145'}, {'task_id': 'HumanEval/160'}]\n" + ] + } + ], + "source": [ + "from examples.ags.benchmark.humaneval import extract_failure_tests\n", + "\n", + "file_path_list = [\"alpha_based_104_eval_results.json\", \"alpha_based_105_eval_results.json\", \"alpha_based_106_eval_results.json\", \"alpha_based_107_eval_results.json\", \"alpha_based_108_eval_results.json\"]\n", + "\n", + "for file_path in file_path_list:\n", + " unpassed_exapmle = extract_failure_tests(file_path)\n", " print(unpassed_exapmle)" ] }, @@ -689,6 +3300,143 @@ "\n", "print(modify_result[\"solution\"])" ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def fix_spaces(text):\n", + " \"\"\"\n", + " Given a string text, replace all spaces in it with underscores, \n", + " and if a string has more than 2 consecutive spaces, \n", + " then replace all consecutive spaces with - \n", + " \n", + " fix_spaces(\"Example\") == \"Example\"\n", + " fix_spaces(\"Example 1\") == \"Example_1\"\n", + " fix_spaces(\" Example 2\") == \"_Example_2\"\n", + " fix_spaces(\" Example 3\") == \"_Example-3\"\n", + " \"\"\"\n", + " # Replace multiple spaces with a hyphen\n", + " while ' ' in text: # While there are more than 2 consecutive spaces\n", + " text = text.replace(' ', '-')\n", + " \n", + " # Replace single spaces with underscores\n", + " text = text.replace(' ', '_')\n", + " \n", + " return text\n", + "\n", + "def check(candidate):\n", + " assert candidate('Example') == 'Example'\n", + " assert candidate('Mudasir Hanif ') == 'Mudasir_Hanif_'\n", + " assert candidate('Yellow Yellow Dirty Fellow') == 'Yellow_Yellow__Dirty__Fellow'\n", + " assert candidate('Exa mple') == 'Exa-mple'\n", + " assert candidate(' Exa 1 2 2 mple') == '-Exa_1_2_2_mple'\n", + "\n", + "\n", + "check(fix_spaces)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Example\n", + "Mudasir_Hanif\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 32\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtest_check\u001b[39m():\n\u001b[1;32m 30\u001b[0m check(fix_spaces)\n\u001b[0;32m---> 32\u001b[0m \u001b[43mtest_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[13], line 30\u001b[0m, in \u001b[0;36mtest_check\u001b[0;34m()\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtest_check\u001b[39m():\n\u001b[0;32m---> 30\u001b[0m \u001b[43mcheck\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfix_spaces\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[13], line 24\u001b[0m, in \u001b[0;36mcheck\u001b[0;34m(candidate)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcheck\u001b[39m(candidate):\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m candidate(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mExample\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mExample\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m---> 24\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m candidate(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMudasir Hanif \u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMudasir_Hanif_\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m candidate(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mYellow Yellow Dirty Fellow\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mYellow_Yellow__Dirty__Fellow\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m candidate(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mExa mple\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mExa-mple\u001b[39m\u001b[38;5;124m'\u001b[39m\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "import re\n", + "\n", + "def fix_spaces(text):\n", + " \"\"\"\n", + " Given a string text, replace all spaces in it with underscores, \n", + " and if a string has more than 2 consecutive spaces, \n", + " then replace all consecutive spaces with - \n", + " \n", + " fix_spaces(\"Example\") == \"Example\"\n", + " fix_spaces(\"Example 1\") == \"Example_1\"\n", + " fix_spaces(\" Example 2\") == \"_Example_2\"\n", + " fix_spaces(\" Example 3\") == \"_Example-3\"\n", + " \"\"\"\n", + " # Replace multiple spaces with a hyphen\n", + " text = re.sub(r' {3,}', '-', text)\n", + " # Replace single spaces with underscores\n", + " text = text.replace(' ', '_')\n", + " # Handle leading and trailing underscores\n", + " print(text.strip('_'))\n", + "\n", + "def check(candidate):\n", + " assert candidate('Example') == 'Example'\n", + " assert candidate('Mudasir Hanif ') == 'Mudasir_Hanif_'\n", + " assert candidate('Yellow Yellow Dirty Fellow') == 'Yellow_Yellow__Dirty__Fellow'\n", + " assert candidate('Exa mple') == 'Exa-mple'\n", + " assert candidate(' Exa 1 2 2 mple') == '-Exa_1_2_2_mple'\n", + "\n", + "def test_check():\n", + " check(fix_spaces)\n", + "\n", + "test_check()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "import re\n", + "\n", + "def fix_spaces(text):\n", + " \"\"\"\n", + " Given a string text, replace all spaces in it with underscores, \n", + " and if a string has more than 2 consecutive spaces, \n", + " then replace all consecutive spaces with - \n", + " \n", + " fix_spaces(\"Example\") == \"Example\"\n", + " fix_spaces(\"Example 1\") == \"Example_1\"\n", + " fix_spaces(\" Example 2\") == \"_Example_2\"\n", + " fix_spaces(\" Example 3\") == \"_Example-3\"\n", + " \"\"\"\n", + " # Replace multiple spaces with a hyphen\n", + " text = re.sub(r' {3,}', '-', text)\n", + " # Replace single spaces with underscores\n", + " text = text.replace(' ', '_')\n", + " # Handle leading and trailing underscores\n", + " return text.strip('_')\n" + ] + } + ], + "source": [ + "result = {\n", + " \"reflection\": \"The original code attempts to replace multiple spaces with a hyphen and single spaces with underscores, but it only checks for three consecutive spaces. This approach fails for cases with more than three consecutive spaces and does not handle leading and trailing spaces correctly. Additionally, the order of replacements can lead to incorrect results. A better approach is to use regular expressions to handle all cases in a single pass, ensuring that we replace multiple spaces with a hyphen and single spaces with underscores while also managing leading and trailing spaces appropriately.\",\n", + " \"refined_solution\": \"import re\\n\\ndef fix_spaces(text):\\n \\\"\\\"\\\"\\n Given a string text, replace all spaces in it with underscores, \\n and if a string has more than 2 consecutive spaces, \\n then replace all consecutive spaces with - \\n \\n fix_spaces(\\\"Example\\\") == \\\"Example\\\"\\n fix_spaces(\\\"Example 1\\\") == \\\"Example_1\\\"\\n fix_spaces(\\\" Example 2\\\") == \\\"_Example_2\\\"\\n fix_spaces(\\\" Example 3\\\") == \\\"_Example-3\\\"\\n \\\"\\\"\\\"\\n # Replace multiple spaces with a hyphen\\n text = re.sub(r' {3,}', '-', text)\\n # Replace single spaces with underscores\\n text = text.replace(' ', '_')\\n # Handle leading and trailing underscores\\n return text.strip('_')\"\n", + "}\n", + "print(result['refined_solution'])" + ] } ], "metadata": { diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 20a73a433..ef0414ff7 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -38,6 +38,7 @@ class ReviseMode(Enum): TAG = "CONTENT" +MODE_CODE_FILL = "code_fill" LANGUAGE_CONSTRAINT = "Language: Please use the same language as Human INPUT." FORMAT_CONSTRAINT = f"Format: output wrapped inside [{TAG}][/{TAG}] like format example, nothing else." @@ -149,8 +150,6 @@ class ActionNode: prevs: List["ActionNode"] # previous nodes nexts: List["ActionNode"] # next nodes - MODE_CODE_FILL = "code_fill" - def __init__( self, key: str, @@ -474,53 +473,26 @@ class ActionNode: """ model_class = self.create_class() fields = model_class.model_fields - + # Assuming there's only one field in the model if len(fields) == 1: return next(iter(fields)) - + # If there are multiple fields, we might want to use self.key to find the right one return self.key - - async def code_fill( - self, - context, - function_name=None, - timeout=USE_CONFIG_TIMEOUT - ): + + async def code_fill(self, context, function_name=None, timeout=USE_CONFIG_TIMEOUT): """ fill CodeBlock Node """ - def extract_code_from_response(response): - """ - Extracts code wrapped in triple backticks from the response, - removing any language specifier. - - :param response: The full response from the LLM - :return: The extracted code, or None if no code is found - """ - code_pattern = r"```(?:\w+\n)?([\s\S]*?)```" - matches = re.findall(code_pattern, response) - - if matches: - # The first group in the regex contains the code without the language specifier - code = matches[0].strip() - return code - return None - - import re field_name = self.get_field_name() prompt = context - # print("generate prompt", "\n", prompt) content = await self.llm.aask(prompt, timeout=timeout) - # print("generate content", "\n", content) extracted_code = sanitize(code=content, entrypoint=function_name) - # extracted_code = extract_code_from_response(content) result = {field_name: extracted_code} - # print("final_result", "\n", result) return result - + async def messages_fill( self, ): @@ -540,7 +512,7 @@ class ActionNode: images: Optional[Union[str, list[str]]] = None, timeout=USE_CONFIG_TIMEOUT, exclude=[], - function_name: str = None + function_name: str = None, ): """Fill the node(s) with mode. diff --git a/metagpt/actions/code_sanitize.py b/metagpt/actions/code_sanitize.py index 958c712df..56422589c 100644 --- a/metagpt/actions/code_sanitize.py +++ b/metagpt/actions/code_sanitize.py @@ -4,28 +4,35 @@ @Time : 2024/7/24 16:37 @Author : didi @File : code_node.py +@Acknowledgement https://github.com/evalplus/evalplus/blob/master/evalplus/sanitize.py """ -import os import ast -import pathlib import traceback - +from enum import Enum from typing import Dict, Generator, List, Optional, Set, Tuple import tree_sitter_python -from tqdm import tqdm from tree_sitter import Language, Node, Parser -CLASS_TYPE = "class_definition" -FUNCTION_TYPE = "function_definition" -IMPORT_TYPE = ["import_statement", "import_from_statement"] -IDENTIFIER_TYPE = "identifier" -ATTRIBUTE_TYPE = "attribute" -RETURN_TYPE = "return_statement" -EXPRESSION_TYPE = "expression_statement" -ASSIGNMENT_TYPE = "assignment" + +class NodeType(Enum): + CLASS = "class_definition" + FUNCTION = "function_definition" + IMPORT = ["import_statement", "import_from_statement"] + IDENTIFIER = "identifier" + ATTRIBUTE = "attribute" + RETURN = "return_statement" + EXPRESSION = "expression_statement" + ASSIGNMENT = "assignment" + def traverse_tree(node: Node) -> Generator[Node, None, None]: + """ + Traverse the tree structure starting from the given node. + + :param node: The root node to start the traversal from. + :return: A generator object that yields nodes in the tree. + """ cursor = node.walk() depth = 0 @@ -43,6 +50,7 @@ def traverse_tree(node: Node) -> Generator[Node, None, None]: else: depth -= 1 + def syntax_check(code, verbose=False): try: ast.parse(code) @@ -52,6 +60,7 @@ def syntax_check(code, verbose=False): traceback.print_exc() return False + def code_extract(text: str) -> str: lines = text.split("\n") longest_line_pair = (0, 0) @@ -68,22 +77,25 @@ def code_extract(text: str) -> str: return "\n".join(lines[longest_line_pair[0] : longest_line_pair[1] + 1]) + def get_definition_name(node: Node) -> str: for child in node.children: - if child.type == IDENTIFIER_TYPE: + if child.type == NodeType.IDENTIFIER.value: return child.text.decode("utf8") - + + def has_return_statement(node: Node) -> bool: traverse_nodes = traverse_tree(node) for node in traverse_nodes: - if node.type == RETURN_TYPE: + if node.type == NodeType.RETURN.value: return True return False + def get_deps(nodes: List[Tuple[str, Node]]) -> Dict[str, Set[str]]: def dfs_get_deps(node: Node, deps: Set[str]) -> None: for child in node.children: - if child.type == IDENTIFIER_TYPE: + if child.type == NodeType.IDENTIFIER.value: deps.add(child.text.decode("utf8")) else: dfs_get_deps(child, deps) @@ -104,12 +116,23 @@ def get_function_dependency(entrypoint: str, call_graph: Dict[str, str]) -> Set[ if current not in call_graph: continue for neighbour in call_graph[current]: - if not (neighbour in visited): + if neighbour not in visited: visited.add(neighbour) queue.append(neighbour) return visited + def sanitize(code: str, entrypoint: Optional[str] = None) -> str: + """ + Sanitize and extract relevant parts of the given Python code. + This function parses the input code, extracts import statements, class and function definitions, + and variable assignments. If an entrypoint is provided, it only includes definitions that are + reachable from the entrypoint in the call graph. + + :param code: The input Python code as a string. + :param entrypoint: Optional name of a function to use as the entrypoint for dependency analysis. + :return: A sanitized version of the input code, containing only relevant parts. + """ code = code_extract(code) code_bytes = bytes(code, "utf8") parser = Parser(Language(tree_sitter_python.language())) @@ -123,30 +146,24 @@ def sanitize(code: str, entrypoint: Optional[str] = None) -> str: definition_nodes = [] for child in root_node.children: - if child.type in IMPORT_TYPE: + if child.type in NodeType.IMPORT.value: import_nodes.append(child) - elif child.type == CLASS_TYPE: + elif child.type == NodeType.CLASS.value: name = get_definition_name(child) - if not ( - name in class_names or name in variable_names or name in function_names - ): + if not (name in class_names or name in variable_names or name in function_names): definition_nodes.append((name, child)) class_names.add(name) - elif child.type == FUNCTION_TYPE: + elif child.type == NodeType.FUNCTION.value: name = get_definition_name(child) - if not ( - name in function_names or name in variable_names or name in class_names - ) and has_return_statement(child): + if not (name in function_names or name in variable_names or name in class_names) and has_return_statement( + child + ): definition_nodes.append((name, child)) function_names.add(get_definition_name(child)) - elif ( - child.type == EXPRESSION_TYPE and child.children[0].type == ASSIGNMENT_TYPE - ): + elif child.type == NodeType.EXPRESSION.value and child.children[0].type == NodeType.ASSIGNMENT.value: subchild = child.children[0] name = get_definition_name(subchild) - if not ( - name in variable_names or name in function_names or name in class_names - ): + if not (name in variable_names or name in function_names or name in class_names): definition_nodes.append((name, subchild)) variable_names.add(name) @@ -161,7 +178,7 @@ def sanitize(code: str, entrypoint: Optional[str] = None) -> str: for pair in definition_nodes: name, node = pair - if entrypoint and not (name in reacheable): + if entrypoint and name not in reacheable: continue sanitized_output += code_bytes[node.start_byte : node.end_byte] + b"\n" return sanitized_output[:-1].decode("utf8") diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index cf490084d..7bed9e9b7 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -581,6 +581,31 @@ def write_json_file(json_file: str, data: list, encoding: str = None, indent: in json.dump(data, fout, ensure_ascii=False, indent=indent, default=to_jsonable_python) +def read_jsonl_file(jsonl_file: str, encoding="utf-8") -> list[dict]: + if not Path(jsonl_file).exists(): + raise FileNotFoundError(f"json_file: {jsonl_file} not exist, return []") + datas = [] + with open(jsonl_file, "r", encoding=encoding) as fin: + try: + for line in fin: + data = json.loads(line) + datas.append(data) + except Exception: + raise ValueError(f"read jsonl file: {jsonl_file} failed") + return datas + + +def add_jsonl_file(jsonl_file: str, data: list[dict], encoding: str = None, indent: int = 4): + folder_path = Path(jsonl_file).parent + if not folder_path.exists(): + folder_path.mkdir(parents=True, exist_ok=True) + + with open(jsonl_file, "a", encoding=encoding) as fout: + for json_item in data: + json_str = json.dumps(json_item, indent=indent) + fout.write(json_str + "\n") + + def read_csv_to_list(curr_file: str, header=False, strip_trail=True): """ Reads in a csv file to a list of list. If header is True, it returns a