From e0955c5bf93479ce63b79c152436c574fe070a5b Mon Sep 17 00:00:00 2001 From: didi <84363704+didiforgithub@users.noreply.github.com> Date: Tue, 16 Jul 2024 10:15:35 +0800 Subject: [PATCH] Update Sota baseline --- examples/ags/benchmark/gsm8k.py | 0 examples/ags/benchmark/humaneval.py | 9 +- examples/ags/benchmark/humaneval_mg.py | 239 ++++++++++++++++++++++ examples/ags/w_action_node/graph.py | 48 +++-- examples/ags/w_action_node/operator.py | 193 ++++++++++------- examples/ags/w_action_node/operator_an.py | 2 +- examples/ags/w_action_node/prompt.py | 61 +++++- 7 files changed, 442 insertions(+), 110 deletions(-) create mode 100644 examples/ags/benchmark/gsm8k.py create mode 100644 examples/ags/benchmark/humaneval_mg.py diff --git a/examples/ags/benchmark/gsm8k.py b/examples/ags/benchmark/gsm8k.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/ags/benchmark/humaneval.py b/examples/ags/benchmark/humaneval.py index 678102f17..9f63406ef 100644 --- a/examples/ags/benchmark/humaneval.py +++ b/examples/ags/benchmark/humaneval.py @@ -34,8 +34,8 @@ async def samples_generate(mode:str, result_path:str="samples.jsonl"): async def solve_and_write(case, mode): try: if mode == 'llm': - # solution_result = await generate_code_block(case['prompt']) - solution_result = await generate_code(case['prompt']) + solution_result = await generate_code_block(case['prompt']) + # solution_result = await generate_code(case['prompt']) sample_dict = { 'task_id': case['task_id'], 'solution': solution_result['code_solution'] @@ -70,10 +70,12 @@ async def samples_generate(mode:str, result_path:str="samples.jsonl"): jsonl_ranker(result_path, result_path) if not failed_tasks: + if automatic_evalplus(result_path): eval_path = result_path[:-6]+"_eval_results.json" unpassed_exapmle = extract_failure_tests(eval_path) print(unpassed_exapmle) + else: print(failed_tasks) @@ -112,6 +114,9 @@ async def samples_generate_llm(): write_jsonl("samples.jsonl", sample_list) +def hello(): + pass + def automatic_evalplus(result_path:str ="samples.jsonl"): """ 在命令行中自动执行 evalplus.evaluate --dataset humaneval --samples samples.jsonl --parallel 2 --base-only diff --git a/examples/ags/benchmark/humaneval_mg.py b/examples/ags/benchmark/humaneval_mg.py new file mode 100644 index 000000000..67db03bc8 --- /dev/null +++ b/examples/ags/benchmark/humaneval_mg.py @@ -0,0 +1,239 @@ +# Import necessary libraries and modules +import gzip +import itertools +import json +import os +import subprocess +from typing import Dict, Iterable, List, Union + +import numpy as np +import tqdm +from loguru import logger + +# Define the root directory as the location of the script +ROOT = os.path.dirname(os.path.abspath(__file__)) + +# Define the input data file containing human evaluations +HUMAN_EVAL = r"HumanEval.jsonl.gz" + + +def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]: + """ + Reads a JSONL file containing problem evaluations and returns them as a dictionary. + + Args: + evalset_file (str): Path to the JSONL file. + + Returns: + Dict[str, Dict]: A dictionary where task IDs are keys and problem details are values. + """ + return {task["task_id"]: task for task in stream_jsonl(evalset_file)} + + +def stream_jsonl(filename: str) -> Iterable[Dict]: + """ + Parses a JSONL file and yields each line as a dictionary. + + Args: + filename (str): Path to the JSONL file. + + Yields: + Iterable[Dict]: A generator of dictionaries representing JSONL lines. + """ + if filename.endswith(".gz"): + with open(filename, "rb") as gzfp: + with gzip.open(gzfp, "rt") as fp: + for line in fp: + if any(not x.isspace() for x in line): + yield json.loads(line) + else: + with open(filename, "r") as fp: + for line in fp: + if any(not x.isspace() for x in line): + yield json.loads(line) + + +def _generate_examples(filepath, split, name="sanitized"): + if name == "full": + + def _read_lines(fn, start, end): + data = [] + with open(fn, encoding="utf-8") as f: + for line in f: + sample = json.loads(line) + if start <= sample["task_id"] <= end: + data.append(sample) + elif sample["task_id"] > end: + break + return data + + if split == "test": + data = _read_lines(filepath, 11, 510) + elif split == "train": + data = _read_lines(filepath, 601, 974) + elif split == "validation": + data = _read_lines(filepath, 511, 600) + elif split == "prompt": + data = _read_lines(filepath, 1, 10) + + elif name == "sanitized": + with open(filepath, encoding="utf-8") as f: + data = json.load(f) + if split == "test": + data = [sample for sample in data if 11 <= sample["task_id"] <= 510] + elif split == "train": + data = [sample for sample in data if 601 <= sample["task_id"] <= 974] + elif split == "validation": + data = [sample for sample in data if 511 <= sample["task_id"] <= 600] + elif split == "prompt": + data = [sample for sample in data if 1 <= sample["task_id"] <= 10] + id_ = 0 + for sample in data: + yield id_, sample + id_ += 1 + + +def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False): + """ + Writes an iterable of dictionaries to a JSONL file. + + Args: + filename (str): Path to the output JSONL file. + data (Iterable[Dict]): Data to write as JSONL. + append (bool): If True, appends to an existing file, else creates a new file. + """ + # Determine the file writing mode based on the 'append' flag + if append: + mode = "ab" + else: + mode = "wb" + filename = os.path.expanduser(filename) + + # Handle .gz compression + if filename.endswith(".gz"): + with open(filename, mode) as fp: + with gzip.GzipFile(fileobj=fp, mode="wb") as gzfp: + for x in data: + gzfp.write((json.dumps(x) + "\n").encode("utf-8")) + else: + with open(filename, mode) as fp: + for x in data: + fp.write((json.dumps(x) + "\n").encode("utf-8")) + + +def execution(task_id, check_program): + """ + Executes a Python program and captures its output. + + Args: + task_id: A unique identifier for the task. + check_program: The Python program to execute. + + Returns: + bool: True if the execution was successful, False otherwise. + """ + process = subprocess.Popen(["python", "-c", f"{check_program}"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + try: + # Wait for the process to complete, with a timeout + stdout, stderr = process.communicate(timeout=30) + + if len(stderr) == 0: + # logger.info(f"{task_id}: passed") + passed = True + elif b"OK" in stderr: + # logger.info(f"{task_id}: passed, {stderr}") + passed = True + + else: + logger.info(f"{task_id}: error: {stderr}") + passed = False + except subprocess.TimeoutExpired: + logger.info("The command did not complete within the given timeout.") + process.kill() # Kill the process if it times out + logger.info(f"{task_id}: error") + passed = False + return passed + + +def estimate_pass_at_k( + num_samples: Union[int, List[int], np.ndarray], num_correct: Union[List[int], np.ndarray], k: int +) -> np.ndarray: + """ + Estimates pass@k of each problem and returns them in an array. + + Args: + num_samples: Number of total samples (can be an int, list, or NumPy array). + num_correct: Number of correct samples (list or NumPy array). + k (int): The 'k' value for pass@k. + + Returns: + np.ndarray: An array of pass rates for each problem. + """ + + # Define a pass rate estimator function + def estimator(n: int, c: int, k: int) -> float: + if n - c < k: + return 1.0 + return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) + + # Determine the number of samples based on the input type + if isinstance(num_samples, int): + num_samples_it = itertools.repeat(num_samples, len(num_correct)) + else: + assert len(num_samples) == len(num_correct) + num_samples_it = iter(num_samples) + + # Calculate pass rates for each problem + return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]) + + +def evaluate(total: List, correct: List, ks: List = [1, 10]): + """ + Evaluates and logs pass rates at various 'k' values. + + Args: + total (List): List of total samples. + correct (List): List of correct samples. + ks (List): List of 'k' values to evaluate. + + Returns: + dict: A dictionary of pass rates at each 'k' value. + """ + total = np.array(total) + correct = np.array(correct) + + # Calculate and log pass rates at each 'k' value + pass_at_k = {f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() for k in ks if (total >= k).all()} + logger.info(pass_at_k) + return pass_at_k + + +if __name__ == "__main__": + logger.info("Reading samples...") + problems = read_problems(HUMAN_EVAL) + + total, correct = [], [] + passed = [] + + for sample in tqdm.tqdm(stream_jsonl("example_samples.jsonl")): + task_id = sample["task_id"] + completion = sample["completion"] + problem = problems[task_id] + + # Construct a check program + check_program = completion + "\n" + problem["test"] + "\n" + f"check({problem['entry_point']})" + + # Execute the check program and capture the result + passed_flg = execution(task_id, check_program) + + if not passed_flg: + logger.debug("error") + else: + logger.debug("passed") + passed.append(len(passed)) + + total.append(len(passed)) + correct.append(sum(passed)) + + # Evaluate pass rates at various 'k' values + evaluate(total, correct, ks=[1, 5, 10]) diff --git a/examples/ags/w_action_node/graph.py b/examples/ags/w_action_node/graph.py index 3db637188..0766ad94a 100644 --- a/examples/ags/w_action_node/graph.py +++ b/examples/ags/w_action_node/graph.py @@ -4,8 +4,8 @@ # @Desc : graph & an instance - humanevalgraph from metagpt.llm import LLM - -from examples.ags.w_action_node.operator import Generate, GenerateCode, GenerateCodeBlock, Review, Revise, FuEnsemble, MdEnsemble +from typing import List +from examples.ags.w_action_node.operator import Generate, GenerateCode, GenerateCodeBlock, Review, Revise, FuEnsemble, MdEnsemble, DbEnsemble class Graph: def __init__(self, name:str, llm:LLM) -> None: @@ -15,6 +15,9 @@ class Graph: def __call__(): NotImplementedError("Subclasses must implement __call__ method") + def optimize(dataset:List): + pass + class HumanEvalGraph(Graph): def __init__(self, name:str, llm: LLM, criteria:str, vote_count:int =5) -> None: super().__init__(name, llm) @@ -26,15 +29,15 @@ class HumanEvalGraph(Graph): self.fuensemble = FuEnsemble(llm=llm) self.mdensemble = MdEnsemble(llm=llm, vote_count=vote_count) - # async def __call__(self, problem:str, ensemble_count:int = 3): - # solution_list = [] - # for _ in range(ensemble_count): - # solution = await self.generate_code(problem) - # # solution = await self.generate_code_block(problem) - # solution = solution.get('code_solution') - # solution_list.append(solution) - # solution = await self.mdensemble("code", solution_list, problem) - # return solution + async def __call__(self, problem:str, ensemble_count:int = 3): + solution_list = [] + for _ in range(ensemble_count): + solution = await self.generate_code(problem) + # solution = await self.generate_code_block(problem) + solution = solution.get('code_solution') + solution_list.append(solution) + solution = await self.mdensemble("code", solution_list, problem) + return solution async def review_revise_ensemble(self, problem:str, ensemble_count:int = 2): solution_list = [] @@ -45,15 +48,15 @@ class HumanEvalGraph(Graph): return solution # async def simple_ensemble(self, problem:str, ensemble_count:int = 3): - async def __call__(self, problem:str, ensemble_count:int = 3): - solution_list = [] - for _ in range(ensemble_count): - solution = await self.generate_code(problem) - # solution = await self.generate_code_block(problem) - solution = solution.get('code_solution') - solution_list.append(solution) - solution = await self.fuensemble(solution_list, problem) - return solution + # async def __call__(self, problem:str, ensemble_count:int = 3): + # solution_list = [] + # for _ in range(ensemble_count): + # solution = await self.generate_code(problem) + # # solution = await self.generate_code_block(problem) + # solution = solution.get('code_solution') + # solution_list.append(solution) + # solution = await self.fuensemble(solution_list, problem) + # return solution async def single_solve(self, problem:str, max_loop:int): solution = await self.generate_code(problem) @@ -65,4 +68,7 @@ class HumanEvalGraph(Graph): solution = await self.revise(problem, solution, review_feedback['feedback']) solution = solution.get('revised_solution') return solution - \ No newline at end of file + + +class Gsm8kGraph(Graph): + pass \ No newline at end of file diff --git a/examples/ags/w_action_node/operator.py b/examples/ags/w_action_node/operator.py index ca0d1f85e..5b5a48875 100644 --- a/examples/ags/w_action_node/operator.py +++ b/examples/ags/w_action_node/operator.py @@ -11,10 +11,10 @@ from metagpt.actions.action_node import ActionNode from metagpt.llm import LLM from examples.ags.w_action_node.operator_an import GenerateOp, GenerateCodeOp, GenerateCodeBlockOp ,ReviewOp, ReviseOp, FuEnsembleOp, MdEnsembleOp -from examples.ags.w_action_node.prompt import GENERATE_PROMPT, GENERATE_CODE_PROMPT, REVIEW_PROMPT, REVISE_PROMPT, FU_ENSEMBLE_PROMPT, MD_ENSEMBLE_PROMPT, DE_ENSEMBLE_ANGEL_PROMPT, DE_ENSEMBLE_DEVIL_PROMPT, DE_ENSEMBLE_JUDGE_PROMPT - +from examples.ags.w_action_node.prompt import GENERATE_PROMPT, GENERATE_CODE_PROMPT, GENERATE_CODEBLOCK_PROMPT, REVIEW_PROMPT, REVISE_PROMPT, FU_ENSEMBLE_PROMPT, MD_ENSEMBLE_PROMPT, DE_ENSEMBLE_ANGEL_PROMPT, DE_ENSEMBLE_DEVIL_PROMPT, DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT, DE_ENSEMBLE_JUDGE_FINAL_PROMPT +from examples.ags.w_action_node.prompt import DE_ENSEMBLE_CODE_FORMAT_PROMPT, DE_ENSEMBLE_TXT_FORMAT_PROMPT class Operator: - def __init__(self, name, llm:LLM=None): + def __init__(self, name, llm:LLM): self.name = name self.llm = llm @@ -48,7 +48,7 @@ class GenerateCodeBlock(Operator): super().__init__(name, llm) async def __call__(self, problem_description): - prompt = GENERATE_CODE_PROMPT.format(problem_description=problem_description) + prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem_description) node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(context=prompt, llm=self.llm,mode='code_fill') response = node.instruct_content.model_dump() return response @@ -91,6 +91,10 @@ class FuEnsemble(Operator): return response class MdEnsemble(Operator): + """ + MedPrompt + + """ def __init__(self, name:str ="MedEnsembler", llm: LLM = LLM(), vote_count:int=3): super().__init__(name, llm) self.vote_count = vote_count @@ -172,87 +176,120 @@ class DbEnsemble(Operator): """ def __init__(self, name:str ="DebateEnsemble", llm: LLM = LLM()): super().__init__(name, llm) - self.agents = [ - ] - - async def debate_answer(self, message_history:List, role:str): + self.agents = ["angel","devil","judge"] + self.format_requirements = { + "txt":DE_ENSEMBLE_TXT_FORMAT_PROMPT, + "code":DE_ENSEMBLE_CODE_FORMAT_PROMPT + } + + def get_system_prompt(self, name:str, mode:str='txt'): + if name == "angel": + if mode == "code": + return DE_ENSEMBLE_ANGEL_PROMPT + "\n" + DE_ENSEMBLE_CODE_FORMAT_PROMPT + return DE_ENSEMBLE_ANGEL_PROMPT + "\n" + DE_ENSEMBLE_TXT_FORMAT_PROMPT + elif name == "devil": + if mode == "code": + return DE_ENSEMBLE_DEVIL_PROMPT + "\n" + DE_ENSEMBLE_CODE_FORMAT_PROMPT + return DE_ENSEMBLE_DEVIL_PROMPT + "\n" + DE_ENSEMBLE_TXT_FORMAT_PROMPT + elif name == "judge": + if mode == "final": + return DE_ENSEMBLE_JUDGE_FINAL_PROMPT + return DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT + + def construct_messages(self, message_history_with_name, name, mode:str="txt", phase:str="universal"): """ - async def lowlevel_api_example(llm: LLM): - logger.info("low level api example") - logger.info(await llm.aask_batch(["hi", "write python hello world."])) - - hello_msg = [{"role": "user", "content": "count from 1 to 10. split by newline."}] - logger.info(await llm.acompletion(hello_msg)) - logger.info(await llm.acompletion_text(hello_msg)) - - # streaming mode, much slower - await llm.acompletion_text(hello_msg, stream=True) - - # check completion if exist to test llm complete functions - if hasattr(llm, "completion"): - logger.info(llm.completion(hello_msg)) + 基于name与mode来构建system message. + 基于name来构建messages """ - if role == "angel": - prompt = DE_ENSEMBLE_ANGEL_PROMPT.format() - Op = "" - else: - prompt = DE_ENSEMBLE_DEVIL_PROMPT.format() - Op = "" + messages = [] + messages.append({"role": "system", "content": self.get_system_prompt(name, mode)}) + + if name in ["angel", "devil"]: + messages = self._construct_debate(message_history_with_name, name, messages) + elif name == "judge": + messages = self._construct_judge(message_history_with_name, mode, messages) + return messages + + def _construct_debate(self, message_history_with_name, name, messages): + user_message = "" - node = await ActionNode.from_pydantic(Op).messages_fill(messages=message_history,llm=self.llm) - node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=prompt, llm=self.llm) - response = node.instruct_content.model_dump() - return response + for message in message_history_with_name: + if message["name"] == "Judge": + continue + elif message["name"] == name: + if user_message: + messages.append({ + "role": "user", + "name": "user", + "content": user_message.strip("\n"), + }) + messages.append({ + "role": "assistant", + "name": name, + "content": message["content"], + }) + user_message = "" + else: + user_message += message["content"] + + if user_message: + messages.append({ + "role": "user", + "name": "user", + "content": user_message.strip("\n"), + }) + + return messages - async def judge_answer(message_histroy:List): - """ - - """ + def _construct_judge(self, message_history_with_name, mode, messages): pass - async def __call__(self, origin_solution:str, problem_description:str, max_round:int = 3): + async def debate_answer(self, message_history:List, role:str="angel"): + messages = self.construct_messages(message_history, role) + response = await self.llm.acompletion_text(messages=messages) + message_history.append({ + "role":"user", + "name":role, + "content":response} + ) + return message_history, response + + async def judge_answer(self, message_history:List, phase:str="universal"): + messages = self.construct_messages(message_history, "judge", phase=phase) + response = await self.llm.acompletion_text(messages=messages) + message_history.append({ + "role": "user", + "name": "judge", + "content": response} + ) + return message_history, response + + async def __call__(self, origin_solution:str, problem_description:str, max_round:int = 3, mode:str='txt'): # 思路,输入一个原始答案,构建一个agent代表这个答案进行辩论;另一个agent(devil)使用debate llm的内容进行辩论;法官在每一轮次做出决定是否终止,到了maxround还没终止就由法官进行总结。 - # 以下是调用llm的方法 - """ - 1. judge信息只有法官自己看到 - 2. agent answer信息所有人都能看到,具体代码逻辑在debate - """ - # 在MG里面多轮对话传Message在哪里传,预计时间1小时左右吧 - - angel_prompt = DE_ENSEMBLE_ANGEL_PROMPT.format() - devil_prompt = DE_ENSEMBLE_DEVIL_PROMPT.format() - judge_prompt = DE_ENSEMBLE_JUDGE_PROMPT.format() - ''' - Devil - You agree with my answer 90% of the time and have almost no reservations. Affirm your agreement, share any additional thoughts if you have them, and conclude with the capital letter corresponding to your answer at the end of your response. - - Angel - Do you agree with my perspective? Please provide your reasons and answer. - - Judge - final_mode: "You, as the moderator, will evaluate both sides' answers and determine your - preference for an answer candidate. Please summarize your reasons for supporting affirmative/negative side and - give the final answer that you think is correct to conclude the debate. Now please output your answer in json format, with the format as follows: - {\"Reason\": \"\", \"debate_answer\": \"the capital letter corresponding to the answer\"}. - Please strictly output in JSON format, do not output irrelevant content." - - universal_mode: "You, as the moderator, will evaluate both sides' answers and determine if there is a clear - preference for an answer candidate. If so, please summarize your reasons for supporting affirmative/negative side and - give the final answer that you think is correct, and the debate will conclude. If not, the debate will continue to - the next round. Now please output your answer in json format, with the format as follows: - {\"Whether there is a preference\": \"Yes or No\", \"Supported Side\": \"Affirmative or Negative\", - \"Reason\": \"\", \"debate_answer\": \"the capital letter corresponding to the answer\"}. - Please strictly output in JSON format, do not output irrelevant content." - ''' - - # 在action node 之中构建一个能够传递message history的方法。 - for _ in max_round: + message_history_with_name = [ + {"role":"user", "name":"angel", "content":origin_solution} + ] + + for index in range(max_round): for agent in self.agents: - pass + if agent == "angel": + if index == 0: + pass + message_history_with_name, rsp = self.debate_answer(message_history_with_name, role="angel") + elif agent == "devil": + message_history_with_name, rsp = self.debate_answer(message_history_with_name, role="devil") + elif agent == "judge": + message_history_with_name, judge_result = self.judge_answer(message_history_with_name, phase="universal") + if not judge_result["is_debating"]: + """ + 这里需要在 self.judge_answer 中设置一个自动给出solution的地方 + """ + return {"final_solution":judge_result["final_solution"]} + + message_history_with_name.pop(-1) + message_history_with_name, judge_answer = self.judge_answer(message_history_with_name, phase="final") - node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=prompt, llm=self.llm) - response = node.instruct_content.model_dump() - return response + return {"final_solution":judge_answer["debate_answer"]} class Rephrase(Operator): """ @@ -276,4 +313,6 @@ class CodeReflection(Operator): class Verify(Operator): """ ? 还没有想好 - """ \ No newline at end of file + """ + pass + diff --git a/examples/ags/w_action_node/operator_an.py b/examples/ags/w_action_node/operator_an.py index 790492d5c..7560614ea 100644 --- a/examples/ags/w_action_node/operator_an.py +++ b/examples/ags/w_action_node/operator_an.py @@ -9,7 +9,7 @@ class GenerateOp(BaseModel): solution: str = Field(default="", description="Your Solution for this problem") class GenerateCodeOp(BaseModel): - code_solution: str = Field(default="", description="Your complete code solution for this problem") + code_solution: str = Field(default="", description="Complete and correct code here.") class GenerateCodeBlockOp(BaseModel): code_solution: str = Field(default="", description="Your complete code solution for this problem") diff --git a/examples/ags/w_action_node/prompt.py b/examples/ags/w_action_node/prompt.py index e186aafb7..dc92455aa 100644 --- a/examples/ags/w_action_node/prompt.py +++ b/examples/ags/w_action_node/prompt.py @@ -26,23 +26,27 @@ Generate Solution for the following problem: {problem_description} # """ GENERATE_CODE_PROMPT = """ -You are an expert programmer tasked with solving a coding problem. Your goal is to write clean, efficient, and correct code that solves the given problem. +You are an expert programmer tasked with solving a coding problem. ### Problem Description: {problem_description} ### Instructions: -1. Read the problem description carefully. -2. If any part of the problem is unclear, state your assumptions. -3. Plan your approach before writing code. -4. Write a Python function that solves the problem. -5. Include clear comments to explain your logic. -6. Ensure your code handles edge cases and potential errors. -7. If time complexity is a concern, optimize your solution and explain your optimization. - +The above is an incomplete Python code fragment. Return the complete and correct code with no additional text. Please maintain the JSON format in your response. ### Your Response: + """ +GENERATE_CODEBLOCK_PROMPT = """ +You are an expert programmer tasked with solving a coding problem. + +### Problem Description: +{problem_description} + +### Instructions: +The above is an incomplete Python code fragment. Return the complete and correct code with no additional text. +""" + # GENERATE_CODE_PROMPT = """ # Generate Code Solution for the following problem: {problem_description} # """ @@ -100,3 +104,42 @@ Carefully analyze the given problem and the list of solution candidates. Your ta Please maintain the JSON format in your response. """ +DE_ENSEMBLE_TXT_FORMAT_PROMPT = """ +Now please output your answer in json format, with the format as follows: + {\"Reason\": \"\", \"debate_answer\": \"the capital letter corresponding to the answer\"}. +Please strictly output in JSON format, do not output irrelevant content. """ + +DE_ENSEMBLE_CODE_FORMAT_PROMPT = """ +Now please output your answer in json format, with the format as follows: +{{ + "reason":"<为什么要这样做>", + "code_solution":"<你觉得合适的solution,用代码表示出来>" +}} +Please strictly output in JSON format, do not output irrelevant content. """ + +DE_ENSEMBLE_ANGEL_PROMPT = """ +Do you agree with my perspective? Please provide your reasons and answer. +""" + +DE_ENSEMBLE_DEVIL_PROMPT = """ +You agree with my answer 90% of the time and have almost no reservations. Affirm your agreement, share any additional thoughts if you have them, and conclude with the capital letter corresponding to your answer at the end of your response. +""" + +DE_ENSEMBLE_JUDGE_FINAL_PROMPT = """ +You, as the moderator, will evaluate both sides' answers and determine your + preference for an answer candidate. Please summarize your reasons for supporting affirmative/negative side and + give the final answer that you think is correct to conclude the debate. Now please output your answer in json format, with the format as follows: + {\"Reason\": \"\", \"debate_answer\": \"the capital letter corresponding to the answer\"}. + Please strictly output in JSON format, do not output irrelevant content. +""" + +DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT = """ +You, as the moderator, will evaluate both sides' answers and determine if there is a clear + preference for an answer candidate. If so, please summarize your reasons for supporting affirmative/negative side and + give the final answer that you think is correct, and the debate will conclude. If not, the debate will continue to + the next round. Now please output your answer in json format, with the format as follows: + {\"Whether there is a preference\": \"Yes or No\", \"Supported Side\": \"Affirmative or Negative\", + \"Reason\": \"\", \"debate_answer\": \"the capital letter corresponding to the answer\"}. + Please strictly output in JSON format, do not output irrelevant content +""" +