Update Operator's code

This commit is contained in:
didi 2024-10-21 11:27:50 +08:00
parent 478589e1c7
commit ade10684b7
3 changed files with 272 additions and 326 deletions

View file

@ -4,36 +4,36 @@
# @Desc : operator demo of aflow
import random
import sys
import asyncio
import traceback
from collections import Counter
from typing import Dict, List, Tuple
import concurrent.futures
import threading
from tenacity import retry, stop_after_attempt, wait_fixed
from examples.aflow.scripts.utils import extract_test_cases_from_jsonl
from examples.aflow.scripts.operator_an import (
CodeGenerateOp,
FormatOp,
GenerateOp,
MdEnsembleOp,
CodeGenerateOp,
AnswerGenerateOp,
ScEnsembleOp,
ReflectionTestOp,
MdEnsembleOp,
ReviewOp,
ReviseOp,
ScEnsembleOp,
)
from examples.aflow.scripts.prompt import (
CONTEXTUAL_GENERATE_PROMPT,
FORMAT_PROMPT,
GENERATE_CODEBLOCK_PROMPT,
GENERATE_PROMPT, # TODO
MD_ENSEMBLE_PROMPT,
ANSWER_GENERATION_PROMPT,
SC_ENSEMBLE_PROMPT,
PYTHON_CODE_VERIFIER_PROMPT,
REFLECTION_ON_PUBLIC_TEST_PROMPT,
MD_ENSEMBLE_PROMPT,
REVIEW_PROMPT,
REVISE_PROMPT,
SC_ENSEMBLE_PROMPT,
)
from examples.aflow.scripts.utils import test_case_2_test_function
from metagpt.actions.action_node import ActionNode
@ -42,159 +42,50 @@ from metagpt.logs import logger
class Operator:
def __init__(self, name, llm: LLM):
def __init__(self, llm: LLM, name: str):
self.name = name
self.llm = llm
def __call__(self, *args, **kwargs):
raise NotImplementedError
async def _fill_node(self, op_class, prompt, mode=None, **extra_kwargs):
    """Fill an ActionNode built from ``op_class`` and return its content as a dict.

    Args:
        op_class: Pydantic model handed to ``ActionNode.from_pydantic``.
        prompt: Text passed to ``fill`` as ``context``.
        mode: Optional fill mode; it is only forwarded when truthy.
        **extra_kwargs: Further keyword arguments for ``fill``. They are
            merged last, so they override ``context``/``llm``/``mode``.

    Returns:
        The result of ``instruct_content.model_dump()`` on the filled node.
    """
    mode_kwargs = {"mode": mode} if mode else {}
    # Merge order matters: caller-supplied extras win over the defaults.
    call_kwargs = {"context": prompt, "llm": self.llm, **mode_kwargs, **extra_kwargs}
    action_node = ActionNode.from_pydantic(op_class)
    filled_node = await action_node.fill(**call_kwargs)
    return filled_node.instruct_content.model_dump()
class Custom(Operator):
def __init__(self, llm: LLM, name: str = "Custom"):
super().__init__(name, llm)
async def __call__(self, input, instruction, mode: str = None):
prompt = input + instruction
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response
class Generate(Operator):
def __init__(self, llm: LLM, name: str = "Generate"):
super().__init__(name, llm)
async def __call__(self, problem, mode: str = None):
prompt = GENERATE_PROMPT.format(problem_description=problem)
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response
class ContextualGenerate(Operator):
def __init__(self, llm: LLM, name: str = "ContextualGenerate"):
super().__init__(name, llm)
@retry(stop=stop_after_attempt(3))
async def __call__(self, problem, context, mode: str = None):
prompt = CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem, thought=context)
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response
class CodeGenerate(Operator):
def __init__(self, name: str = "CodeGenerate", llm: LLM = LLM()):
super().__init__(name, llm)
@retry(stop=stop_after_attempt(3))
async def __call__(self, problem, function_name, mode: str = None):
prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem)
fill_kwargs = {"context": prompt, "llm": self.llm, "function_name": function_name}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(CodeGenerateOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response # {"code": "xxx"}
class Format(Generate):
def __init__(self, name: str = "Format", llm: LLM = LLM()):
super().__init__(llm, name)
async def __call__(self, problem, solution, mode: str = None):
prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(FormatOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response
async def __call__(self, input, instruction):
prompt = instruction + input
response = await self._fill_node(GenerateOp, prompt, mode="single_fill")
return response
class AnswerGenerate(Operator):
    """Operator that asks the LLM for a reasoning trace and a final answer."""

    def __init__(self, llm: LLM, name: str = "AnswerGenerate"):
        super().__init__(llm, name)

    async def __call__(self, input: str, mode: str = None) -> Dict[str, str]:
        """Answer ``input`` using ``ANSWER_GENERATION_PROMPT``.

        Args:
            input: The task text substituted into the prompt.
            mode: Accepted for signature compatibility but not used; the node
                is always filled with ``mode="context_fill"``.

        Returns:
            The dict produced by ``_fill_node`` for ``AnswerGenerateOp``
            (a ``model_dump()`` of the filled model), not a tuple.
        """
        prompt = ANSWER_GENERATION_PROMPT.format(input=input)
        return await self._fill_node(AnswerGenerateOp, prompt, mode="context_fill")
class Review(Operator):
def __init__(self, criteria: str = "accuracy", name: str = "Review", llm: LLM = LLM()):
self.criteria = criteria
super().__init__(name, llm)
class CustomCodeGenerate(Operator):
def __init__(self, llm: LLM, name: str = "CustomCodeGenerate"):
super().__init__(llm, name)
async def __call__(self, problem, solution, mode: str = None):
prompt = REVIEW_PROMPT.format(problem_description=problem, solution=solution, criteria=self.criteria)
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(ReviewOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response
async def __call__(self, problem, entry_point, instruction):
prompt = instruction + problem
response = await self._fill_node(GenerateOp, prompt, mode="code_fill", function_name=entry_point)
return response
class Revise(Operator):
def __init__(self, name: str = "Revise", llm: LLM = LLM()):
super().__init__(name, llm)
async def __call__(self, problem, solution, feedback, mode: str = None):
prompt = REVISE_PROMPT.format(problem_description=problem, solution=solution, feedback=feedback)
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(ReviseOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
return response
class MdEnsemble(Operator):
"""
Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
Link: https://arxiv.org/abs/2311.16452
"""
def __init__(self, name: str = "MdEnsemble", llm: LLM = LLM(), vote_count: int = 3):
super().__init__(name, llm)
self.vote_count = vote_count
@staticmethod
def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, str]]:
shuffled_solutions = solutions.copy()
random.shuffle(shuffled_solutions)
answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
return shuffled_solutions, answer_mapping
async def __call__(self, solutions: List[str], problem: str, mode: str = None):
logger.info(f"solution count: {len(solutions)}")
all_responses = []
for _ in range(self.vote_count):
shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
solution_text = ""
for index, solution in enumerate(shuffled_solutions):
solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
fill_kwargs = {"context": prompt, "llm": self.llm}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(MdEnsembleOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
answer = response.get("solution_letter", "")
answer = answer.strip().upper()
if answer in answer_mapping:
original_index = answer_mapping[answer]
all_responses.append(original_index)
most_frequent_index = Counter(all_responses).most_common(1)[0][0]
final_answer = solutions[most_frequent_index]
return {"solution": final_answer}
class ScEnsemble(Operator):
"""
Paper: Self-Consistency Improves Chain of Thought Reasoning in Language Models
@ -203,31 +94,118 @@ class ScEnsemble(Operator):
Link: https://arxiv.org/abs/2311.17311
"""
def __init__(self, name: str = "ScEnsemble", llm: LLM = LLM()):
super().__init__(name, llm)
def __init__(self, llm: LLM, name: str = "ScEnsemble"):
super().__init__(llm, name)
async def __call__(self, solutions: List[str], problem: str = ""):
    """Select the most self-consistent solution among ``solutions``.

    Args:
        solutions: Candidate solutions; each one is labelled with a letter
            starting at ``A`` in the order given.
        problem: The question the solutions answer. It is substituted into
            the ``{question}`` placeholder of ``SC_ENSEMBLE_PROMPT``.
            Defaults to an empty string so existing ``op(solutions)`` calls
            keep working.

    Returns:
        ``{"response": <chosen solution>}``.

    Raises:
        KeyError: If the letter returned by the LLM does not correspond to
            any of the supplied solutions.
    """
    answer_mapping = {}
    labelled_solutions = []
    for index, solution in enumerate(solutions):
        letter = chr(65 + index)
        answer_mapping[letter] = index
        labelled_solutions.append(f"{letter}: \n{str(solution)}\n\n\n")
    solution_text = "".join(labelled_solutions)

    # The prompt template declares both {question} and {solutions}; passing
    # only ``solutions`` makes str.format raise KeyError('question').
    prompt = SC_ENSEMBLE_PROMPT.format(question=problem, solutions=solution_text)
    response = await self._fill_node(ScEnsembleOp, prompt, mode="context_fill")

    answer = response.get("solution_letter", "")
    answer = answer.strip().upper()

    return {"response": solutions[answer_mapping[answer]]}
def run_code(code):
    """Execute generated Python source and call the ``solve()`` it defines.

    Args:
        code: Python source text that is expected to define a zero-argument
            callable named ``solve``.

    Returns:
        A ``(status, message)`` tuple. ``status`` is ``"Success"`` with the
        stringified return value of ``solve()``, or ``"Error"`` with a
        description (prohibited import, missing ``solve``, or the exception
        text followed by its traceback).
    """
    import re  # local import: only this function needs it

    try:
        disallowed_imports = [
            "os", "sys", "subprocess", "multiprocessing",
            "matplotlib", "seaborn", "plotly", "bokeh", "ggplot",
            "pylab", "tkinter", "PyQt5", "wx", "pyglet"
        ]

        # Heuristic text scan for prohibited imports. Word boundaries keep
        # names that merely start with a listed module (e.g. ``osmosis``,
        # ``osgeo``) from being rejected as ``os``.
        for lib in disallowed_imports:
            if re.search(rf"\b(?:import|from)\s+{re.escape(lib)}\b", code):
                logger.info(f"Detected prohibited import: {lib}")
                return "Error", f"Prohibited import: {lib} and graphing functionalities"

        # NOTE(security): exec() runs arbitrary generated code with full
        # interpreter privileges; the scan above is not a sandbox.
        global_namespace = {}
        exec(code, global_namespace)

        # The generated code must expose its entry point as ``solve``.
        solve = global_namespace.get("solve")
        if not callable(solve):
            return "Error", "Function 'solve' not found"
        return "Success", str(solve())
    except Exception as e:
        return "Error", f"Execution error: {str(e)}\n{traceback.format_exc()}"
class Programmer(Operator):
    """Operator that generates a ``solve()`` program, runs it, and retries on failure."""

    def __init__(self, llm: LLM, name: str = "Programmer"):
        super().__init__(llm, name)

    @staticmethod
    def _stop_workers(executor):
        """Forcefully stop the worker processes of ``executor``."""
        kill_workers = getattr(executor, "kill_workers", None)
        if kill_workers is not None:
            # Public API on newer Python versions.
            kill_workers()
            return
        # Older versions offer no public way to stop a running worker, so
        # fall back to the executor's process table.
        processes = getattr(executor, "_processes", None) or {}
        for process in list(processes.values()):
            process.kill()

    async def exec_code(self, code, timeout=30):
        """
        Asynchronously execute code and return an error if timeout occurs.

        Args:
            code: Source text handed to ``run_code`` in a worker process.
            timeout: Seconds to wait for the worker before giving up.

        Returns:
            The ``(status, message)`` tuple from ``run_code``, or
            ``("Error", "Code execution timed out")`` on timeout, or
            ``("Error", "Unknown error: ...")`` for any other failure.
        """
        loop = asyncio.get_running_loop()
        # Deliberately not a ``with`` block: leaving it calls
        # shutdown(wait=True), which would block this coroutine (and the
        # event loop) until a runaway worker finishes, defeating the timeout.
        executor = concurrent.futures.ProcessPoolExecutor(max_workers=1)
        try:
            # Submit run_code task to the process pool
            future = loop.run_in_executor(executor, run_code, code)
            # Wait for the task to complete or timeout
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            # A running task cannot be cancelled; the worker has to be killed.
            self._stop_workers(executor)
            return "Error", "Code execution timed out"
        except Exception as e:
            return "Error", f"Unknown error: {str(e)}"
        finally:
            executor.shutdown(wait=False, cancel_futures=True)

    async def code_generate(self, problem, analysis, feedback, mode):
        """
        Asynchronous method to generate code.

        Formats ``PYTHON_CODE_VERIFIER_PROMPT`` with the given fields and
        fills a ``CodeGenerateOp`` node with ``function_name="solve"``.

        Returns:
            The dict returned by ``_fill_node``.
        """
        prompt = PYTHON_CODE_VERIFIER_PROMPT.format(
            problem=problem,
            analysis=analysis,
            feedback=feedback
        )
        response = await self._fill_node(CodeGenerateOp, prompt, mode, function_name="solve")
        return response

    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
    async def __call__(self, problem: str, analysis: str = "None"):
        """
        Call method, generate code and execute, retry up to 3 times.

        Each failed execution is fed back into the next generation round.

        Returns:
            ``{"code": ..., "output": ...}``. ``output`` is the result of
            ``solve()`` on success, ``"No code generated"`` when the LLM
            returned no code, or the last error message after three failed
            rounds.
        """
        code = None
        output = None
        feedback = ""
        for i in range(3):
            code_response = await self.code_generate(problem, analysis, feedback, mode="code_fill")
            code = code_response.get("code")
            if not code:
                return {"code": code, "output": "No code generated"}
            status, output = await self.exec_code(code)
            if status == "Success":
                return {"code": code, "output": output}
            logger.info(f"Execution error on attempt {i + 1}, error message: {output}")
            feedback = (
                f"\nThe result of the error from the code you wrote in the previous round:\n"
                f"Code: {code}\n\nStatus: {status}, {output}"
            )
        return {"code": code, "output": output}
class Test(Operator):
def __init__(self, llm, name: str = "Test"):
super().__init__(name, llm)
def __init__(self, llm: LLM, name: str = "Test"):
super().__init__(llm, name)
def exec_code(self, solution, entry_point):
@ -282,8 +260,7 @@ class Test(Operator):
exec_pass=f"executed unsuccessfully, error: \n {result}",
test_fail="executed unsucessfully",
)
node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm, mode="code_fill")
response = node.instruct_content.model_dump()
response = await self._fill_node(ReflectionTestOp, prompt, mode="code_fill")
solution = response["reflection_and_solution"]
else:
prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format(
@ -292,8 +269,7 @@ class Test(Operator):
exec_pass="executed successfully",
test_fail=result,
)
node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm, mode="code_fill")
response = node.instruct_content.model_dump()
response = await self._fill_node(ReflectionTestOp, prompt, mode="code_fill")
solution = response["reflection_and_solution"]
result = self.exec_code(solution, entry_point)
@ -301,74 +277,75 @@ class Test(Operator):
return {"result": True, "solution": solution}
else:
return {"result": False, "solution": solution}
class Programmer(Operator):
def __init__(self, llm: LLM, name: str = "Programmer"):
super().__init__(name, llm)
class Format(Operator):
def __init__(self, llm: LLM, name: str = "Format"):
super().__init__(llm, name)
async def exec_code(code, timeout=180):
def run_code():
try:
# Create a new global namespace
global_namespace = {}
# Use exec to execute the code
exec(code, global_namespace)
# Assume the code defines a function named 'solve'
if 'solve' in global_namespace:
result = global_namespace['solve']()
return "Success", str(result)
else:
return "Error", "Function 'solve' not found"
except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
tb_str = traceback.format_exception(exc_type, exc_value, exc_traceback)
return "Error", f"Execution error: {str(e)}\n{''.join(tb_str)}"
async def __call__(self, problem, solution, mode: str = None):
prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
response = await self._fill_node(FormatOp, prompt, mode)
return response
# Create an event to mark task completion
done_event = threading.Event()
result = ["Error", "Execution resulted in no output, subprocess exception"]
def wrapper():
nonlocal result
result = run_code()
done_event.set()
class Review(Operator):
def __init__(self, llm: LLM, name: str = "Review"):
super().__init__(llm, name)
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(wrapper)
try:
# Wait for task completion or timeout
if done_event.wait(timeout=timeout):
return result
else:
# Timeout, attempt to cancel the task
future.cancel()
return "Error", "Code execution timed out"
finally:
# Ensure the thread pool is properly shut down
executor.shutdown(wait=False)
async def code_generate(self, problem, analysis, feedback, mode):
prompt = PYTHON_CODE_VERIFIER_PROMPT.format(problem=problem, analysis=analysis, feedback=feedback)
fill_kwargs = {"context": prompt, "llm": self.llm, "function_name": "solve"}
if mode:
fill_kwargs["mode"] = mode
node = await ActionNode.from_pydantic(CodeGenerateOp).fill(**fill_kwargs)
response = node.instruct_content.model_dump()
async def __call__(self, problem, solution, mode: str = None):
prompt = REVIEW_PROMPT.format(problem=problem, solution=solution)
response = await self._fill_node(ReviewOp, prompt, mode="context_fill")
return response
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
async def __call__(self, problem: str, analysis: str = "None"):
code = None
feedback = ""
for i in range(3):
code = await self.code_generate(problem, analysis, feedback, mode="code_fill")
code = code["code"]
status, output = await self.exec_code(code)
if status == "Success":
return {"code": code, "output": output}
else:
logger.info(f"Execution error in attempt {i + 1}, error message: {output}")
feedback = f"\nThe result of the error from the code you wrote in the previous round:\nCode:{code}\n\nStatus:{status},{output}"
return {"code": code, "output": "error"}
class Revise(Operator):
    """Operator that rewrites a solution according to review feedback."""

    def __init__(self, llm: LLM, name: str = "Revise"):
        super().__init__(llm, name)

    async def __call__(self, problem, solution, feedback, mode: str = None):
        """Fill a ``ReviseOp`` node from ``REVISE_PROMPT``.

        ``mode`` is accepted but not used: the node is always filled with
        ``mode="context_fill"``. Returns the dict produced by ``_fill_node``.
        """
        revise_prompt = REVISE_PROMPT.format(
            feedback=feedback,
            solution=solution,
            problem=problem,
        )
        return await self._fill_node(ReviseOp, revise_prompt, mode="context_fill")
class MdEnsemble(Operator):
    """
    Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
    Link: https://arxiv.org/abs/2311.16452
    """

    def __init__(self, llm: LLM, name: str = "MdEnsemble", vote_count: int = 5):
        super().__init__(llm, name)
        # Number of independent voting rounds run per call.
        self.vote_count = vote_count

    @staticmethod
    def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, int]]:
        """Shuffle a copy of ``solutions`` and map each letter to an original index.

        Returns:
            ``(shuffled_solutions, answer_mapping)`` where ``answer_mapping``
            maps ``"A"``, ``"B"``, ... (positions in the shuffled list) to
            the index of that solution in ``solutions``. Duplicate solutions
            map to their first occurrence, because ``list.index`` is used.
        """
        shuffled_solutions = solutions.copy()
        random.shuffle(shuffled_solutions)
        answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
        return shuffled_solutions, answer_mapping

    async def __call__(self, solutions: List[str], problem: str, mode: str = None):
        """Pick the best solution by majority vote over shuffled presentations.

        Args:
            solutions: Candidate solutions to choose from.
            problem: The question, substituted into ``MD_ENSEMBLE_PROMPT``.
            mode: Accepted but not used; nodes are always filled with
                ``mode="context_fill"``.

        Returns:
            ``{"solution": <winning solution>}``. When no round yields a
            recognizable letter, the first solution is returned instead of
            failing on an empty vote tally.
        """
        logger.info(f"solution count: {len(solutions)}")
        all_responses = []

        for _ in range(self.vote_count):
            shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
            solution_text = "".join(
                f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
                for index, solution in enumerate(shuffled_solutions)
            )

            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, question=problem)
            response = await self._fill_node(MdEnsembleOp, prompt, mode="context_fill")

            answer = response.get("solution_letter", "A")
            answer = answer.strip().upper()

            # Votes for letters outside the mapping are discarded.
            if answer in answer_mapping:
                all_responses.append(answer_mapping[answer])

        if not all_responses:
            # Counter().most_common(1) would be empty here and indexing it
            # would raise IndexError.
            logger.warning("MdEnsemble received no valid vote; falling back to the first solution")
            return {"solution": solutions[0]}

        most_frequent_index = Counter(all_responses).most_common(1)[0][0]
        final_answer = solutions[most_frequent_index]
        return {"solution": final_answer}

View file

@ -5,41 +5,35 @@
from pydantic import BaseModel, Field
class GenerateOp(BaseModel):
response: str = Field(default="", description="Your solution for this problem")
class CodeGenerateOp(BaseModel):
code: str = Field(default="", description="Your complete code solution for this problem")
class AnswerGenerateOp(BaseModel):
thought: str = Field(default="", description="The step by step thinking process")
answer: str = Field(default="", description="The final answer to the question")
class FormatOp(BaseModel):
solution: str = Field(default="", description="Your formatted answer for this problem")
class ScEnsembleOp(BaseModel):
thought: str = Field(default="", description="The thought of the most consistent solution.")
solution_letter: str = Field(default="", description="The letter of most consistent solution.")
class ReflectionTestOp(BaseModel):
reflection_and_solution: str = Field(default="", description="Corrective solution for code execution errors or test case failures")
class MdEnsembleOp(BaseModel):
thought: str = Field(default="", description="Step-by-step analysis of the solutions to determine the best one.")
solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).")
class ReviewOp(BaseModel):
review_result: bool = Field(
default=False,
description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'",
)
feedback: str = Field(
default="",
description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.",
)
review_result: bool = Field(default=False, description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'")
feedback: str = Field(default="",description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.")
class ReviseOp(BaseModel):
solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
class MdEnsembleOp(BaseModel):
thought: str = Field(
default="",
description="Step-by-step analysis of the solutions to determine the best one.",
)
solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).")
class ScEnsembleOp(BaseModel):
solution_letter: str = Field(default="", description="The letter of most consistent solution.")
class ReflectionTestOp(BaseModel):
reflection_and_solution: str = Field(
default="", description="Corrective solution for code execution errors or test case failures"
)

View file

@ -3,26 +3,11 @@
# @Author : didi
# @Desc : prompts of operators
CONTEXTUAL_GENERATE_PROMPT = """
Generate Solution for the following problem:
## Problem Description
{problem_description}
## Thought
{thought}
"""
GENERATE_CODEBLOCK_PROMPT = """
Please provide a self-contained Python script that solves the following problem in a markdown code block:
{problem_description}
When creating your solution:
1. Consider all edge cases and boundary conditions.
2. Avoid oversimplification - address all aspects of the problem.
3. Ensure your logic covers all stated requirements.
4. Avoid adding additional test cases beyond those provided in the problem description.
ANSWER_GENERATION_PROMPT = """
Think step by step and solve the problem.
1. In the "thought" field, explain your thinking process in detail.
2. In the "answer" field, provide the final answer concisely and clearly. The answer should be a direct response to the question, without including explanations or reasoning.
Your task: {input}
"""
FORMAT_PROMPT = """
@ -31,59 +16,32 @@ please extract a short and concise answer contains only one word/few words from
Make sure there are no additional comments or explanations in your response.
"""
REVIEW_PROMPT = """
For the question described as {problem_description},
please review the following solution: {solution}, and provide a review result in boolean format.
```
You will be reviewing the problem-solving process of another AI assistant that has answered a mathematical question. Your task is to evaluate the solution and provide a detailed review for refinement. Follow these steps:
<step1>
Carefully read through the original question and entire solution, paying close attention to the relevant concepts, thinking process, calculations, and final result. Assess whether the solution is clear, logical, and well-organized. Write your initial review in <initialReview> tags.
</step1>
<step2>
Evaluate the reasoning and logic behind the solution. Ensure that the thinking process is clear, coherent, and mathematically sound. If you find any areas that need clarification or improvement, provide your suggestions inside <reasoningFeedback> tags.
</step2>
<step3>
Re-do the calculations presented in the <calculation> section **carefully and step-by-step** to verify the accuracy. Break down the calculations into the simplest possible steps and check each step for errors. You must not be careless and treat every part with rigor. Don't neglect checking any calculation part of the solution process. If you find any mistakes, note them down inside <calculationErrors> tags.
</step3>
<step4>
Provide an overall assessment of the solution's thoroughness, accuracy, and clarity inside <overallAssessment> tags. Highlight the strengths and weaknesses of the solution and offer suggestions for improvement, if any.
</step4>
use XML tags to present your complete evaluation, including initial review, calculation errors, reasoning feedback, and overall assessment, in a well-organized and easy-to-follow format.
Remember to be thorough, constructive, and professional in your review. Your goal is to help improve the quality and accuracy of the mathematical problem-solving process.
```
If you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your comments
"""
REVISE_PROMPT = """
For the question described as {problem_description},
please evaluate and revise the solution provided: {solution}, taking into account the review feedbacks: {feedback}."
Then output the revised solution.
"""
MD_ENSEMBLE_PROMPT = """
You are given a problem:
{problem_description}
Here is a list of possible solutions to the problem:
{solutions}
Using the inputs above, your goal is to choose the best solution to the problem.
The main consideration is that the solution can fully solve the problem in a correct and robust manner.
Provide your final decision by writing the chosen solution letter.
Please follow the required format in your response.
"""
SC_ENSEMBLE_PROMPT = """
I have generated the following solutions to the question: {problem_description}
Given the question described as follows: {question}
Several solutions have been generated to address the given question. They are as follows:
{solutions}
Evaluate these solutions.
Select the most consistent solution based on majority consensus.
Give your answer with a single id of solution (without anything else).
Carefully evaluate these solutions and identify the answer that appears most frequently across them. This consistency in answers is crucial for determining the most reliable solution.
In the "thought" field, provide a detailed explanation of your thought process. In the "solution_letter" field, output only the single letter ID (A, B, C, etc.) corresponding to the most consistent solution. Do not include any additional text or explanation in the "solution_letter" field.
"""
PYTHON_CODE_VERIFIER_PROMPT = """
You are a professional Python programmer. Your task is to write complete, self-contained code based on a given mathematical problem and output the answer. The code should include all necessary imports and dependencies, and be ready to run without additional setup or environment configuration.
Problem description: {problem}
Other analysis: {analysis}
{feedback}
Your code should:
1. Implement the calculation steps described in the problem.
2. Define a function named `solve` that performs the calculation and returns the result. The `solve` function should not require any input parameters; instead, it should obtain all necessary inputs from within the function or from globally defined variables.
3. `solve` function return the final calculation result.
Please ensure your code is efficient, well-commented, and follows Python best practices. The output should be limited to basic data types such as strings, integers, and floats. It is prohibited to transmit images or other file formats. The code output is intended for a text-based language model.
"""
REFLECTION_ON_PUBLIC_TEST_PROMPT = """
Given a code problem and a python code solution which failed to pass test or execute, you need to analyze the reason for the failure and propose a better code solution.:
### problem
@ -101,14 +59,31 @@ Given a code problem and a python code solution which failed to pass test or exe
Please provide a reflection on the failed test cases and code solution, followed by a better code solution without any additional text or test cases.
"""
PYTHON_CODE_VERIFIER_PROMPT = """You are a professional Python programmer. Your task is to write Python code based on the user's request. Make sure to add appropriate explanations and your personal thought process to your code. Additionally, all code should be encapsulated in Python code blocks.
MD_ENSEMBLE_PROMPT = """
Given the question described as follows: {question}
Several solutions have been generated to address the given question. They are as follows:
{solutions}
The packages you can use include: numpy, scipy, pandas, sympy, statsmodels, scikit-learn. If you attempt to import another external package and encounter an error, do not say it cannot be imported. Instead, try to write new code that avoids this issue.
Carefully evaluate these solutions and identify the solution that is more capable of solving the problem compared to other solutions, as this is crucial for problem-solving.
Always output complete code rather than just giving suggestions or partial modifications, as your code will be executed directly. If immediate execution is required to check for possible errors, include test cases in the code.
In your response, only the code that needs to be run should be wrapped in multi-line code blocks. No other multi-line code blocks should appear. Your code needs to print the output after execution. Your code should not print error messages.
Problem description: {problem}
Please write Python code to solve this problem.
In the "thought" field, provide a detailed explanation of your thought process. In the "solution_letter" field, output only the single letter ID (A, B, C, etc.) corresponding to the solution. Do not include any additional text or explanation in the "solution_letter" field.
"""
REVIEW_PROMPT = """
Given a problem and a thoughtful solution, your task is to using critical thinking (questioning) to review the solution's correctness and provide a review result in boolean format.
problem: {problem}
solution: {solution}
If you are more than 95 percent confident that the final answer is incorrect, please return False and give a feedback for the error. Otherwise, please return True and give a explanation for the correctness.
"""
REVISE_PROMPT = """
Given a problem and a thoughtful solution which is just reviewed as incorrect, your task is to revise the solution to solve the question and ensure the final code solution is wrapped with ```python```.
problem: {problem}
solution: {solution}
feedback: {feedback}
Ensure the output code is self-contained, and without any additional text or test cases.
"""