Update Operator's code

2026-07-23 17:01:08 +02:00 · 2024-10-21 11:27:50 +08:00 · 2024-10-21 11:27:50 +08:00 · ade10684b7
commit ade10684b7
parent 478589e1c7
3 changed files with 272 additions and 326 deletions
--- a/examples/aflow/scripts/operator.py
+++ b/examples/aflow/scripts/operator.py
@ -4,36 +4,36 @@
 # @Desc    : operator demo of aflow
 import random
 import sys
+import asyncio
 import traceback
 from collections import Counter
 from typing import Dict, List, Tuple

 import concurrent.futures
-import threading
 from tenacity import retry, stop_after_attempt, wait_fixed
 from examples.aflow.scripts.utils import extract_test_cases_from_jsonl

 from examples.aflow.scripts.operator_an import (
-    CodeGenerateOp,
    FormatOp,
    GenerateOp,
-    MdEnsembleOp,
+    CodeGenerateOp,
+    AnswerGenerateOp,
+    ScEnsembleOp,
    ReflectionTestOp,
+    MdEnsembleOp,
    ReviewOp,
    ReviseOp,
-    ScEnsembleOp,
+
 )
 from examples.aflow.scripts.prompt import (
-    CONTEXTUAL_GENERATE_PROMPT,
    FORMAT_PROMPT,
-    GENERATE_CODEBLOCK_PROMPT,
-    GENERATE_PROMPT, # TODO
-    MD_ENSEMBLE_PROMPT,
+    ANSWER_GENERATION_PROMPT,
+    SC_ENSEMBLE_PROMPT,
    PYTHON_CODE_VERIFIER_PROMPT,
    REFLECTION_ON_PUBLIC_TEST_PROMPT,
+    MD_ENSEMBLE_PROMPT,
    REVIEW_PROMPT,
    REVISE_PROMPT,
-    SC_ENSEMBLE_PROMPT,
 )
 from examples.aflow.scripts.utils import test_case_2_test_function
 from metagpt.actions.action_node import ActionNode
@ -42,159 +42,50 @@ from metagpt.logs import logger


 class Operator:
-    def __init__(self, name, llm: LLM):
+    def __init__(self, llm: LLM, name: str):
        self.name = name
        self.llm = llm

    def __call__(self, *args, **kwargs):
        raise NotImplementedError

+    async def _fill_node(self, op_class, prompt, mode=None, **extra_kwargs):
+        """Fill an ActionNode built from ``op_class`` and return its content as a dict.
+
+        Args:
+            op_class: Pydantic model handed to ``ActionNode.from_pydantic``.
+            prompt: Text passed to ``fill`` as ``context``.
+            mode: Optional fill mode; it is only forwarded when truthy.
+            **extra_kwargs: Additional keyword arguments forwarded to ``fill``.
+                They are applied last, so they win on a name clash with
+                ``context``, ``llm`` or ``mode``.
+
+        Returns:
+            The result of ``node.instruct_content.model_dump()``.
+        """
+        fill_kwargs = {"context": prompt, "llm": self.llm}
+        if mode:
+            fill_kwargs["mode"] = mode
+        fill_kwargs.update(extra_kwargs)
+        node = await ActionNode.from_pydantic(op_class).fill(**fill_kwargs)
+        return node.instruct_content.model_dump()
+

 class Custom(Operator):
    def __init__(self, llm: LLM, name: str = "Custom"):
-        super().__init__(name, llm)
-
-    async def __call__(self, input, instruction, mode: str = None):
-        prompt = input + instruction
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response
-
-
-class Generate(Operator):
-    def __init__(self, llm: LLM, name: str = "Generate"):
-        super().__init__(name, llm)
-
-    async def __call__(self, problem, mode: str = None):
-        prompt = GENERATE_PROMPT.format(problem_description=problem)
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response
-
-
-class ContextualGenerate(Operator):
-    def __init__(self, llm: LLM, name: str = "ContextualGenerate"):
-        super().__init__(name, llm)
-
-    @retry(stop=stop_after_attempt(3))
-    async def __call__(self, problem, context, mode: str = None):
-        prompt = CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem, thought=context)
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response
-
-
-class CodeGenerate(Operator):
-    def __init__(self, name: str = "CodeGenerate", llm: LLM = LLM()):
-        super().__init__(name, llm)
-
-    @retry(stop=stop_after_attempt(3))
-    async def __call__(self, problem, function_name, mode: str = None):
-        prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem)
-        fill_kwargs = {"context": prompt, "llm": self.llm, "function_name": function_name}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(CodeGenerateOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response  # {"code": "xxx"}
-
-class Format(Generate):
-    def __init__(self, name: str = "Format", llm: LLM = LLM()):
        super().__init__(llm, name)

-    async def __call__(self, problem, solution, mode: str = None):
-        prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(FormatOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response 
+    async def __call__(self, input, instruction):
+        """Fill ``GenerateOp`` from ``instruction`` followed by ``input``.
+
+        The node is always filled with mode ``"single_fill"``; the dict
+        returned by ``_fill_node`` is passed back unchanged.
+        """
+        # The instruction comes first, the input text is appended to it.
+        prompt = instruction + input
+        response = await self._fill_node(GenerateOp, prompt, mode="single_fill")
+        return response
+    
+class AnswerGenerate(Operator):
+    """Operator that fills ``AnswerGenerateOp`` from ``ANSWER_GENERATION_PROMPT``."""
+
+    def __init__(self, llm: LLM, name: str = "AnswerGenerate"):
+        super().__init__(llm, name)

+    async def __call__(self, input: str, mode: str = None) -> Dict[str, str]:
+        """Generate an answer for ``input``.
+
+        Args:
+            input: Task text substituted into ``ANSWER_GENERATION_PROMPT``.
+            mode: Accepted for signature compatibility but not used; the node
+                is always filled with mode ``"context_fill"``.
+
+        Returns:
+            The dict produced by ``_fill_node`` for ``AnswerGenerateOp``
+            (a dict, not a tuple as the previous annotation claimed).
+        """
+        prompt = ANSWER_GENERATION_PROMPT.format(input=input)
+        response = await self._fill_node(AnswerGenerateOp, prompt, mode="context_fill")
+        return response

-class Review(Operator):
-    def __init__(self, criteria: str = "accuracy", name: str = "Review", llm: LLM = LLM()):
-        self.criteria = criteria
-        super().__init__(name, llm)
+class CustomCodeGenerate(Operator):
+    def __init__(self, llm: LLM, name: str = "CustomCodeGenerate"):
+        super().__init__(llm, name)

-    async def __call__(self, problem, solution, mode: str = None):
-        prompt = REVIEW_PROMPT.format(problem_description=problem, solution=solution, criteria=self.criteria)
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(ReviewOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response 
+    async def __call__(self, problem, entry_point, instruction):
+        """Fill ``GenerateOp`` in ``"code_fill"`` mode for a coding problem.
+
+        Args:
+            problem: Problem text, appended after ``instruction``.
+            entry_point: Forwarded to ``fill`` as ``function_name``.
+            instruction: Text placed before the problem in the prompt.
+
+        Returns:
+            The dict produced by ``_fill_node``.
+        """
+        prompt = instruction + problem
+        response = await self._fill_node(GenerateOp, prompt, mode="code_fill", function_name=entry_point)
+        return response


-class Revise(Operator):
-    def __init__(self, name: str = "Revise", llm: LLM = LLM()):
-        super().__init__(name, llm)
-
-    async def __call__(self, problem, solution, feedback, mode: str = None):
-        prompt = REVISE_PROMPT.format(problem_description=problem, solution=solution, feedback=feedback)
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(ReviseOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
-        return response  
-
-class MdEnsemble(Operator):
-    """
-    Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
-    Link: https://arxiv.org/abs/2311.16452
-    """
-
-    def __init__(self, name: str = "MdEnsemble", llm: LLM = LLM(), vote_count: int = 3):
-        super().__init__(name, llm)
-        self.vote_count = vote_count
-
-    @staticmethod
-    def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, str]]:
-        shuffled_solutions = solutions.copy()
-        random.shuffle(shuffled_solutions)
-        answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
-        return shuffled_solutions, answer_mapping
-
-    async def __call__(self, solutions: List[str], problem: str, mode: str = None):
-        logger.info(f"solution count: {len(solutions)}")
-        all_responses = []
-
-        for _ in range(self.vote_count):
-            shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
-
-            solution_text = ""
-            for index, solution in enumerate(shuffled_solutions):
-                solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
-
-            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
-            fill_kwargs = {"context": prompt, "llm": self.llm}
-            if mode:
-                fill_kwargs["mode"] = mode
-            node = await ActionNode.from_pydantic(MdEnsembleOp).fill(**fill_kwargs)
-            response = node.instruct_content.model_dump()
-
-            answer = response.get("solution_letter", "")
-            answer = answer.strip().upper()
-
-            if answer in answer_mapping:
-                original_index = answer_mapping[answer]
-                all_responses.append(original_index)
-
-        most_frequent_index = Counter(all_responses).most_common(1)[0][0]
-        final_answer = solutions[most_frequent_index]
-        return {"solution": final_answer}  
-
 class ScEnsemble(Operator):
    """
    Paper: Self-Consistency Improves Chain of Thought Reasoning in Language Models
@ -203,31 +94,118 @@ class ScEnsemble(Operator):
    Link: https://arxiv.org/abs/2311.17311
    """

-    def __init__(self, name: str = "ScEnsemble", llm: LLM = LLM()):
-        super().__init__(name, llm)
+    def __init__(self, llm: LLM, name: str = "ScEnsemble"):
+        super().__init__(llm, name)

-    async def __call__(self, solutions: List[str], problem: str, mode: str = None):
+    async def __call__(self, solutions: List[str], problem: str = ""):
+        """Pick the most consistent solution among ``solutions``.
+
+        Args:
+            solutions: Candidate solutions; each is labelled A, B, C, ... in
+                the prompt, in the given order.
+            problem: Question text substituted into the ``{question}``
+                placeholder of ``SC_ENSEMBLE_PROMPT``. Optional so existing
+                callers that pass only ``solutions`` keep working.
+
+        Returns:
+            ``{"response": <chosen solution>}``.
+
+        Raises:
+            KeyError: If the returned ``solution_letter`` is not one of the
+                labels assigned to ``solutions``.
+        """
        answer_mapping = {}
        solution_text = ""
        for index, solution in enumerate(solutions):
            answer_mapping[chr(65 + index)] = index
            solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"

-        prompt = SC_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
-        fill_kwargs = {"context": prompt, "llm": self.llm}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(ScEnsembleOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
+        # SC_ENSEMBLE_PROMPT has both {question} and {solutions}; omitting
+        # either one makes str.format raise KeyError.
+        prompt = SC_ENSEMBLE_PROMPT.format(question=problem, solutions=solution_text)
+        response = await self._fill_node(ScEnsembleOp, prompt, mode="context_fill")

        answer = response.get("solution_letter", "")
        answer = answer.strip().upper()

-        return {"solution": solutions[answer_mapping[answer]]} 
+        return {"response": solutions[answer_mapping[answer]]}
+
+def run_code(code):
+    """Execute ``code`` and call the zero-argument ``solve`` function it defines.
+
+    Args:
+        code: Python source text. It is rejected before execution when it
+            contains an ``import``/``from`` statement for one of the
+            disallowed modules.
+
+    Returns:
+        A ``(status, message)`` tuple. ``("Success", str(result))`` when
+        ``solve()`` returns; otherwise ``("Error", <reason>)`` for a
+        prohibited import, a missing ``solve`` function, or an exception
+        raised while executing the code (message includes the traceback).
+    """
+    import re  # local import: only the import filter below needs it
+
+    try:
+        disallowed_imports = [
+            "os", "sys", "subprocess", "multiprocessing",
+            "matplotlib", "seaborn", "plotly", "bokeh", "ggplot",
+            "pylab", "tkinter", "PyQt5", "wx", "pyglet"
+        ]
+
+        # Check for prohibited imports. Whole-word matching avoids rejecting
+        # unrelated names that merely start with a blocked module name
+        # (e.g. "from oscillator" or "import sysconfig").
+        for lib in disallowed_imports:
+            if re.search(rf"\b(?:import|from)\s+{re.escape(lib)}\b", code):
+                # f-string keeps the module name in the message regardless of
+                # whether the logger formats with %-style or brace-style args.
+                logger.info(f"Detected prohibited import: {lib}")
+                return "Error", f"Prohibited import: {lib} and graphing functionalities"
+
+        # NOTE: exec runs the supplied code inside this process; the import
+        # filter above is a heuristic and not a sandbox.
+        global_namespace = {}
+        exec(code, global_namespace)
+        # The code is expected to define a callable named 'solve'
+        solve = global_namespace.get('solve')
+        if callable(solve):
+            return "Success", str(solve())
+        return "Error", "Function 'solve' not found"
+    except Exception as e:
+        return "Error", f"Execution error: {str(e)}\n{traceback.format_exc()}"
+    
+
+class Programmer(Operator):
+    # Generates Python code for a problem with the LLM, runs it through
+    # run_code in a worker process, and feeds execution errors back into the
+    # next generation round.
+    def __init__(self, llm: LLM, name: str = "Programmer"):
+        super().__init__(llm, name)
+
+    async def exec_code(self, code, timeout=30):
+        """
+        Run ``run_code(code)`` in a single-worker process pool and await the result.
+
+        Returns the ``(status, message)`` tuple from ``run_code``, or
+        ``("Error", "Code execution timed out")`` when no result arrives within
+        ``timeout``, or ``("Error", "Unknown error: ...")`` for any other exception.
+        """
+        loop = asyncio.get_running_loop()
+        with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
+            try:
+                # Submit run_code task to the process pool
+                future = loop.run_in_executor(executor, run_code, code)
+                # Wait for the task to complete or timeout
+                result = await asyncio.wait_for(future, timeout=timeout)
+                return result
+            except asyncio.TimeoutError:
+                # Timeout, attempt to shut down the process pool
+                # NOTE(review): presumably this does not interrupt a work item
+                # that is already running in the worker - confirm whether the
+                # worker process can outlive the timeout.
+                executor.shutdown(wait=False, cancel_futures=True)
+                return "Error", "Code execution timed out"
+            except Exception as e:
+                return "Error", f"Unknown error: {str(e)}"
+
+    async def code_generate(self, problem, analysis, feedback, mode):
+        """
+        Fill ``CodeGenerateOp`` from ``PYTHON_CODE_VERIFIER_PROMPT``.
+
+        ``problem``, ``analysis`` and ``feedback`` are substituted into the
+        prompt; ``mode`` is forwarded to ``_fill_node`` together with
+        ``function_name="solve"``. Returns the dict from ``_fill_node``.
+        """
+        prompt = PYTHON_CODE_VERIFIER_PROMPT.format(
+            problem=problem,
+            analysis=analysis,
+            feedback=feedback
+        )
+        response = await self._fill_node(CodeGenerateOp, prompt, mode, function_name="solve")
+        return response
+
+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+    async def __call__(self, problem: str, analysis: str = "None"):
+        """
+        Generate code for ``problem`` and execute it, for up to three rounds.
+
+        Each failed round appends the failing code and its error output to the
+        feedback used for the next generation. Returns a dict with keys
+        ``"code"`` and ``"output"``: the first successful round, an early
+        ``"No code generated"`` result when the model returns no code, or the
+        code and error output of the last failed round.
+        """
+        code = None
+        output = None
+        feedback = ""
+        for i in range(3):
+            code_response = await self.code_generate(problem, analysis, feedback, mode="code_fill")
+            code = code_response.get("code")
+            if not code:
+                return {"code": code, "output": "No code generated"}
+            status, output = await self.exec_code(code)
+            if status == "Success":
+                return {"code": code, "output": output}
+            else:
+                print(f"Execution error on attempt {i + 1}, error message: {output}")
+                # Carry the failing code and its error into the next prompt.
+                feedback = (
+                    f"\nThe result of the error from the code you wrote in the previous round:\n"
+                    f"Code: {code}\n\nStatus: {status}, {output}"
+                )
+        return {"code": code, "output": output}
+

 class Test(Operator):
-    def __init__(self, llm, name: str = "Test"):
-        super().__init__(name, llm)
+    def __init__(self, llm: LLM, name: str = "Test"):
+        super().__init__(llm, name)

    def exec_code(self, solution, entry_point):

@ -282,8 +260,7 @@ class Test(Operator):
                    exec_pass=f"executed unsuccessfully, error: \n {result}",
                    test_fail="executed unsucessfully",
                )
-                node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm, mode="code_fill")
-                response = node.instruct_content.model_dump()
+                response = await self._fill_node(ReflectionTestOp, prompt, mode="code_fill")
                solution = response["reflection_and_solution"]
            else:
                prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format(
@ -292,8 +269,7 @@ class Test(Operator):
                    exec_pass="executed successfully",
                    test_fail=result,
                )
-                node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm, mode="code_fill")
-                response = node.instruct_content.model_dump()
+                response = await self._fill_node(ReflectionTestOp, prompt, mode="code_fill")
                solution = response["reflection_and_solution"]
        
        result = self.exec_code(solution, entry_point)
@ -301,74 +277,75 @@ class Test(Operator):
            return {"result": True, "solution": solution}
        else:
            return {"result": False, "solution": solution}
+    

-class Programmer(Operator):
-    def __init__(self, llm: LLM, name: str = "Programmer"):
-        super().__init__(name, llm)
+class Format(Operator):
+    def __init__(self, llm: LLM, name: str = "Format"):
+        super().__init__(llm, name)

-    async def exec_code(code, timeout=180):
-        def run_code():
-            try:
-                # Create a new global namespace
-                global_namespace = {}
-                
-                # Use exec to execute the code
-                exec(code, global_namespace)
-                
-                # Assume the code defines a function named 'solve'
-                if 'solve' in global_namespace:
-                    result = global_namespace['solve']()
-                    return "Success", str(result)
-                else:
-                    return "Error", "Function 'solve' not found"
-            except Exception as e:
-                exc_type, exc_value, exc_traceback = sys.exc_info()
-                tb_str = traceback.format_exception(exc_type, exc_value, exc_traceback)
-                return "Error", f"Execution error: {str(e)}\n{''.join(tb_str)}"
+    async def __call__(self, problem, solution, mode: str = None):
+        """Fill ``FormatOp`` from ``FORMAT_PROMPT`` for the given problem and solution.
+
+        ``mode`` is forwarded to ``_fill_node`` as given; the dict it returns
+        is passed back unchanged.
+        """
+        prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
+        response = await self._fill_node(FormatOp, prompt, mode)
+        return response 

-        # Create an event to mark task completion
-        done_event = threading.Event()
-        result = ["Error", "Execution resulted in no output, subprocess exception"]

-        def wrapper():
-            nonlocal result
-            result = run_code()
-            done_event.set()
+class Review(Operator):
+    def __init__(self, llm: LLM, name: str = "Review"):
+        super().__init__(llm, name)

-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-            future = executor.submit(wrapper)
-            try:
-                # Wait for task completion or timeout
-                if done_event.wait(timeout=timeout):
-                    return result
-                else:
-                    # Timeout, attempt to cancel the task
-                    future.cancel()
-                    return "Error", "Code execution timed out"
-            finally:
-                # Ensure the thread pool is properly shut down
-                executor.shutdown(wait=False)
-
-    async def code_generate(self, problem, analysis, feedback, mode):
-        prompt = PYTHON_CODE_VERIFIER_PROMPT.format(problem=problem, analysis=analysis, feedback=feedback)
-        fill_kwargs = {"context": prompt, "llm": self.llm, "function_name": "solve"}
-        if mode:
-            fill_kwargs["mode"] = mode
-        node = await ActionNode.from_pydantic(CodeGenerateOp).fill(**fill_kwargs)
-        response = node.instruct_content.model_dump()
+    async def __call__(self, problem, solution, mode: str = None):
+        """Fill ``ReviewOp`` from ``REVIEW_PROMPT`` for the given problem and solution.
+
+        ``mode`` is accepted but not used: the node is always filled with
+        mode ``"context_fill"``. Returns the dict from ``_fill_node``.
+        """
+        prompt = REVIEW_PROMPT.format(problem=problem, solution=solution)
+        response = await self._fill_node(ReviewOp, prompt, mode="context_fill")
        return response

-    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
-    async def __call__(self, problem: str, analysis: str = "None"):
-        code = None
-        feedback = ""
-        for i in range(3):
-            code = await self.code_generate(problem, analysis, feedback, mode="code_fill")
-            code = code["code"]
-            status, output = await self.exec_code(code)
-            if status == "Success":
-                return {"code": code, "output": output}
-            else:
-                logger.info(f"Execution error in attempt {i + 1}, error message: {output}")
-                feedback = f"\nThe result of the error from the code you wrote in the previous round:\nCode:{code}\n\nStatus:{status},{output}"
-        return {"code": code, "output": "error"}
+class Revise(Operator):
+    # Operator that fills ReviseOp from REVISE_PROMPT.
+    def __init__(self, llm: LLM, name: str = "Revise"):
+        super().__init__(llm, name)
+
+    async def __call__(self, problem, solution, feedback, mode: str = None):
+        """Fill ``ReviseOp`` from the problem, the current solution and review feedback.
+
+        ``mode`` is accepted but not used: the node is always filled with
+        mode ``"context_fill"``. Returns the dict from ``_fill_node``.
+        """
+        prompt = REVISE_PROMPT.format(problem=problem, solution=solution, feedback=feedback)
+        response = await self._fill_node(ReviseOp, prompt, mode="context_fill")
+        return response  
+
+
+class MdEnsemble(Operator):
+    """
+    Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
+    Link: https://arxiv.org/abs/2311.16452
+
+    Runs ``vote_count`` voting rounds, each over a freshly shuffled ordering
+    of the candidate solutions, and returns the solution with the most votes.
+    """
+
+    def __init__(self, llm: LLM, name: str = "MdEnsemble", vote_count: int = 5):
+        super().__init__(llm, name)
+        self.vote_count = vote_count
+
+    @staticmethod
+    def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, int]]:
+        """Return a shuffled copy of ``solutions`` and a letter -> original index map.
+
+        Letters are assigned A, B, C, ... in shuffled order. Identical
+        solutions map to the index of their first occurrence in ``solutions``.
+        """
+        shuffled_solutions = solutions.copy()
+        random.shuffle(shuffled_solutions)
+        answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
+        return shuffled_solutions, answer_mapping
+
+    async def __call__(self, solutions: List[str], problem: str, mode: str = None):
+        """Vote ``vote_count`` times and return the winning solution.
+
+        Args:
+            solutions: Candidate solutions to choose from.
+            problem: Question text substituted into ``MD_ENSEMBLE_PROMPT``.
+            mode: Accepted but not used; every round is filled with mode
+                ``"context_fill"``.
+
+        Returns:
+            ``{"solution": <winning solution>}``. When no round yields a
+            recognizable letter, the first candidate is returned.
+
+        Raises:
+            IndexError: If ``solutions`` is empty.
+        """
+        print(f"solution count: {len(solutions)}")
+        all_responses = []
+
+        for _ in range(self.vote_count):
+            shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
+
+            solution_text = "".join(
+                f"{chr(65 + index)}: \n{str(solution)}\n\n\n" for index, solution in enumerate(shuffled_solutions)
+            )
+
+            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, question=problem)
+            response = await self._fill_node(MdEnsembleOp, prompt, mode="context_fill")
+
+            answer = response.get("solution_letter", "A")
+            answer = answer.strip().upper()
+
+            # Votes for letters outside the current labelling are ignored.
+            if answer in answer_mapping:
+                all_responses.append(answer_mapping[answer])
+
+        if not all_responses:
+            # No valid vote was cast; most_common(1)[0] would raise IndexError
+            # on an empty tally, so fall back to the first candidate.
+            return {"solution": solutions[0]}
+
+        most_frequent_index = Counter(all_responses).most_common(1)[0][0]
+        return {"solution": solutions[most_frequent_index]}
--- a/examples/aflow/scripts/operator_an.py
+++ b/examples/aflow/scripts/operator_an.py
@ -5,41 +5,35 @@

 from pydantic import BaseModel, Field

+
 class GenerateOp(BaseModel):
    response: str = Field(default="", description="Your solution for this problem")

 class CodeGenerateOp(BaseModel):
    code: str = Field(default="", description="Your complete code solution for this problem")

+class AnswerGenerateOp(BaseModel):
+    thought: str = Field(default="", description="The step by step thinking process")
+    answer: str = Field(default="", description="The final answer to the question")
+
 class FormatOp(BaseModel):
    solution: str = Field(default="", description="Your formatted answer for this problem")

+class ScEnsembleOp(BaseModel):
+    thought: str = Field(default="", description="The thought of the most consistent solution.")
+    solution_letter: str = Field(default="", description="The letter of most consistent solution.")
+
+class ReflectionTestOp(BaseModel):
+    reflection_and_solution: str = Field(default="", description="Corrective solution for code execution errors or test case failures")
+
+class MdEnsembleOp(BaseModel):
+    thought: str = Field(default="", description="Step-by-step analysis of the solutions to determine the best one.")
+    solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).")
+
 class ReviewOp(BaseModel):
-    review_result: bool = Field(
-        default=False,
-        description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'",
-    )
-    feedback: str = Field(
-        default="",
-        description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.",
-    )
+    review_result: bool = Field(default=False, description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'")
+    feedback: str = Field(default="",description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.")

 class ReviseOp(BaseModel):
    solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")

-class MdEnsembleOp(BaseModel):
-    thought: str = Field(
-        default="",
-        description="Step-by-step analysis of the solutions to determine the best one.",
-    )
-    solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).")
-
-class ScEnsembleOp(BaseModel):
-    solution_letter: str = Field(default="", description="The letter of most consistent solution.")
-
-class ReflectionTestOp(BaseModel):
-    reflection_and_solution: str = Field(
-        default="", description="Corrective solution for code execution errors or test case failures"
-    )
-
-
--- a/examples/aflow/scripts/prompt.py
+++ b/examples/aflow/scripts/prompt.py
@ -3,26 +3,11 @@
 # @Author  : didi
 # @Desc    : prompts of operators

-CONTEXTUAL_GENERATE_PROMPT = """
-Generate Solution for the following problem: 
-
-## Problem Description
-{problem_description}
-
-## Thought
-{thought}
-"""
-
-GENERATE_CODEBLOCK_PROMPT = """
-Please provide a self-contained  Python script that solves the following problem in a markdown code block:
-
-{problem_description}
-
-When creating your solution:
-1. Consider all edge cases and boundary conditions.
-2. Avoid oversimplification - address all aspects of the problem.
-3. Ensure your logic covers all stated requirements.
-4. Avoid adding additional test cases beyond those provided in the problem description.
+ANSWER_GENERATION_PROMPT = """
+Think step by step and solve the problem.
+1. In the "thought" field, explain your thinking process in detail.
+2. In the "answer" field, provide the final answer concisely and clearly. The answer should be a direct response to the question, without including explanations or reasoning.
+Your task: {input}
 """

 FORMAT_PROMPT = """
@ -31,59 +16,32 @@ please extract a short and concise answer contains only one word/few words from
 Make sure there are no additional comments or explanations in your response.
 """

-REVIEW_PROMPT = """
-For the question described as {problem_description},
-please review the following solution: {solution}, and provide a review result in boolean format.
-```
-You will be reviewing the problem-solving process of another AI assistant that has answered a mathematical question. Your task is to evaluate the solution and provide a detailed review for refinement. Follow these steps:
-<step1>
-Carefully read through the original question and entire solution, paying close attention to the relevant concepts, thinking process, calculations, and final result. Assess whether the solution is clear, logical, and well-organized. Write your initial review in <initialReview> tags.
-</step1>
-<step2>
-Evaluate the reasoning and logic behind the solution. Ensure that the thinking process is clear, coherent, and mathematically sound. If you find any areas that need clarification or improvement, provide your suggestions inside <reasoningFeedback> tags.
-</step2>
-<step3>
-Re-do the calculations presented in the <calculation> section **carefully and step-by-step** to verify the accuracy. Break down the calculations into the simplest possible steps and check each step for errors. You must not be careless and treat every part with rigor. Don't neglect checking any calculation part of the solution process. If you find any mistakes, note them down inside <calculationErrors> tags.
-</step3>
-<step4>
-Provide an overall assessment of the solution's thoroughness, accuracy, and clarity inside <overallAssessment> tags. Highlight the strengths and weaknesses of the solution and offer suggestions for improvement, if any.
-</step4>
-use XML tags to present your complete evaluation, including initial review, calculation errors, reasoning feedback, and overall assessment, in a well-organized and easy-to-follow format.
-Remember to be thorough, constructive, and professional in your review. Your goal is to help improve the quality and accuracy of the mathematical problem-solving process.
-```
-If you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your comments
-"""
-
-REVISE_PROMPT = """
-For the question described as {problem_description},
-please evaluate and revise the solution provided: {solution}, taking into account the review feedbacks: {feedback}."
-Then output the revised solution.
-"""
-
-MD_ENSEMBLE_PROMPT = """
-You are given a problem:
-{problem_description}
-
-Here is a list of possible solutions to the problem:
-{solutions}
-
-Using the inputs above, your goal is to choose the best solution to the problem.
-The main consideration is that the solution can fully solve the problem in a correct and robust manner.
-Provide your final decision by writing the chosen solution letter.
-
-Please follow the required format in your response.
-"""
-
 SC_ENSEMBLE_PROMPT = """
-I have generated the following solutions to the question: {problem_description}
-
+Given the question described as follows: {question}
+Several solutions have been generated to address the given question. They are as follows:
 {solutions}

-Evaluate these solutions.
-Select the most consistent solution based on majority consensus.
-Give your answer with a single id of solution (without anything else).
+Carefully evaluate these solutions and identify the answer that appears most frequently across them. This consistency in answers is crucial for determining the most reliable solution.
+
+In the "thought" field, provide a detailed explanation of your thought process. In the "solution_letter" field, output only the single letter ID (A, B, C, etc.) corresponding to the most consistent solution. Do not include any additional text or explanation in the "solution_letter" field.
 """

+PYTHON_CODE_VERIFIER_PROMPT = """
+You are a professional Python programmer. Your task is to write complete, self-contained code based on a given mathematical problem and output the answer. The code should include all necessary imports and dependencies, and be ready to run without additional setup or environment configuration.
+
+Problem description: {problem}
+Other analysis: {analysis}
+{feedback}
+
+Your code should:
+1. Implement the calculation steps described in the problem.
+2. Define a function named `solve` that performs the calculation and returns the result. The `solve` function should not require any input parameters; instead, it should obtain all necessary inputs from within the function or from globally defined variables.
+3. `solve` function return the final calculation result.
+
+Please ensure your code is efficient, well-commented, and follows Python best practices. The output should be limited to basic data types such as strings, integers, and floats. It is prohibited to transmit images or other file formats. The code output is intended for a text-based language model.
+"""
+
+
 REFLECTION_ON_PUBLIC_TEST_PROMPT = """
 Given a code problem and a python code solution which failed to pass test or execute, you need to analyze the reason for the failure and propose a better code solution.: 
 ### problem
@ -101,14 +59,31 @@ Given a code problem and a python code solution which failed to pass test or exe
 Please provide a reflection on the failed test cases and code solution, followed by a better code solution without any additional text or test cases.
 """

-PYTHON_CODE_VERIFIER_PROMPT = """You are a professional Python programmer. Your task is to write Python code based on the user's request. Make sure to add appropriate explanations and your personal thought process to your code. Additionally, all code should be encapsulated in Python code blocks.
+MD_ENSEMBLE_PROMPT = """
+Given the question described as follows: {question}
+Several solutions have been generated to address the given question. They are as follows:
+{solutions}

-The packages you can use include: numpy, scipy, pandas, sympy, statsmodels, scikit-learn. If you attempt to import another external package and encounter an error, do not say it cannot be imported. Instead, try to write new code that avoids this issue.
+Carefully evaluate these solutions and identify the solution that is more capable of solving the problem compared to other solutions, as this is crucial for problem-solving.

-Always output complete code rather than just giving suggestions or partial modifications, as your code will be executed directly. If immediate execution is required to check for possible errors, include test cases in the code.
-
-In your response, only the code that needs to be run should be wrapped in multi-line code blocks. No other multi-line code blocks should appear. Your code needs to print the output after execution. Your code should not print error messages.
-
-Problem description: {problem}
-Please write Python code to solve this problem.
+In the "thought" field, provide a detailed explanation of your thought process. In the "solution_letter" field, output only the single letter ID (A, B, C, etc.) corresponding to the solution. Do not include any additional text or explanation in the "solution_letter" field.
 """
+
+REVIEW_PROMPT = """
+Given a problem and a thoughtful solution, your task is to using critical thinking (questioning) to review the solution's correctness and provide a review result in boolean format.
+
+problem: {problem}
+solution: {solution}
+
+If you are more than 95 percent confident that the final answer is incorrect, please return False and give a feedback for the error. Otherwise, please return True and give a explanation for the correctness.
+"""
+
+REVISE_PROMPT = """
+Given a problem and a thoughtful solution which is just reviewed as incorrect, your task is to revise the solution to solve the question and ensure the final code solution is wrapped with ```python```.
+
+problem: {problem}
+solution: {solution}
+feedback: {feedback}
+
+Ensure the output code is self-contained, and without any additional text or test cases.
+"""