diff --git a/examples/ags/benchmark/humaneval.py b/examples/ags/benchmark/humaneval.py index ea7b9dedb..f2c1d5bb2 100644 --- a/examples/ags/benchmark/humaneval.py +++ b/examples/ags/benchmark/humaneval.py @@ -73,6 +73,8 @@ async def samples_generate(mode:str): if automatic_evalplus(): unpassed_exapmle = extract_failure_tests() print(unpassed_exapmle) + else: + print(failed_tasks) async def samples_generate_ags(): sample_list = [] diff --git a/examples/ags/w_action_node/graph.py b/examples/ags/w_action_node/graph.py index 3870bfd6d..e5b04a874 100644 --- a/examples/ags/w_action_node/graph.py +++ b/examples/ags/w_action_node/graph.py @@ -64,4 +64,6 @@ class HumanEvalGraph(Graph): break solution = await self.revise(problem, solution, review_feedback['feedback']) solution = solution.get('revised_solution') - return solution \ No newline at end of file + return solution + + diff --git a/examples/ags/w_action_node/operator.py b/examples/ags/w_action_node/operator.py index 3b832cc18..6c0bacd9e 100644 --- a/examples/ags/w_action_node/operator.py +++ b/examples/ags/w_action_node/operator.py @@ -157,4 +157,14 @@ class MdEnsemble(Operator): class ScEnsemble(Operator): # TODO + pass + +class Debate(Operator): + # TODO + """ + You agree with my answer 90% of the time and have almost no reservations. Affirm your agreement, share any additional thoughts if you have them, and conclude with the capital letter corresponding to your answer at the end of your response. + """ + pass + +class CriticalThinkingAbstract(Operator): pass \ No newline at end of file diff --git a/examples/ags/w_action_node/operator_an.py b/examples/ags/w_action_node/operator_an.py index ae3c22827..66d8baf08 100644 --- a/examples/ags/w_action_node/operator_an.py +++ b/examples/ags/w_action_node/operator_an.py @@ -25,5 +25,9 @@ class EnsembleOp(BaseModel): final_solution: str = Field(default="", description="Final ensemble solution for this problem") class MdEnsembleOp(BaseModel): - thought: str = Field(default="", description="Analyze the solutions and think what's the best step by step.") - solution_letter: str = Field(default="", description="Choose The Best Solution, and output the solution letter") \ No newline at end of file + thought: str = Field(default="", + description="Analyze the solutions and think what's the best step by step.") + solution_letter: str = Field(default="", + description=""" + Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem. + Provide your final decision by writing the chosen solution number. (eg.A) """) \ No newline at end of file diff --git a/examples/ags/w_action_node/prompt.py b/examples/ags/w_action_node/prompt.py index dcc1428de..f81331a55 100644 --- a/examples/ags/w_action_node/prompt.py +++ b/examples/ags/w_action_node/prompt.py @@ -45,13 +45,25 @@ For the question described as {problem_description}, Solutions: {solutions} Please select the solution that appears most frequently from these options and ensemble this to provide best solution. """ -MD_ENSEMBLE_PROMPT = """ -# Context -For the question described as {problem_description}, -Solutions can be seen below: -{solutions} +# MD_ENSEMBLE_PROMPT = """ +# # Context +# For the question described as {problem_description}, +# Solutions can be seen below: +# {solutions} -# Instruction -Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem. -Provide your final decision by writing the chosen solution number (e.g., A). +# # Instruction +# Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem. +# Provide your final decision by writing the chosen solution number (e.g., A). +# """ + +MD_ENSEMBLE_PROMPT = """ +### Given problem + +{problem_description} + +### We've got a list of solutions + + +{solutions} + """ \ No newline at end of file diff --git a/he_test.py b/he_test.py index fa827a4c1..2102dee83 100644 --- a/he_test.py +++ b/he_test.py @@ -5,10 +5,13 @@ from examples.ags.w_action_node.utils import jsonl_ranker # asyncio.run(sample_generate('HumanEval/101')) # asyncio.run(sample_generate('HumanEval/1')) -asyncio.run(samples_generate(mode='ags')) +# asyncio.run(samples_generate(mode='llm')) # jsonl_ranker("samples.jsonl", "samples.jsonl") # if automatic_evalplus(): # unpassed_exapmle = extract_failure_tests() -# print(unpassed_exapmle) \ No newline at end of file +# print(unpassed_exapmle) + +# unpassed_exapmle = extract_failure_tests() +# print(unpassed_exapmle) \ No newline at end of file