Update humaneval

This commit is contained in:
didi 2024-07-10 16:23:38 +08:00
parent 86033a1037
commit 4af2315c77
6 changed files with 46 additions and 13 deletions

View file

@ -73,6 +73,8 @@ async def samples_generate(mode:str):
if automatic_evalplus():
unpassed_exapmle = extract_failure_tests()
print(unpassed_exapmle)
else:
print(failed_tasks)
async def samples_generate_ags():
sample_list = []

View file

@ -64,4 +64,6 @@ class HumanEvalGraph(Graph):
break
solution = await self.revise(problem, solution, review_feedback['feedback'])
solution = solution.get('revised_solution')
return solution
return solution

View file

@ -157,4 +157,14 @@ class MdEnsemble(Operator):
class ScEnsemble(Operator):
    # Placeholder operator: the body is empty, so every attribute and method
    # comes unchanged from Operator.
    # NOTE(review): the "Sc" prefix presumably stands for a self-consistency
    # style ensemble — confirm with the author before implementing.
    # TODO: implement.
    pass
class Debate(Operator):
    # Placeholder operator: nothing is implemented yet; the body holds only the
    # string literal below followed by `pass`.
    # NOTE(review): because the string is the first statement in the class
    # body, Python stores it as the class docstring (Debate.__doc__). It reads
    # like a prompt for an agreeable debate participant rather than
    # documentation — presumably it should move into a prompt constant once the
    # operator is implemented; confirm with the author.
    # TODO: implement.
    """
You agree with my answer 90% of the time and have almost no reservations. Affirm your agreement, share any additional thoughts if you have them, and conclude with the capital letter corresponding to your answer at the end of your response.
"""
    pass
class CriticalThinkingAbstract(Operator):
    # Placeholder operator: the body is empty, so every attribute and method
    # comes unchanged from Operator. No behavior is defined here yet.
    pass

View file

@ -25,5 +25,9 @@ class EnsembleOp(BaseModel):
final_solution: str = Field(default="", description="Final ensemble solution for this problem")
class MdEnsembleOp(BaseModel):
    # Two-field output schema: free-form reasoning plus the chosen candidate.
    # Presumably filled in by the MdEnsemble operator — verify against caller.
    #
    # Each field used to be assigned twice in this class body; only the later
    # assignment took effect, so the shadowed first pair has been removed. The
    # effective defaults and descriptions below are unchanged.

    # Step-by-step analysis of the candidate solutions.
    thought: str = Field(
        default="",
        description="Analyze the solutions and think what's the best step by step.",
    )

    # Identifier of the selected candidate.
    # NOTE(review): the description says "solution number" while the field is
    # named solution_letter and the example is "A" — confirm the intended
    # wording before changing this runtime string.
    # The continuation lines of the description are deliberately unindented:
    # the text inside the triple quotes is emitted verbatim.
    solution_letter: str = Field(
        default="",
        description="""
Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem.
Provide your final decision by writing the chosen solution number. (eg.A) """,
    )

View file

@ -45,13 +45,25 @@ For the question described as {problem_description}, Solutions: {solutions}
Please select the solution that appears most frequently from these options and ensemble this to provide best solution.
"""
# Prompt template that presents a problem and its candidate solutions.
# The braces look like str.format-style placeholders named
# `problem_description` and `solutions`; the formatting call is not visible
# here — verify against the caller.
#
# Previous wording, kept for reference as one contiguous comment block:
#
# MD_ENSEMBLE_PROMPT = """
# # Context
# For the question described as {problem_description},
# Solutions can be seen below:
# {solutions}
#
# # Instruction
# Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem.
# Provide your final decision by writing the chosen solution number (e.g., A).
# """
#
# NOTE(review): the current prompt carries no selection instruction of its own;
# presumably that guidance is supplied elsewhere — confirm.
MD_ENSEMBLE_PROMPT = """
### Given problem
{problem_description}
### We've got a list of solutions
<solutions>
{solutions}
</solutions>
"""

View file

@ -5,10 +5,13 @@ from examples.ags.w_action_node.utils import jsonl_ranker
# asyncio.run(sample_generate('HumanEval/101'))
# asyncio.run(sample_generate('HumanEval/1'))
asyncio.run(samples_generate(mode='ags'))
# asyncio.run(samples_generate(mode='llm'))
# jsonl_ranker("samples.jsonl", "samples.jsonl")
# if automatic_evalplus():
# unpassed_exapmle = extract_failure_tests()
# print(unpassed_exapmle)
# print(unpassed_exapmle)
# unpassed_exapmle = extract_failure_tests()
# print(unpassed_exapmle)