Update

2026-05-03 21:02:38 +02:00 · 2024-07-17 23:08:41 +08:00 · 2024-07-17 23:08:41 +08:00 · 89b0c4ce30
commit 89b0c4ce30
parent e0955c5bf9
7 changed files with 208 additions and 58 deletions
--- a/examples/ags/benchmark/humaneval.py
+++ b/examples/ags/benchmark/humaneval.py
@ -19,10 +19,14 @@ generate_code_block = GenerateCodeBlock(llm=LLM())

 solver = HumanEvalGraph(name="solver", llm=LLM(), criteria='correctness, efficiency, readability', vote_count=5)

-async def sample_generate(id, result_path:str="samples.jsonl"):
+async def sample_generate(id, result_path:str="samples.jsonl",mode:str="ags"):
    case = get_human_eval_plus()[f"{id}"]
-    solution_result = await solver(case['prompt'],ensemble_count=5)
-    sample_dict = dict(task_id=case['task_id'], solution=solution_result['final_solution'])
+    if mode == "ags":
+        solution_result = await solver(case['prompt'],ensemble_count=5)
+        sample_dict = dict(task_id=case['task_id'], solution=solution_result['final_solution'])
+    else:
+        solution_result =  await generate_code_block(case['prompt'])
+        sample_dict = dict(task_id=case['task_id'], solution=solution_result['code_solution'])
    with open(result_path, mode='a') as f:
        f.write(json.dumps(sample_dict) + '\n')
    jsonl_ranker(result_path, result_path)
@ -62,11 +66,29 @@ async def samples_generate(mode:str, result_path:str="samples.jsonl"):

    # TODO: this part is still not automated enough
    if failed_tasks:
-        for task_id in failed_tasks:
-            try:
-                await sample_generate(task_id) 
-            except Exception as e:
-                print(f"failure {task_id}")
+        print(failed_tasks)
+        if mode == 'llm':
+            for task_id in list(failed_tasks):  # iterate over a snapshot: successful tasks are removed from failed_tasks below
+                case = get_human_eval_plus()[task_id]
+                for _ in range(3):  # up to 3 attempts per failed task
+                    try:
+                        solution_result = await generate_code_block(case['prompt'])
+                        task_dict = {
+                        'task_id': case['task_id'],
+                        'solution': solution_result['code_solution']
+                        }
+                        with open(result_path, mode='a') as f:
+                            f.write(json.dumps(task_dict) + '\n')
+                        failed_tasks.remove(task_id)  # success: only still-failing tasks stay in failed_tasks
+                        break
+                    except Exception as e:
+                        print(f"{e} \n failure {task_id}")
+        elif mode == "ags":
+            for task_id in failed_tasks:
+                try:
+                    await sample_generate(task_id,result_path) 
+                except Exception as e:
+                    print(f"failure {task_id}")
    jsonl_ranker(result_path, result_path)
    
    if not failed_tasks:
--- a/examples/ags/w_action_node/graph.py
+++ b/examples/ags/w_action_node/graph.py
@ -32,10 +32,16 @@ class HumanEvalGraph(Graph):
    async def __call__(self, problem:str, ensemble_count:int = 3):
        solution_list = []
        for _ in range(ensemble_count):
-            solution = await self.generate_code(problem)
-            # solution = await self.generate_code_block(problem)
-            solution = solution.get('code_solution')
-            solution_list.append(solution)
+            for retry_count in range(5):
+                try:
+                    # solution = await self.generate_code(problem)
+                    solution = await self.generate_code_block(problem)
+                    solution = solution.get('code_solution')
+                    solution_list.append(solution)
+                    break
+                except Exception as e:
+                    print(e)
+            # solution_list now holds up to ensemble_count solutions (fewer if every retry for a slot failed)
        solution = await self.mdensemble("code", solution_list, problem)
        return solution
    
--- a/examples/ags/w_action_node/operator.py
+++ b/examples/ags/w_action_node/operator.py
@ -127,8 +127,8 @@ class MdEnsemble(Operator):
                    continue
            solutions = updated_solutions
            updated_length = len(solutions)
-            print(f"Original number of solutions: {original_length}")
-            print(f"Updated number of solutions: {updated_length}")
+            # print(f"Original number of solutions: {original_length}")
+            # print(f"Updated number of solutions: {updated_length}")
            if updated_length == 1:
                return {"final_solution": solutions[0]}
        for _ in range(self.vote_count):
@ -136,7 +136,7 @@ class MdEnsemble(Operator):
            
            solution_text = ""
            for index, solution in enumerate(shuffled_solutions):
-                solution_text += f"{chr(65 + index)}: {str(solution)}\n"
+                solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
    
            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
            node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
--- a/examples/ags/w_action_node/operator_an.py
+++ b/examples/ags/w_action_node/operator_an.py
@ -39,6 +39,6 @@ class MdEnsembleOp(BaseModel):
    )
    solution_letter: str = Field(
        default="",
-        description="The letter of the chosen best solution (output only one letter)."
+        description="The letter of the chosen best solution (only one letter)."
    )

--- a/examples/ags/w_action_node/prompt.py
+++ b/examples/ags/w_action_node/prompt.py
@ -83,23 +83,15 @@ Based on the given problem and solution candidates:
 """

 MD_ENSEMBLE_PROMPT = """
-### Given problem
-
+You are given a coding problem:
 {problem_description}

-### We've got a list of solutions
-
-<solutions>
+Here is a list of possible solutions to the problem:
 {solutions}
-</solutions>

-### Instructions
-Carefully analyze the given problem and the list of solution candidates. Your task is to determine the best answer based solely on how correctly and effectively it addresses the problem. Follow these steps:
-
-1. Thoroughly examine each solution.
-2. Evaluate their relevance and effectiveness in solving the problem.
-3. Compare the solutions to identify the most suitable one.
-4. Provide your final decision by writing the chosen solution letter (e.g., B).
+Using the inputs above, your goal is to choose the best solution to the code contest problem.
+Don't just pick the most efficient solution. The main consideration is that the solution can fully solve the problem in a correct and robust manner.
+Provide your final decision by writing the chosen solution letter (e.g., B).

 Please maintain the JSON format in your response.
 """