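Update: lower HumanEval ensemble_count from 5 to 3, move the MD-ensemble selection instructions from the MdEnsembleOp field description into MD_ENSEMBLE_PROMPT, and run sample generation in 'ags' mode from he_test.py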

2026-06-08 15:05:17 +02:00 · 2024-07-11 16:18:34 +08:00 · 2024-07-11 16:18:34 +08:00 · eb97b54a20
commit eb97b54a20
parent 4af2315c77
4 changed files with 16 additions and 11 deletions
--- a/examples/ags/benchmark/humaneval.py
+++ b/examples/ags/benchmark/humaneval.py
@ -21,7 +21,7 @@ solver = HumanEvalGraph(name="solver", llm=LLM(), criteria='correctness, efficie

 async def sample_generate(id):
    case = get_human_eval_plus()[f"{id}"]
-    solution_result = await solver(case['prompt'],ensemble_count=5)
+    solution_result = await solver(case['prompt'],ensemble_count=3)
    sample_dict = dict(task_id=case['task_id'], solution=solution_result['final_solution'])
    with open("samples.jsonl", mode='a') as f:
        f.write(json.dumps(sample_dict) + '\n')
@ -153,6 +153,7 @@ def extract_failure_tests(file_path:str = "/Users/trl/Github_project/MetaGPT-Mat


 # asyncio.run(sample_generate('HumanEval/101'))
-# asyncio.run(samples_generate(mode='llm'))
+# asyncio.run(samples_generate(mode='ags'))
 # jsonl_ranker("samples.jsonl", "samples.jsonl")
-# {"task_id": "HumanEval/101", "solution": "def words_string(s):\n    import re\n    return re.split(r'[,\\s]\\s*', s)"}
+# {"task_id": "HumanEval/101", "solution": "def words_string(s):\n    import re\n    return re.split(r'[,\\s]\\s*', s)"}
+
--- a/examples/ags/w_action_node/operator_an.py
+++ b/examples/ags/w_action_node/operator_an.py
@ -25,9 +25,5 @@ class EnsembleOp(BaseModel):
    final_solution: str = Field(default="", description="Final ensemble solution for this problem")

 class MdEnsembleOp(BaseModel):
-    thought: str = Field(default="",
-                          description="Analyze the solutions and think what's the best step by step.")
-    solution_letter: str = Field(default="",
-                                 description="""
-        Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem.
-        Provide your final decision by writing the chosen solution number. (eg.A) """)
+    thought: str = Field(default="", description="Analyze the solutions and think what's the best step by step.")
+    solution_letter: str = Field(default="", description="Choose The Best Solution, and output only one solution letter")
--- a/examples/ags/w_action_node/prompt.py
+++ b/examples/ags/w_action_node/prompt.py
@ -66,4 +66,7 @@ MD_ENSEMBLE_PROMPT = """
 <solutions>
 {solutions}
 </solutions>
+
+### Instructions
+Based on the problem and solution candidates, carefully analyze which is the best answer. Focus solely on the correctness of the solution in addressing the problem. Provide your final decision by writing the chosen solution number. (eg.B). Keep the json format.
 """
--- a/he_test.py
+++ b/he_test.py
@ -5,7 +5,7 @@ from examples.ags.w_action_node.utils import jsonl_ranker

 # asyncio.run(sample_generate('HumanEval/101'))
 # asyncio.run(sample_generate('HumanEval/1'))
-# asyncio.run(samples_generate(mode='llm'))
+asyncio.run(samples_generate(mode='ags'))
 # jsonl_ranker("samples.jsonl", "samples.jsonl")


@ -14,4 +14,9 @@ from examples.ags.w_action_node.utils import jsonl_ranker
 #     print(unpassed_exapmle)

 # unpassed_exapmle = extract_failure_tests()
-# print(unpassed_exapmle)
+# print(unpassed_exapmle)
+
+# failure_list = ['HumanEval/0', 'HumanEval/1', 'HumanEval/7', 'HumanEval/16', 'HumanEval/24', 'HumanEval/31', 'HumanEval/40', 'HumanEval/56', 'HumanEval/67', 'HumanEval/74', 'HumanEval/83', 'HumanEval/86', 'HumanEval/87', 'HumanEval/90', 'HumanEval/95', 'HumanEval/101', 'HumanEval/104', 'HumanEval/113', 'HumanEval/125', 'HumanEval/132', 'HumanEval/135', 'HumanEval/140', 'HumanEval/143', 'HumanEval/145', 'HumanEval/154', 'HumanEval/161']
+
+# for example in failure_list:
+#     asyncio.run(sample_generate(example))