diff --git a/examples/ags/scripts/evaluator.py b/examples/ags/scripts/evaluator.py
index 039c26812..d3759f277 100644
--- a/examples/ags/scripts/evaluator.py
+++ b/examples/ags/scripts/evaluator.py
@@ -47,13 +47,13 @@ class Evaluator:
         elif dataset == "MATH":
             return self._math_eval(graph, params, path, is_test)
         elif dataset == "HumanEval":
-            return self._humaneval_eval(graph, params, is_test)
+            return self._humaneval_eval(graph, params, path, is_test)
         elif dataset == "HotpotQA":
-            return self._hotpotqa_eval(graph, params, is_test)
+            return self._hotpotqa_eval(graph, params, path, is_test)
         elif dataset == "MBPP":
-            return self._mbpp_eval(graph, params, is_test)
+            return self._mbpp_eval(graph, params, path, is_test)
         elif dataset == "DROP":
-            return self._drop_eval(graph, params, is_test)
+            return self._drop_eval(graph, params, path, is_test)

     # def graph_evaluate(self, dataset: DatasetType, graph, params: dict, path):
     #     """
@@ -154,7 +154,7 @@ class Evaluator:
             va_list = [0]
         else:
             data_path = "examples/ags/data/human-eval_validate.jsonl"  # replace with the path to your JSONL file
-            va_list = [0]
+            va_list = None

         graph = await load_graph()
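A note on the second hunk: `va_list = [0]` evaluated only the first validation sample, and `va_list = None` presumably tells the evaluator to use every sample in the JSONL file. A minimal sketch of how an eval method might interpret the flag (the helper name and the None-means-all convention are assumptions, not code from this patch):

```python
import json

def load_validation_data(data_path: str, va_list=None):
    """Hypothetical helper: load a JSONL validation file and keep the rows
    selected by va_list; va_list=None is read as "use every sample"."""
    with open(data_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f]
    if va_list is not None:  # e.g. [0] keeps only the first problem, for smoke tests
        data = [data[i] for i in va_list]
    return data
```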
error": - return {"final_solution": solution} + return {"result": True, "solution": solution} elif "exec_fail_case" in result: result = result["exec_fail_case"] prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format( - problem_description=problem, - rephrase_problem=rephrase_problem, - code_solution=solution, + problem=problem, + solution=solution, exec_pass=f"executed unsuccessfully, error: \n {result}", test_fail="executed unsucessfully", ) @@ -426,9 +435,8 @@ class Test(Operator): solution = response["refined_solution"] else: prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format( - problem_description=problem, - rephrase_problem=rephrase_problem, - code_solution=solution, + problem=problem, + solution=solution, exec_pass="executed successfully", test_fail=result, ) @@ -442,7 +450,7 @@ class Programmer(Operator): def __init__(self, llm: LLM, name: str = "Programmer"): super().__init__(name, llm) - async def exec_code(self, code, timeout=180): + async def exec_code(code, timeout=180): def run_code(): try: # 创建一个新的全局命名空间 @@ -461,13 +469,29 @@ class Programmer(Operator): exc_type, exc_value, exc_traceback = sys.exc_info() tb_str = traceback.format_exception(exc_type, exc_value, exc_traceback) return "Error", f"执行错误: {str(e)}\n{''.join(tb_str)}" - - with concurrent.futures.ThreadPoolExecutor() as executor: - future = executor.submit(run_code) + + # 创建一个事件来标记任务完成 + done_event = threading.Event() + result = ["Error", "执行无结果,子进程异常"] + + def wrapper(): + nonlocal result + result = run_code() + done_event.set() + + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(wrapper) try: - return future.result(timeout=timeout) - except concurrent.futures.TimeoutError: - return "Error", "代码执行超时" + # 等待任务完成或超时 + if done_event.wait(timeout=timeout): + return result + else: + # 超时,尝试取消任务 + future.cancel() + return "Error", "代码执行超时" + finally: + # 确保线程池被正确关闭 + executor.shutdown(wait=False) async def code_generate(self, problem, analysis, feedback, mode): prompt = PYTHON_CODE_VERIFIER_PROMPT.format(problem=problem, analysis=analysis, feedback=feedback) diff --git a/optimize.py b/optimize.py index 40adfc9b0..fcd3cc0e0 100644 --- a/optimize.py +++ b/optimize.py @@ -3,43 +3,41 @@ # @Author : didi # @Desc : Experiment of graph optimization -from examples.ags.w_action_node.optimizer import Optimizer +from examples.ags.scripts.optimizer import Optimizer from metagpt.configs.models_config import ModelsConfig -# 配置实验参数 -dataset = "Gsm8K" # 数据集选择为GSM8K -sample = 6 # 采样数量 -q_type = "math" # 问题类型为数学 -optimized_path = "examples/ags/w_action_node/optimized" # 优化结果保存路径 -# 初始化LLM模型 -deepseek_llm_config = ModelsConfig.default().get("deepseek-coder") +# Crucial Parameters +dataset = "HumanEval" # DatasetType +sample = 4 # Sample Count, which means how many workflows will be resampled from generated workflows +question_type = "code" # Question Type +optimized_path = "examples/ags/scripts/optimized" # Optimized Result Save Path + +# Initialize LLM Model +mini_llm_config = ModelsConfig.default().get("gpt-4o-mini") claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620") -# claude_llm_config = ModelsConfig.default().get("deepseek-coder") -# 初始化操作符列表 -gsm8k_operators = [ + +# Initialize Operators List +operators = [ "Custom", - "Generate", - "ContextualGenerate", - "Format", - "Review", - "Revise", - "FuEnsemble", - "MdEnsemble", + "CustomCodeGenerate", "ScEnsemble", - "Rephrase", + "Test", ] -# 创建优化器实例 +# Create an optimizer instance optimizer = Optimizer( dataset=dataset, 
diff --git a/optimize.py b/optimize.py
index 40adfc9b0..fcd3cc0e0 100644
--- a/optimize.py
+++ b/optimize.py
@@ -3,43 +3,41 @@
 # @Author : didi
 # @Desc   : Experiment of graph optimization

-from examples.ags.w_action_node.optimizer import Optimizer
+from examples.ags.scripts.optimizer import Optimizer
 from metagpt.configs.models_config import ModelsConfig

-# Configure experiment parameters
-dataset = "Gsm8K"  # dataset choice: GSM8K
-sample = 6  # sample count
-q_type = "math"  # question type: math
-optimized_path = "examples/ags/w_action_node/optimized"  # path for saving optimization results
-# Initialize LLM model
-deepseek_llm_config = ModelsConfig.default().get("deepseek-coder")
+# Crucial Parameters
+dataset = "HumanEval"  # DatasetType
+sample = 4  # Sample count: how many workflows are resampled from the generated workflows
+question_type = "code"  # Question Type
+optimized_path = "examples/ags/scripts/optimized"  # Optimized Result Save Path
+
+# Initialize LLM Model
+mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
 claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
-# claude_llm_config = ModelsConfig.default().get("deepseek-coder")

-# Initialize operator list
-gsm8k_operators = [
+
+# Initialize Operators List
+operators = [
     "Custom",
-    "Generate",
-    "ContextualGenerate",
-    "Format",
-    "Review",
-    "Revise",
-    "FuEnsemble",
-    "MdEnsemble",
+    "CustomCodeGenerate",
     "ScEnsemble",
-    "Rephrase",
+    "Test",
 ]

-# Create optimizer instance
+# Create an optimizer instance
 optimizer = Optimizer(
     dataset=dataset,
     opt_llm_config=claude_llm_config,
-    exec_llm_config=deepseek_llm_config,
-    operators=gsm8k_operators,
+    exec_llm_config=mini_llm_config,
+    operators=operators,
     optimized_path=optimized_path,
     sample=sample,
-    q_type=q_type,
+    question_type=question_type,
 )

-# Run optimizer
-optimizer.optimize("Operator")
+# Run the optimizer
+optimizer.optimize("Graph", 10)
+# optimizer.optimize("Graph")
+# optimizer.optimize("Operator")
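Both model handles are fetched by name through `ModelsConfig`. If a name is missing from the local MetaGPT model configuration, the `.get(...)` calls above appear to return an empty value, so a small guard can fail fast before a long optimization run starts; a sketch under that assumption:

```python
from metagpt.configs.models_config import ModelsConfig

def require_model(name: str):
    """Fetch a named model config, raising immediately instead of failing mid-run.
    Assumes ModelsConfig.default().get() returns None for names absent from the config."""
    cfg = ModelsConfig.default().get(name)
    if cfg is None:
        raise ValueError(f"model {name!r} is not defined in the MetaGPT models config")
    return cfg

mini_llm_config = require_model("gpt-4o-mini")
claude_llm_config = require_model("claude-3-5-sonnet-20240620")
```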