This commit is contained in:
didi 2024-08-26 08:30:24 +08:00
parent 6a01a679ce
commit 7c2501e08b
12 changed files with 593 additions and 59 deletions

View file

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# @Date :
# @Author : issac
# @Desc : test on gsm8k
import asyncio
from deepeval.models.base_model import DeepEvalBaseLLM
# This is the model base interface mandated by DeepEval; it does not need to be modified here — just subclass and call it.
class GraphModel(DeepEvalBaseLLM):
    """Adapter exposing an AGS solver graph through DeepEval's LLM interface.

    DeepEval benchmarks drive models via this fixed interface; this adapter
    simply forwards prompts to the wrapped solver graph.
    NOTE(review): the base-class ``__init__`` is intentionally not called,
    mirroring the original implementation — confirm ``DeepEvalBaseLLM`` does
    not require it.
    """

    def __init__(self, graph):
        # `graph` is an awaitable solver: graph(prompt) -> (solution, cost).
        self.solver = graph

    def load_model(self):
        # There is no underlying model object to load; the solver graph is
        # ready to use as-is.
        pass

    async def a_generate(self, prompt: str) -> str:
        # TODO: integrate `total_cost` into benchmark cost accounting instead
        # of discarding it here.
        solution_result, total_cost = await self.solver(prompt)
        return solution_result

    def generate(self, prompt: str) -> str:
        # Synchronous entry point required by DeepEval. `asyncio.run` creates
        # and tears down a fresh event loop, replacing the deprecated
        # `asyncio.get_event_loop()` + `run_until_complete` pattern (which
        # fails on Python 3.12+ when no loop exists in the current thread).
        return asyncio.run(self.a_generate(prompt))

    def get_model_name(self):
        return "Custom Azure OpenAI Model"

View file

@ -3,12 +3,16 @@
# @Author : all
# @Desc : evaluate for different dataset
import datetime
import inspect
import os
from typing import Literal
import pandas as pd
from deepeval.benchmarks import GSM8K
from examples.ags.benchmark.gsm8k import GraphModel
from examples.ags.w_action_node.graph import SolveGraph
# TODO: finish the manual train/validation split of the experiment dataset
DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
@ -20,14 +24,14 @@ class Evaluator:
"""
def __init__(self, eval_path: str):
pass
self.eval_path = eval_path
def validation_evaluate(self, dataset: DatasetType, result_path: str):
def validation_evaluate(self, dataset: DatasetType, graph, params: dict):
"""
Evaluates on validation dataset.
"""
if dataset == "Gsm8K":
return self._gsm8k_eval(result_path)
return self._gsm8k_eval(graph, params)
pass
def test_evaluate(self, dataset: DatasetType):
@ -36,16 +40,12 @@ class Evaluator:
"""
pass
def _gsm8k_eval(self, model, result_path, samples: int = 1000):
def _gsm8k_eval(self, graph_class, params, samples: int = 1000):
"""
Evaluate on GSM8K dataset.
"""
if model is None:
raise ValueError("Model is required for evaluation.")
benchmark = GSM8K(n_problems=samples, n_shots=0, enable_cot=False)
goldens = benchmark.load_benchmark_dataset()[: benchmark.n_problems]
# TODO 划分验证集测试集
def _evaluate_problem(model, golden, benchmark):
prompt = golden.input
@ -70,12 +70,6 @@ class Evaluator:
return golden.input, str(prediction), golden.expected_output, score
results = [_evaluate_problem(model, golden, benchmark) for golden in goldens]
overall_correct_predictions = sum(score for _, _, _, score in results)
overall_total_predictions = benchmark.n_problems
overall_accuracy = overall_correct_predictions / overall_total_predictions
def process_gsm8k_csv(file_path, tolerance=1e-6):
# 读取 CSV 文件
df = pd.read_csv(file_path, dtype=str) # 使用默认逗号分隔符,并指定所有列为字符串类型
@ -129,6 +123,24 @@ class Evaluator:
return average_score
dataset = params["dataset"]
llm_config = params["llm_config"]
# TODO: the argument is a Graph class loaded from file — figure out how to instantiate it here
graph = SolveGraph(name="Gsm8K", llm_config=llm_config, dataset=dataset)
model = GraphModel(graph)
benchmark = GSM8K(n_problems=samples, n_shots=0, enable_cot=False)
graph_module = inspect.getmodule(graph_class)
os.path.dirname(graph_module.__file__)
goldens = benchmark.load_benchmark_dataset()[: benchmark.n_problems]
results = [_evaluate_problem(model, golden, benchmark) for golden in goldens]
overall_correct_predictions = sum(score for _, _, _, score in results)
overall_total_predictions = benchmark.n_problems
overall_accuracy = overall_correct_predictions / overall_total_predictions
predictions_row = [
(input, prediction, expected_output, score) for input, prediction, expected_output, score in results
]
@ -137,11 +149,11 @@ class Evaluator:
)
benchmark.overall_score = overall_accuracy
now = datetime.datetime.now()
now.strftime("%Y-%m-%d_%H-%M-%S").replace(":", "_")
now_time = now.strftime("%Y-%m-%d_%H-%M-%S").replace(":", "_")
# file_path = f'gsm8k_{overall_accuracy}_{now_time}.csv'
file_path = f"{self.eval_path}/gsm8k_{overall_accuracy}_{now_time}.csv"
benchmark.predictions.to_csv(result_path, index=False)
benchmark.predictions.to_csv(file_path, index=False)
score = process_gsm8k_csv(file_path=result_path)
score = process_gsm8k_csv(file_path=file_path)
return {"score": score}

View file

@ -12,27 +12,30 @@ from typing import Dict, List, Tuple
from tenacity import retry, stop_after_attempt
from examples.ags.w_action_node.operator_an import (
CodeGenerateOp,
FormatOp,
FuEnsembleOp,
GenerateCodeBlockOp,
GenerateOp,
MdEnsembleOp,
ReflectionTestOp,
RephraseOp,
ReviewOp,
ReviseOp,
ScEnsembleOp,
)
from examples.ags.w_action_node.prompt import (
CODE_CONTEXTUAL_GENERATE_PROMPT,
CONTEXTUAL_GENERATE_PROMPT,
FORMAT_PROMPT,
FU_ENSEMBLE_PROMPT,
GENERATE_CODEBLOCK_PROMPT,
GENERATE_CODEBLOCK_REPHRASE_PROMPT,
GENERATE_PROMPT,
MD_ENSEMBLE_PROMPT,
REFLECTION_ON_PUBLIC_TEST_PROMPT,
REPHRASE_ON_PROBLEM_PROMPT,
REVIEW_PROMPT,
REVISE_PROMPT,
SC_ENSEMBLE_PROMPT,
)
from examples.ags.w_action_node.utils import test_case_2_test_function
from metagpt.actions.action_node import ActionNode
@ -54,7 +57,7 @@ class Generate(Operator):
基于Action Node Fill Function的 Generate 算子
"""
def __init__(self, name: str = "Generate", llm: LLM = LLM()):
def __init__(self, llm: LLM, name: str = "Generate"):
super().__init__(name, llm)
async def __call__(self, problem_description):
@ -64,23 +67,42 @@ class Generate(Operator):
return response
class GenerateCodeBlock(Operator):
def __init__(self, name: str = "GenerateCodeBlock", llm: LLM = LLM()):
class ContextualGenerate(Operator):
def __init__(self, llm: LLM, name: str = "ContextualGenerate"):
super().__init__(name, llm)
@retry(stop=stop_after_attempt(3))
async def __call__(self, problem_description, thought, function_name):
prompt = CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem_description, thought=thought)
node = await ActionNode.from_pydantic(GenerateOp).fill(
context=prompt, llm=self.llm, function_name=function_name
)
response = node.instruct_content.model_dump()
return response
class CodeGenerate(Operator):
def __init__(self, name: str = "CodeGenerate", llm: LLM = LLM()):
super().__init__(name, llm)
@retry(stop=stop_after_attempt(3))
async def __call__(self, problem_description, function_name):
prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem_description)
node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(
node = await ActionNode.from_pydantic(CodeGenerateOp).fill(
context=prompt, llm=self.llm, mode="code_fill", function_name=function_name
)
response = node.instruct_content.model_dump()
return response
class CodeContextualGenerate(Operator):
def __init__(self, llm: LLM, name: str = "CodeContextualGenerate"):
super().__init__(name, llm)
@retry(stop=stop_after_attempt(3))
async def rephrase_generate(self, problem_description, thought, function_name):
prompt = GENERATE_CODEBLOCK_REPHRASE_PROMPT.format(problem_description=problem_description, thought=thought)
node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(
async def __call__(self, problem_description, thought, function_name):
prompt = CODE_CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem_description, thought=thought)
node = await ActionNode.from_pydantic(CodeGenerateOp).fill(
context=prompt, llm=self.llm, mode="code_fill", function_name=function_name
)
response = node.instruct_content.model_dump()
@ -262,9 +284,28 @@ class ScEnsemble(Operator):
"""
Paper: Self-Consistency Improves Chain of Thought Reasoning in Language Models
Link: https://arxiv.org/abs/2203.11171
Paper: Universal Self-Consistency for Large Language Model Generation
Link: https://arxiv.org/abs/2311.17311
"""
pass
def __init__(self, name: str = "ScEnsemble", llm: LLM = LLM()):
super().__init__(name, llm)
async def __call__(self, solutions: List[str], problem_description: str):
answer_mapping = {}
solution_text = ""
for index, solution in enumerate(solutions):
answer_mapping[chr(65 + index)] = index
solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
prompt = SC_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
node = await ActionNode.from_pydantic(ScEnsembleOp).fill(context=prompt, llm=self.llm)
response = node.instruct_content.model_dump()
answer = response.get("solution_letter", "")
answer = answer.strip().upper()
return {"final_solution": solutions[answer_mapping[answer]]}
class Rephrase(Operator):
@ -351,18 +392,3 @@ class Test(Operator):
response = node.instruct_content.model_dump()
solution = response["refined_solution"]
return {"final_solution": solution}
class FindFact(Operator):
def __init__(self, name: str = "FindFact", llm: LLM = LLM()):
super().__init__(name, llm)
class SelfAsk(Operator):
def __init__(self, name: str = "SelfAsk", llm: LLM = LLM()):
super().__init__(name, llm)
class Verify(Operator):
def __init__(self, name: str = "Verify", llm: LLM = LLM()):
super().__init__(name, llm)

View file

@ -10,7 +10,7 @@ class GenerateOp(BaseModel):
solution: str = Field(default="", description="Your solution for this problem")
class GenerateCodeBlockOp(BaseModel):
class CodeGenerateOp(BaseModel):
code_solution: str = Field(default="", description="Your complete code solution for this problem")
@ -88,5 +88,5 @@ class ReflectionTestOp(BaseModel):
)
class Optimize(BaseModel):
graph: str = Field(default="", description="graph")
class ScEnsembleOp(BaseModel):
solution_letter: str = Field(default="", description="The letter of most consistent solution.")

View file

@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-
# @Date : 6/27/2024 17:36 PM
# @Author : didi
# @Desc : operator demo of ags
import random
from collections import Counter
from typing import Dict, List, Tuple
from tenacity import retry, stop_after_attempt
from examples.ags.w_action_node.optimized.Gsm8K.graphs.round_1.operator_an import (
FormatOp,
FuEnsembleOp,
GenerateOp,
MdEnsembleOp,
RephraseOp,
ReviewOp,
ReviseOp,
ScEnsembleOp,
)
from examples.ags.w_action_node.prompt import (
CONTEXTUAL_GENERATE_PROMPT,
FORMAT_PROMPT,
FU_ENSEMBLE_PROMPT,
GENERATE_PROMPT,
MD_ENSEMBLE_PROMPT,
REPHRASE_ON_PROBLEM_PROMPT,
REVIEW_PROMPT,
REVISE_PROMPT,
SC_ENSEMBLE_PROMPT,
)
from metagpt.actions.action_node import ActionNode
from metagpt.llm import LLM
class Operator:
    """Common base for all operators: holds a display name and the LLM handle."""

    def __init__(self, name, llm: LLM):
        self.name, self.llm = name, llm

    def __call__(self, *args, **kwargs):
        # Concrete operators must provide their own invocation logic.
        raise NotImplementedError
class Generate(Operator):
    """Single-shot solution generation from a bare problem description."""

    def __init__(self, llm: LLM, name: str = "Generate"):
        super().__init__(name, llm)

    async def __call__(self, problem_description):
        # Render the prompt, then let the action node produce a GenerateOp payload.
        filled_node = await ActionNode.from_pydantic(GenerateOp).fill(
            context=GENERATE_PROMPT.format(problem_description=problem_description),
            llm=self.llm,
        )
        return filled_node.instruct_content.model_dump()
class ContextualGenerate(Operator):
    """Solution generation conditioned on an upstream thought/rephrasing."""

    def __init__(self, llm: LLM, name: str = "ContextualGenerate"):
        super().__init__(name, llm)

    @retry(stop=stop_after_attempt(3))
    async def __call__(self, problem_description, thought, function_name):
        # Combine the problem statement with the prior thought in one context.
        context = CONTEXTUAL_GENERATE_PROMPT.format(
            problem_description=problem_description, thought=thought
        )
        filled_node = await ActionNode.from_pydantic(GenerateOp).fill(
            context=context, llm=self.llm, function_name=function_name
        )
        return filled_node.instruct_content.model_dump()
class Format(Generate):
    """Reformats a raw solution into the final answer layout.

    Inherits from Generate so it shares the same LLM plumbing.
    """

    def __init__(self, name: str = "Format", llm: LLM = LLM()):
        # BUG FIX: Generate.__init__ takes (llm, name); the previous call
        # passed (name, llm) positionally, which swapped the two so that
        # self.llm ended up holding the string "Format" and self.name the
        # LLM instance, breaking every downstream fill() call.
        super().__init__(llm, name)

    async def __call__(self, problem_description, solution):
        prompt = FORMAT_PROMPT.format(problem_description=problem_description, solution=solution)
        node = await ActionNode.from_pydantic(FormatOp).fill(context=prompt, llm=self.llm)
        response = node.instruct_content.model_dump()
        return response
class Review(Operator):
    """Reviews a solution against fixed criteria, yielding a verdict plus feedback."""

    def __init__(self, criteria, name: str = "Review", llm: LLM = LLM()):
        # NOTE(review): the default LLM() is evaluated once at import time and
        # shared by every instance relying on the default — confirm intended.
        self.criteria = criteria
        super().__init__(name, llm)

    async def __call__(self, problem_description, solution):
        context = REVIEW_PROMPT.format(
            problem_description=problem_description,
            solution=solution,
            criteria=self.criteria,
        )
        node = await ActionNode.from_pydantic(ReviewOp).fill(context=context, llm=self.llm)
        return node.instruct_content.model_dump()
class Revise(Operator):
    """Revises a solution according to reviewer feedback."""

    def __init__(self, name: str = "Revise", llm: LLM = LLM()):
        super().__init__(name, llm)

    async def __call__(self, problem_description, solution, feedback):
        context = REVISE_PROMPT.format(
            problem_description=problem_description,
            solution=solution,
            feedback=feedback,
        )
        node = await ActionNode.from_pydantic(ReviseOp).fill(context=context, llm=self.llm)
        return node.instruct_content.model_dump()
class FuEnsemble(Operator):
    """Fusion ensemble: critiques the candidate solutions, synthesizes their
    strengths, and produces one enhanced, integrated solution."""

    def __init__(self, name: str = "FuEnsemble", llm: LLM = LLM()):
        super().__init__(name, llm)

    async def __call__(self, solutions: List, problem_description):
        # Concatenate candidates, one per line, for the fusion prompt.
        solution_text = "".join(str(candidate) + "\n" for candidate in solutions)
        context = FU_ENSEMBLE_PROMPT.format(
            solutions=solution_text, problem_description=problem_description
        )
        node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=context, llm=self.llm)
        return node.instruct_content.model_dump()
class MdEnsemble(Operator):
    """Majority-vote ensemble with per-round answer-order shuffling.

    Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
    Link: https://arxiv.org/abs/2311.16452
    """

    def __init__(self, name: str = "MdEnsemble", llm: LLM = LLM(), vote_count: int = 3):
        super().__init__(name, llm)
        self.vote_count = vote_count

    @staticmethod
    def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, str]]:
        # Shuffle to remove position bias; map each shuffled letter back to the
        # original index. NOTE: list.index() maps duplicate solutions to their
        # first occurrence, which still yields a valid (equivalent) vote.
        shuffled_solutions = solutions.copy()
        random.shuffle(shuffled_solutions)
        answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
        return shuffled_solutions, answer_mapping

    async def __call__(self, solutions: List[str], problem_description: str):
        print(f"solution count: {len(solutions)}")
        all_responses = []
        for _ in range(self.vote_count):
            shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
            solution_text = ""
            for index, solution in enumerate(shuffled_solutions):
                # Candidates are labelled A, B, C, ... in the prompt.
                solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
            node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
            response = node.instruct_content.model_dump()
            answer = response.get("solution_letter", "").strip().upper()
            if answer in answer_mapping:
                all_responses.append(answer_mapping[answer])
        # BUG FIX: previously, if every voting round returned an invalid
        # letter, Counter(...).most_common(1)[0] raised IndexError. Fall back
        # to the first candidate instead of crashing.
        if not all_responses:
            return {"final_solution": solutions[0] if solutions else ""}
        most_frequent_index = Counter(all_responses).most_common(1)[0][0]
        return {"final_solution": solutions[most_frequent_index]}
class ScEnsemble(Operator):
    """Self-consistency ensemble: pick the candidate judged most consistent.

    Paper: Self-Consistency Improves Chain of Thought Reasoning in Language Models
    Link: https://arxiv.org/abs/2203.11171
    Paper: Universal Self-Consistency for Large Language Model Generation
    Link: https://arxiv.org/abs/2311.17311
    """

    def __init__(self, name: str = "ScEnsemble", llm: LLM = LLM()):
        super().__init__(name, llm)

    async def __call__(self, solutions: List[str], problem_description: str):
        answer_mapping = {}
        solution_text = ""
        for index, solution in enumerate(solutions):
            # Label candidates A, B, C, ... and remember each label's index.
            answer_mapping[chr(65 + index)] = index
            solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"

        prompt = SC_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
        node = await ActionNode.from_pydantic(ScEnsembleOp).fill(context=prompt, llm=self.llm)
        response = node.instruct_content.model_dump()

        answer = response.get("solution_letter", "").strip().upper()
        # BUG FIX: a letter outside the A..Z labels assigned above used to
        # raise KeyError; fall back to the first candidate instead of crashing.
        chosen_index = answer_mapping.get(answer, 0)
        return {"final_solution": solutions[chosen_index] if solutions else ""}
class Rephrase(Operator):
    """Rewrites the problem statement before solving.

    Paper: Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering
    Link: https://arxiv.org/abs/2404.14963
    Paper: Achieving >97% on GSM8K: Deeply Understanding the Problems Makes LLMs Better Solvers for Math Word Problems
    Link: https://arxiv.org/abs/2404.14963
    """

    def __init__(self, name: str = "Rephrase", llm: LLM = LLM()):
        super().__init__(name, llm)

    async def __call__(self, problem_description: str) -> str:
        context = REPHRASE_ON_PROBLEM_PROMPT.format(problem_description=problem_description)
        node = await ActionNode.from_pydantic(RephraseOp).fill(context=context, llm=self.llm)
        payload = node.instruct_content.model_dump()
        return payload["rephrased_problem"]

View file

@ -0,0 +1,55 @@
from pydantic import BaseModel, Field
class GenerateOp(BaseModel):
    # Output schema for the Generate operator: a free-form textual solution.
    solution: str = Field(default="", description="Your solution for this problem")
class FormatOp(BaseModel):
    # Output schema for the Format operator: the solution rewritten into the
    # final answer layout.
    solution: str = Field(default="", description="Your formatted answer for this problem")
class ReviewOp(BaseModel):
    # Output schema for the Review operator: a pass/fail verdict plus feedback.
    review_result: bool = Field(
        default=False,
        description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'",
    )
    # Free-form critique; only meaningful when review_result is False.
    feedback: str = Field(
        default="",
        description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.",
    )
class ReviseOp(BaseModel):
    # Output schema for the Revise operator: the solution updated per feedback.
    revised_solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
class FuEnsembleOp(BaseModel):
    # Output schema for the fusion ensemble: reasoning plus the merged solution.
    thought: str = Field(
        default="",
        description="Analyze the solutions and think how to combine the advantages of various solutions to form the best possible solution.",
    )
    final_solution: str = Field(default="", description="Output the final solution after analysis and integration")
class MdEnsembleOp(BaseModel):
    # Output schema for the majority-vote ensemble. The long default below is
    # an in-schema exemplar shown to the model, not a real default answer.
    thought: str = Field(
        default="""Example thought process:
1. Examined the 'compare_one' function.
2. The function correctly handles both numeric and string inputs by converting strings to floats.
3. It properly compares two values and returns the larger one.
4. The function returns None if the values are equal, which might be useful in some contexts but could be improved by returning either value.
5. The use of 'isinstance' for type checking is a good practice.
6. The function handles decimal separators well by replacing ',' with '.'.
Overall, this solution effectively solves the problem of comparing two values, with good error handling and flexibility. It could be improved by specifying behavior for equal values, but it's a strong solution as is.""",
        description="Step-by-step analysis of the solutions to determine the best one.",
    )
    # Single capital letter (A, B, C, ...) naming the chosen candidate.
    solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).")
class RephraseOp(BaseModel):
    # Output schema for the Rephrase operator: the restated problem text.
    rephrased_problem: str = Field(default="", description="Rephrased problem description for this problem")
class ScEnsembleOp(BaseModel):
    # Output schema for the self-consistency ensemble: a single capital letter
    # (A, B, C, ...) naming the most consistent candidate.
    solution_letter: str = Field(default="", description="The letter of most consistent solution.")

View file

@ -3,6 +3,7 @@
# @Author : issac
# @Desc : optimizer for graph
import asyncio
import json
import os
import re
@ -18,6 +19,9 @@ from examples.ags.w_action_node.prompts.optimize_prompt import (
GRAPH_INPUT,
GRAPH_OPTIMIZE_PROMPT,
GRAPH_TEMPLATE,
OPERATOR_INPUT,
OPERATOR_OPTIMIZE_PROMPT,
OPERATOR_TEMPLATE,
)
from metagpt.actions.action_node import ActionNode
from metagpt.llm import LLM
@ -26,6 +30,7 @@ from metagpt.logs import logger
config_iterate_path = "iterate"
DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
OptimizerType = Literal["Complete", "Graph", "Operator"]
evaluator = Evaluator(eval_path="eval")
@ -50,12 +55,14 @@ class Optimizer:
optimized_path: str = None,
sample: int = 6,
q_type: str = "math", # math,code,quiz
op: str = "Generator", # 需要优化的Operator
) -> None:
self.optimize_llm = opt_llm
self.execute_llm = exec_llm
self.dataset = dataset
self.graph = None # 初始化为 None稍后加载
self.operators = operators
self.op = op
self.optimize_prompt = ""
self._optimized_path = optimized_path
self.root_path = f"{self._optimized_path}/{self.dataset}"
@ -104,12 +111,27 @@ class Optimizer:
# 初始化Graph直接手动从模版中取出COT
def optimize(self):
def optimize(self, mode: OptimizerType = "Complete", max_rounds: int = 100):
"""
Optimize the graph and operator for the dataset.
"""
self._initialize() # Operator's Optimization
self._optimize() # Graph's Optimization
if mode == "Complete":
self._initialize() # Operator's Optimization
for opt_round in range(max_rounds):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
score = loop.run_until_complete(self._optimize_graph())
finally:
loop.close()
time.sleep(5)
self.round += 1
print(f"Score for round {self.round}: {score}")
def _load_graph(self, round_number, graphs_path):
"""
@ -281,7 +303,7 @@ class Optimizer:
print(f"Processed experience data saved to {output_path}")
return experience_data
async def _optimize(self):
async def _optimize_graph(self):
"""
Optimize Graph's Structure and Prompt
"""
@ -329,10 +351,11 @@ class Optimizer:
)
graph_system = GRAPH_OPTIMIZE_PROMPT.format(type=self.type)
node_prompt = graph_system + graph_input # TODO 看一眼谁先谁后这个地方
graph_optimize_prompt = graph_system + graph_input # TODO 看一眼谁先谁后这个地方
node = await ActionNode.from_pydantic(GraphOptimize).fill(
context=node_prompt, mode="context_fill", llm=self.llm
# TODO 从这里开始Graph Optimize 可以作为一个Operator放入 Operator.py 之中
graph_optimize_node = await ActionNode.from_pydantic(GraphOptimize).fill(
context=graph_optimize_prompt, mode="context_fill", llm=self.llm
)
max_retries = 5
@ -341,7 +364,7 @@ class Optimizer:
while retries < max_retries:
try:
# TODO 需要和评测的模型分开传入模型或其它方法如果能实现Temperature调整更好
response = node.instruct_content.model_dump()
response = graph_optimize_node.instruct_content.model_dump()
break
except Exception as e:
@ -353,7 +376,6 @@ class Optimizer:
break
time.sleep(5)
# TODO 这里其实可以省去
graph_match = response["graph"]
prompt_match = response["prompt"]
modification_match = response["modification"]
@ -388,6 +410,118 @@ class Optimizer:
score = evaluator.validation_evaluate(self.dataset, self.graph)
experience["after"] = score
experience["succeed"] = bool(score > experience["before"])
return score
async def _optimize_operator(self):
    """
    Optimize Graph's Structure and Prompt
    """
    # Directory for this optimization round's generated operator files.
    graph_path = f"{self.root_path}/operators"
    # Create the round folder if it does not exist yet.
    directory = os.path.join(graph_path, f"round_{self.round + 1}")
    os.makedirs(directory, exist_ok=True)

    # Sample one of the best-scoring previous rounds as the starting point.
    top_rounds = self._get_top_rounds()
    sample = self._select_round(top_rounds)
    print(top_rounds)

    prompt, graph_load, operator_load = self._read_files(sample["round"])
    score = sample["score"]

    # Extract the target operator class and the SolveGraph class source text.
    # NOTE(review): the parentheses in the operator pattern are not escaped,
    # so "(Operator)" is a regex group matching the literal text
    # "class <op>Operator:" — it will NOT match "class <op>(Operator):".
    # re.findall then returns [], and operator[0] below raises IndexError.
    # The pattern likely needs rf"class {self.op}\(Operator\):.+".
    operator_pattern = rf"class {self.op}(Operator):.+"
    graph_pattern = r"class SolveGraph:.+"
    # Find all matches with re.findall (DOTALL so ".+" spans lines).
    operator = re.findall(operator_pattern, operator_load, re.DOTALL)
    graph = re.findall(graph_pattern, graph_load, re.DOTALL)

    # Load the processed experience records from earlier rounds.
    processed_experience = self._load_experience()

    # Look up the experience entry for the sampled round.
    current_round = int(sample["round"])  # ensure the key is an int for the lookup
    experience_data = processed_experience.get(current_round)

    if experience_data:
        # Build the experience summary string fed into the prompt.
        experience = f"Original Score: {experience_data['score']}\n"
        experience += "Failed modifications:\n"
        for mod in experience_data["failure"]:
            experience += f"- {mod['modification']} (Score: {mod['score']})\n"
        experience += "\n\nNote: Reference failed experiences, avoid trying failed approaches again, attempt to change your thinking, not limited to using more advanced Python syntax like for, if, else, etc., or modifying the Prompt part"
    else:
        experience = f"No experience data found for round {current_round}."

    # NOTE(review): the keyword "experinece" is a typo — verify it matches the
    # placeholder name inside OPERATOR_INPUT; if the template says
    # "{experience}", this str.format call raises KeyError.
    operator_input = OPERATOR_INPUT.format(
        experinece=experience, score=score, operator=operator[0], prompt=prompt, type=self.type, graph=graph[0]
    )
    operator_system = OPERATOR_OPTIMIZE_PROMPT.format(type=self.type)
    node_prompt = operator_system + operator_input  # TODO: double-check whether system or input text should come first
    node = await ActionNode.from_pydantic(GraphOptimize).fill(
        context=node_prompt, mode="context_fill", llm=self.llm
    )

    max_retries = 5
    retries = 0

    while retries < max_retries:
        try:
            # TODO: pass a separate model (or another mechanism) than the one
            # used for evaluation; per-call temperature control would be better.
            response = node.instruct_content.model_dump()
            break

        except Exception as e:
            retries += 1
            print(f"Error generating prediction: {e}. Retrying... ({retries}/{max_retries})")
            if retries == max_retries:
                print("Maximum retries reached. Skipping this sample.")
                break
            time.sleep(5)

    # TODO: this intermediate unpacking could be removed.
    # NOTE(review): model_dump() yields plain strings, yet the code below
    # calls .group(1) on them as if they were regex Match objects — this
    # raises AttributeError at runtime. Compare _optimize_graph, which uses
    # the response values directly. Also "operator" is presumably not a field
    # of GraphOptimize — confirm the response model.
    operator_match = response["operator"]
    prompt_match = response["prompt"]
    modification_match = response["modification"]

    modification = modification_match.group(1)
    prompt = prompt_match.group(1)
    operator = OPERATOR_TEMPLATE.format(operator=operator_match.group(1), round=self.round + 1)

    # Write operator.py into the round directory.
    with open(os.path.join(directory, "operator.py"), "w", encoding="utf-8") as file:
        file.write(operator)

    # Write prompt.py into the round directory.
    with open(os.path.join(directory, "prompt.py"), "w", encoding="utf-8") as file:
        file.write(prompt)

    # Write an empty __init__.py so the round directory is importable.
    with open(os.path.join(directory, "__init__.py"), "w", encoding="utf-8") as file:
        file.write("")

    experience = {
        "father node": sample["round"],
        "modification": modification,
        "before": sample["score"],
        "after": None,
        "succeed": None,
    }

    # NOTE(review): experience.json is written BEFORE the evaluation below;
    # the "after"/"succeed" updates only mutate the in-memory dict and are
    # never persisted. Also validation_evaluate appears to take
    # (dataset, graph, params) elsewhere — confirm this call's arity.
    with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
        json.dump(experience, file, ensure_ascii=False, indent=4)

    score = evaluator.validation_evaluate(self.dataset, self.graph)
    experience["after"] = score
    experience["succeed"] = bool(score > experience["before"])
def test(self, graph_path: str):
"""

View file

@ -7,8 +7,14 @@ GENERATE_PROMPT = """
Generate Solution for the following problem: {problem_description}
"""
GENERATE_SOLUTION_PROMPT = """
Generate a text solution for the following problemL: {problem_description}
CONTEXTUAL_GENERATE_PROMPT = """
Generate Solution for the following problem:
## Problem Description
{problem_description}
## Thought
{thought}
"""
GENERATE_CODE_SOLUTION_PROMPT = """
@ -28,7 +34,7 @@ Guidelines:
- Double-check the solutions. Each possible solution must be able to generalize to additional test cases, not just the ones provided in the problem description.
"""
GENERATE_CODEBLOCK_REPHRASE_PROMPT = """
CODE_CONTEXTUAL_GENERATE_PROMPT = """
Please provide a self-contained Python script that solves the following problem in a markdown code block:
### Problem Description
@ -134,6 +140,16 @@ Provide your final decision by writing the chosen solution letter.
Please maintain the JSON format in your response.
"""
# Self-consistency voting prompt; candidates are labelled A, B, C, ... by the
# ScEnsemble operator.
# NOTE(review): the instruction asks for "a single id of solution" while the
# parser (ScEnsembleOp.solution_letter) expects a single capital letter —
# consider tightening the wording to ask for the letter explicitly.
SC_ENSEMBLE_PROMPT = """
I have generated the following solutions to the question: {problem_description}
{solutions}
Evaluate these solutions.
Select the most consistent solution based on majority consensus.
Give your answer with a single id of solution (without anything else).
"""
DE_ENSEMBLE_TXT_FORMAT_PROMPT = """
Now please output your answer in json format, with the format as follows:
{\"Reason\": \"\", \"debate_answer\": \"the capital letter corresponding to the answer\"}.

View file

@ -69,3 +69,54 @@ GRAPH_TEMPLATE = """import os
{graph}
"""
# System prompt for joint Operator + Prompt optimization; {type} is the
# problem category (e.g. "math").
# NOTE(review): the constraint text is self-contradictory ("at most 10
# interactions" vs "does not exceed 15") — confirm the intended limit.
OPERATOR_OPTIMIZE_PROMPT = """You are building a Operator and corresponding Prompt to jointly solve {type} problems.
Referring to the given combination of Operator and prompt, which forms a basic example of a {type} solution approach, please reconstruct and optimize the Prompt and Operator. You can add, modify, or delete nodes and parameters in the Operator, as well as modify, delete, or add new Prompts.
Put your modification (only make one point of change, i.e., one sentence), and the modified Prompt and Operator in XML tags in your reply. They will be used as new Prompt and Operator for calculation and iteration. Please ensure they are complete and correct, otherwise it may lead to runtime failures.
Only modify the parts in Prompt and Operator.
Don't be limited to the previous format.You can consider Python's built-in loops (like for, while, and list comprehensions) or conditional statements (such as if-elif-else and ternary operators), or even machine learning methods ranging from basic supervised learning techniques (e.g., linear regression, decision trees) to more advanced approaches like neural networks and clustering algorithms. However, you must ensure that each call to the Operator internally involves at most 10 interactions, i.e., the complexity of the Operator does not exceed 15."""
# Input template wrapped around the sampled operator/prompt/graph for the
# optimizer LLM.
# BUG FIX: the placeholder is now spelled "experinece" to match the (typo'd)
# keyword the caller passes — Optimizer._optimize_operator invokes
# OPERATOR_INPUT.format(experinece=...). With the previous "{experience}"
# placeholder, str.format raised KeyError('experience') at runtime.
# If the kwarg is ever renamed, rename this placeholder in the same change.
OPERATOR_INPUT = """
Here is a Operator and corresponding Prompt that performed excellently in a previous iteration (maximum score is 1), Graph calls the Operator:\n
<sample>
<experience>{experinece}</experience>
<modification>None</modification>
<score>{score}</score>
<operator>{operator}</operator>
<prompt>{prompt}</prompt>
<graph>{graph}</graph>
</sample>
First provide optimization ideas. Note that ANSWER_FORMAT_PROMPT must exist and cannot be modified. Only add/modify/delete one detail point, extensive modifications are prohibited.\n\n"
"""
# Scaffold file into which the optimized operator body is spliced; {operator}
# and {round} are filled via str.format before writing operator.py.
# NOTE(review): the import path uses lowercase "gsm8k" while other generated
# modules use the "Gsm8K" casing (e.g. optimized/Gsm8K/graphs/...) — confirm
# the directory casing matches, otherwise the generated file won't import.
OPERATOR_TEMPLATE = """
import ast
import random
import sys
import traceback
from collections import Counter
from typing import Dict, List, Tuple
from tenacity import retry, stop_after_attempt
from examples.ags.w_action_node.optimized.gsm8k.operators.round_{round}.prompt import *
from examples.ags.w_action_node.operator_an import (
GenerateOp,
)
from metagpt.actions.action_node import ActionNode
from metagpt.llm import LLM
from metagpt.logs import logger
class Operator:
def __init__(self, name, llm: LLM):
self.name = name
self.llm = llm
def __call__(self, *args, **kwargs):
raise NotImplementedError
{operator}
"""