mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
Update Operator Optimize Method.
This commit is contained in:
parent
d97f90f9c7
commit
c3903412b4
14 changed files with 1161 additions and 1040 deletions
|
|
@ -2,15 +2,14 @@
|
|||
# @Date : 8/23/2024 10:00 AM
|
||||
# @Author : all
|
||||
# @Desc : evaluate for different dataset
|
||||
import datetime
|
||||
import inspect
|
||||
import os
|
||||
from typing import Literal
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from typing import List, Literal, Optional, Tuple
|
||||
|
||||
import aiofiles
|
||||
import pandas as pd
|
||||
from deepeval.benchmarks import GSM8K
|
||||
|
||||
from examples.ags.benchmark.gsm8k import GraphModel
|
||||
from tqdm.asyncio import tqdm_asyncio
|
||||
|
||||
# TODO 完成实验数据集的手动划分
|
||||
|
||||
|
|
@ -25,12 +24,13 @@ class Evaluator:
|
|||
def __init__(self, eval_path: str):
|
||||
self.eval_path = eval_path
|
||||
|
||||
def validation_evaluate(self, dataset: DatasetType, graph, params: dict):
|
||||
def validation_evaluate(self, dataset: DatasetType, graph, params: dict, path):
|
||||
"""
|
||||
Evaluates on validation dataset.
|
||||
"""
|
||||
if dataset == "Gsm8K":
|
||||
return self._gsm8k_eval(graph, params)
|
||||
score = self._gsm8k_eval(graph, params, path)
|
||||
return score
|
||||
pass
|
||||
|
||||
def test_evaluate(self, dataset: DatasetType):
|
||||
|
|
@ -39,22 +39,70 @@ class Evaluator:
|
|||
"""
|
||||
pass
|
||||
|
||||
def _gsm8k_eval(self, graph_class, params, samples: int = 1000):
|
||||
async def _gsm8k_eval(self, graph_class, params, path, samples: int = 10):
|
||||
"""
|
||||
Evaluate on GSM8K dataset.
|
||||
"""
|
||||
|
||||
# TODO 划分验证集测试集
|
||||
def _evaluate_problem(model, golden, benchmark):
|
||||
prompt = golden.input
|
||||
# 模拟加载模型的函数
|
||||
async def load_graph():
    """Build the graph under evaluation.

    Reads ``"dataset"`` and ``"llm_config"`` from the enclosing ``params``
    mapping and passes them, together with the fixed name ``"Gsm8K"``, to the
    enclosing ``graph_class``.

    Returns:
        Whatever ``graph_class(...)`` returns.
    """
    dataset_name = params["dataset"]
    config = params["llm_config"]
    return graph_class(name="Gsm8K", llm_config=config, dataset=dataset_name)
|
||||
|
||||
max_retries = 50
|
||||
# 清理文本并提取单个数字
|
||||
def extract_number(text: str) -> Optional[float]:
    """Return the last number that appears in ``text`` as a float.

    Integers, decimals, an optional leading sign and comma thousands
    separators are recognised by the regular expression below.

    Args:
        text: Free-form text to scan.

    Returns:
        The last matched number converted to ``float``, or ``None`` when no
        number is found or the conversion fails.
    """
    found = re.findall(r"[-+]?\d+(?:,\d{3})*(?:\.\d+)?|\d+\.\d+", text)
    print(found)
    if not found:
        return None

    # Only the final match counts; commas are stripped so float() can parse it.
    candidate = found[-1].replace(",", "")
    try:
        return float(candidate)
    except ValueError:
        return None
|
||||
|
||||
# 宽松匹配分数计算函数
|
||||
def loose_match_score(expected_output: str, prediction: str, tolerance: float = 1e-6) -> int:
    """Score a prediction by comparing the last number in each text.

    Both strings are passed through ``extract_number``; the prediction scores
    1 when the two numbers differ by at most ``tolerance``.

    Args:
        expected_output: Reference answer text.
        prediction: Model answer text.
        tolerance: Maximum absolute difference still counted as a match.

    Returns:
        ``1`` for a match, ``0`` otherwise (including when either text yields
        no number).
    """
    target = extract_number(expected_output)
    guess = extract_number(prediction)

    print(guess)

    # A side without an extractable number can never match.
    if target is None or guess is None:
        return 0

    return 1 if abs(target - guess) <= tolerance else 0
|
||||
|
||||
# 异步评估单个问题
|
||||
async def _evaluate_problem(input: str, graph, expected_output: str) -> Tuple[str, str, str, int]:
|
||||
prompt = input
|
||||
|
||||
print("Question", prompt)
|
||||
max_retries = 5
|
||||
retries = 0
|
||||
|
||||
while retries < max_retries:
|
||||
try:
|
||||
prediction = model.a_generate(prompt)
|
||||
score = benchmark.scorer.exact_match_score(golden.expected_output, prediction)
|
||||
# 假设模型有一个异步生成函数
|
||||
prediction = await graph(prompt) if graph else "None" # 这是一个占位符,替换成实际的模型生成逻辑
|
||||
print(type(prediction))
|
||||
print("预测", prediction[0])
|
||||
|
||||
score = loose_match_score(expected_output, prediction[0]["response"])
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -67,92 +115,87 @@ class Evaluator:
|
|||
score = 0
|
||||
break
|
||||
|
||||
return golden.input, str(prediction), golden.expected_output, score
|
||||
return input, prediction, expected_output, score
|
||||
|
||||
def process_gsm8k_csv(file_path, tolerance=1e-6):
|
||||
# 读取 CSV 文件
|
||||
df = pd.read_csv(file_path, dtype=str) # 使用默认逗号分隔符,并指定所有列为字符串类型
|
||||
# 异步读取JSONL文件
|
||||
async def load_data(file_path: str) -> List[dict]:
    """Read a JSONL file asynchronously and return its leading records.

    Args:
        file_path: Path of the JSONL file; each non-blank line must hold one
            JSON document.

    Returns:
        The parsed records, truncated to the first ``samples`` entries
        (``samples`` comes from the enclosing scope).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Open as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    async with aiofiles.open(file_path, mode="r", encoding="utf-8") as file:
        async for line in file:
            # Skip blank lines (e.g. an extra empty line at the end of the
            # file) instead of letting json.loads fail on them.
            if line.strip():
                data.append(json.loads(line))
    return data[:samples]
|
||||
|
||||
# 清理预测和期望输出列
|
||||
df["prediction"] = df["prediction"].str.strip()
|
||||
df["prediction"] = df["prediction"].str.replace(",", "", regex=True)
|
||||
df["expected output"] = df["expected output"].str.strip()
|
||||
df["expected output"] = df["expected output"].str.replace(",", "", regex=True)
|
||||
# 并行评估所有问题
|
||||
async def evaluate_all_problems(data: List[dict], graph, max_concurrent_tasks: int = 1):
|
||||
semaphore = asyncio.Semaphore(max_concurrent_tasks)
|
||||
|
||||
# 将列转换为数值类型
|
||||
df["prediction"] = pd.to_numeric(df["prediction"], errors="coerce")
|
||||
df["expected output"] = pd.to_numeric(df["expected output"], errors="coerce")
|
||||
async def sem_evaluate(problem):
|
||||
async with semaphore:
|
||||
input_text = problem["question"]
|
||||
expected_output = problem["answer"]
|
||||
return await _evaluate_problem(input_text, graph, expected_output)
|
||||
|
||||
# 计算 score 列
|
||||
# 对于浮点数,使用近似相等的逻辑
|
||||
df["score"] = (df["prediction"] - df["expected output"]).abs() <= tolerance
|
||||
tasks = [sem_evaluate(problem) for problem in data]
|
||||
|
||||
# 将布尔值转换为整数
|
||||
df["score"] = df["score"].astype(int)
|
||||
# 使用tqdm.gather来显示进度条
|
||||
return await tqdm_asyncio.gather(*tasks, desc="Evaluating problems", total=len(data))
|
||||
|
||||
# 计算 score 列的平均值
|
||||
# 保存结果到CSV文件
|
||||
def save_results_to_csv(results: List[Tuple[str, str, str, int]], path):
|
||||
df = pd.DataFrame(results, columns=["question", "prediction", "expected_output", "score"])
|
||||
average_score = df["score"].mean()
|
||||
|
||||
# 获取输入文件的目录
|
||||
input_dir = os.path.dirname(file_path)
|
||||
|
||||
# 创建输出文件路径
|
||||
output_file_name = f"{average_score:.4f}.csv"
|
||||
output_file_path = os.path.join(input_dir, output_file_name)
|
||||
|
||||
# 写入新的 CSV 文件
|
||||
df.to_csv(output_file_path, index=False)
|
||||
|
||||
print(f"Data written to {output_file_path}")
|
||||
print(f"Average score: {average_score:.4f}")
|
||||
|
||||
# 统计空值数量
|
||||
num_empty_predictions = df["prediction"].isna().sum()
|
||||
|
||||
# 删除包含空 prediction 的行
|
||||
df = df.dropna(subset=["prediction"])
|
||||
|
||||
# 重新计算正确的、错误的以及空的个数
|
||||
num_correct = (df["score"] == 1).sum()
|
||||
num_incorrect = (df["score"] == 0).sum()
|
||||
|
||||
print(f"Number of empty predictions: {num_empty_predictions}")
|
||||
print(f"Number of correct predictions after removing empty ones: {num_correct}")
|
||||
print(f"Number of incorrect predictions after removing empty ones: {num_incorrect}")
|
||||
# 生成文件名,保留五位小数
|
||||
output_file = f"{path}/{average_score:.5f}.csv"
|
||||
df.to_csv(output_file, index=False)
|
||||
print(f"Results saved to {output_file}")
|
||||
|
||||
return average_score
|
||||
|
||||
dataset = params["dataset"]
|
||||
llm_config = params["llm_config"]
|
||||
async def gsm8k():
|
||||
file_path = "examples/ags/data/gsm8k.jsonl" # 替换为您的JSONL文件路径
|
||||
data = await load_data(file_path)
|
||||
|
||||
# TODO 给到的是load出来的Graph,怎么让他做实例化?graph_class 可以跟我这样用吗?
|
||||
graph = graph_class(name="Gsm8K", llm_config=llm_config, dataset=dataset)
|
||||
model = GraphModel(graph)
|
||||
benchmark = GSM8K(n_problems=samples, n_shots=0, enable_cot=False)
|
||||
graph = await load_graph()
|
||||
|
||||
graph_module = inspect.getmodule(graph_class)
|
||||
os.path.dirname(graph_module.__file__)
|
||||
goldens = benchmark.load_benchmark_dataset()[: benchmark.n_problems]
|
||||
# TODO 这里需要查看Graph的结构为什么没有办法实现
|
||||
print("--------------")
|
||||
print(graph)
|
||||
print("--------------")
|
||||
results = await evaluate_all_problems(data, graph, max_concurrent_tasks=20)
|
||||
|
||||
results = [_evaluate_problem(model, golden, benchmark) for golden in goldens]
|
||||
# 保存结果到CSV文件并获取平均分
|
||||
average_score = save_results_to_csv(results, path=path)
|
||||
|
||||
overall_correct_predictions = sum(score for _, _, _, score in results)
|
||||
overall_total_predictions = benchmark.n_problems
|
||||
overall_accuracy = overall_correct_predictions / overall_total_predictions
|
||||
print(f"Average score: {average_score:.5f}")
|
||||
return average_score
|
||||
|
||||
predictions_row = [
|
||||
(input, prediction, expected_output, score) for input, prediction, expected_output, score in results
|
||||
]
|
||||
benchmark.predictions = pd.DataFrame(
|
||||
predictions_row, columns=["input", "prediction", "expected output", "score"]
|
||||
)
|
||||
benchmark.overall_score = overall_accuracy
|
||||
now = datetime.datetime.now()
|
||||
now_time = now.strftime("%Y-%m-%d_%H-%M-%S").replace(":", "_")
|
||||
score = await gsm8k()
|
||||
|
||||
file_path = f"{self.eval_path}/gsm8k_{overall_accuracy}_{now_time}.csv"
|
||||
return score
|
||||
|
||||
benchmark.predictions.to_csv(file_path, index=False)
|
||||
|
||||
score = process_gsm8k_csv(file_path=file_path)
|
||||
return {"score": score}
|
||||
if __name__ == "__main__":
|
||||
|
||||
def extract_number(text: str) -> Optional[float]:
    """Return the last number that appears in ``text`` as a float.

    Integers, decimals, an optional leading sign and comma thousands
    separators (``18,000``) are recognised.

    Args:
        text: Free-form text to scan.

    Returns:
        The last matched number converted to ``float``, or ``None`` when the
        text contains no number or the conversion fails.
    """
    # The integer part is ``\d+`` rather than ``\d{1,3}``: with the latter a
    # number written without separators was split into pieces ("1234" became
    # "123" and "4"), so the wrong value was returned.
    matches = re.findall(r"[-+]?\d+(?:,\d{3})*(?:\.\d+)?", text)
    print(matches)
    if not matches:
        return None

    # Only the final match counts; commas are stripped so float() can parse it.
    last_number = matches[-1].replace(",", "")
    try:
        return float(last_number)
    except ValueError:
        return None
|
||||
|
||||
num = extract_number(
|
||||
r"To determine how much Janet makes every day at the farmers' market, we need to follow these steps:\n\n1. **Calculate the total number of eggs Janet uses daily:**\n - She eats 3 eggs for breakfast.\n - She uses 4 eggs to bake muffins.\n - Total eggs used = 3 (for breakfast) + 4 (for muffins) = 7 eggs.\n\n2. **Determine the number of eggs left to sell:**\n - Total eggs laid per day = 16 eggs.\n - Eggs left after use = 16 (total eggs) - 7 (eggs used) = 9 eggs.\n\n3. **Calculate the revenue from selling the remaining eggs:**\n - She sells each egg for $2.\n - Total revenue = 9 (eggs left) * $2 (price per egg) = $18.\n\nThus, Janet makes $18,000 every day at the farmers' market."
|
||||
)
|
||||
print(num)
|
||||
|
|
|
|||
|
|
@ -5,15 +5,14 @@
|
|||
|
||||
from typing import Literal
|
||||
|
||||
# from examples.ags.w_action_node.operator import * 改
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
|
||||
|
||||
cost_manager = CostManager()
|
||||
|
||||
|
||||
class Graph:
|
||||
class SolveGraph:
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
|
|
@ -24,9 +23,11 @@ class Graph:
|
|||
self.dataset = dataset
|
||||
self.llm = create_llm_instance(llm_config)
|
||||
self.llm.cost_manager = CostManager()
|
||||
# self.generate = Generate() 改
|
||||
|
||||
def __call__(self):
|
||||
async def __call__(self, problem: str):
|
||||
"""
|
||||
Implementation of the graph
|
||||
"""
|
||||
return self.llm.cost_manager.total_cost
|
||||
solution = await self.generate(problem)
|
||||
return solution, self.llm.cost_manager.total_cost
|
||||
|
|
|
|||
|
|
@ -52,17 +52,24 @@ class Operator:
|
|||
raise NotImplementedError
|
||||
|
||||
|
||||
class Generate(Operator):
|
||||
"""
|
||||
基于Action Node Fill Function的 Generate 算子
|
||||
"""
|
||||
class Custom(Operator):
    """Operator that sends the input followed by a caller-supplied instruction to the LLM."""

    def __init__(self, llm: LLM, name: str = "Custom"):
        # ``super`` has to be *called* to get the parent proxy; the bare
        # ``super.__init__(name, llm)`` raised TypeError on instantiation.
        super().__init__(name, llm)

    async def __call__(self, input, instruction):
        """Fill a ``GenerateOp`` node from ``input + instruction``.

        Args:
            input: Text placed first in the prompt.
            instruction: Text appended directly after ``input`` (no separator
                is inserted).

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = input + instruction
        node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm, mode="single_fill")
        response = node.instruct_content.model_dump()
        return response
|
||||
|
||||
|
||||
class Generate(Operator):
|
||||
def __init__(self, llm: LLM, name: str = "Generate"):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem_description):
|
||||
prompt = GENERATE_PROMPT.format(problem_description=problem_description)
|
||||
node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm)
|
||||
async def __call__(self, problem):
|
||||
prompt = GENERATE_PROMPT.format(problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
|
||||
|
|
@ -72,11 +79,9 @@ class ContextualGenerate(Operator):
|
|||
super().__init__(name, llm)
|
||||
|
||||
@retry(stop=stop_after_attempt(3))
|
||||
async def __call__(self, problem_description, thought, function_name):
|
||||
prompt = CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem_description, thought=thought)
|
||||
node = await ActionNode.from_pydantic(GenerateOp).fill(
|
||||
context=prompt, llm=self.llm, function_name=function_name
|
||||
)
|
||||
async def __call__(self, problem, context):
|
||||
prompt = CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem, thought=context)
|
||||
node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
|
||||
|
|
@ -86,13 +91,13 @@ class CodeGenerate(Operator):
|
|||
super().__init__(name, llm)
|
||||
|
||||
@retry(stop=stop_after_attempt(3))
|
||||
async def __call__(self, problem_description, function_name):
|
||||
prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem_description)
|
||||
async def __call__(self, problem, function_name):
|
||||
prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(CodeGenerateOp).fill(
|
||||
context=prompt, llm=self.llm, mode="code_fill", function_name=function_name
|
||||
)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
return response # {"code": "xxx"}
|
||||
|
||||
|
||||
class CodeContextualGenerate(Operator):
|
||||
|
|
@ -100,49 +105,48 @@ class CodeContextualGenerate(Operator):
|
|||
super().__init__(name, llm)
|
||||
|
||||
@retry(stop=stop_after_attempt(3))
|
||||
async def __call__(self, problem_description, thought, function_name):
|
||||
prompt = CODE_CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem_description, thought=thought)
|
||||
async def __call__(self, problem, thought, function_name):
|
||||
prompt = CODE_CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem, thought=thought)
|
||||
node = await ActionNode.from_pydantic(CodeGenerateOp).fill(
|
||||
context=prompt, llm=self.llm, mode="code_fill", function_name=function_name
|
||||
)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
return response # {"code": "xxx"}
|
||||
|
||||
|
||||
class Format(Generate):
|
||||
def __init__(self, name: str = "Format", llm: LLM = LLM()):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem_description, solution):
|
||||
prompt = FORMAT_PROMPT.format(problem_description=problem_description, solution=solution)
|
||||
# 使用JSON MODE 输出 Formatted 的结果
|
||||
async def __call__(self, problem, solution):
|
||||
prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
|
||||
node = await ActionNode.from_pydantic(FormatOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
return response # {"solution":"xxx"}
|
||||
|
||||
|
||||
class Review(Operator):
|
||||
def __init__(self, criteria, name: str = "Review", llm: LLM = LLM()):
|
||||
def __init__(self, criteria: str = "accuracy", name: str = "Review", llm: LLM = LLM()):
|
||||
self.criteria = criteria
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem_description, solution):
|
||||
prompt = REVIEW_PROMPT.format(
|
||||
problem_description=problem_description, solution=solution, criteria=self.criteria
|
||||
)
|
||||
async def __call__(self, problem, solution):
|
||||
prompt = REVIEW_PROMPT.format(problem_description=problem, solution=solution, criteria=self.criteria)
|
||||
node = await ActionNode.from_pydantic(ReviewOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
return response # {"review_result": True, "feedback": "xxx"}
|
||||
|
||||
|
||||
class Revise(Operator):
|
||||
def __init__(self, name: str = "Revise", llm: LLM = LLM()):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem_description, solution, feedback):
|
||||
prompt = REVISE_PROMPT.format(problem_description=problem_description, solution=solution, feedback=feedback)
|
||||
node = await ActionNode.from_pydantic(ReviseOp).fill(context=prompt, llm=self.llm)
|
||||
async def __call__(self, problem, solution, feedback):
|
||||
prompt = REVISE_PROMPT.format(problem_description=problem, solution=solution, feedback=feedback)
|
||||
node = await ActionNode.from_pydantic(ReviseOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
return response # {"solution": "xxx"}
|
||||
|
||||
|
||||
class FuEnsemble(Operator):
|
||||
|
|
@ -153,14 +157,14 @@ class FuEnsemble(Operator):
|
|||
def __init__(self, name: str = "FuEnsemble", llm: LLM = LLM()):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, solutions: List, problem_description):
|
||||
async def __call__(self, solutions: List, problem):
|
||||
solution_text = ""
|
||||
for solution in solutions:
|
||||
solution_text += str(solution) + "\n"
|
||||
prompt = FU_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
|
||||
prompt = FU_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
return {"solution": response["final_solution"]} # {"final_solution": "xxx"}
|
||||
|
||||
|
||||
class MdEnsemble(Operator):
|
||||
|
|
@ -180,7 +184,7 @@ class MdEnsemble(Operator):
|
|||
answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
|
||||
return shuffled_solutions, answer_mapping
|
||||
|
||||
async def __call__(self, solutions: List[str], problem_description: str):
|
||||
async def __call__(self, solutions: List[str], problem: str):
|
||||
print(f"solution count: {len(solutions)}")
|
||||
all_responses = []
|
||||
|
||||
|
|
@ -191,7 +195,7 @@ class MdEnsemble(Operator):
|
|||
for index, solution in enumerate(shuffled_solutions):
|
||||
solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
|
||||
|
||||
prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
|
||||
prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
|
||||
|
|
@ -204,7 +208,7 @@ class MdEnsemble(Operator):
|
|||
|
||||
most_frequent_index = Counter(all_responses).most_common(1)[0][0]
|
||||
final_answer = solutions[most_frequent_index]
|
||||
return {"final_solution": final_answer}
|
||||
return {"solution": final_answer} # {"final_solution": "xxx"}
|
||||
|
||||
|
||||
class CodeEnsmble(Operator):
|
||||
|
|
@ -219,7 +223,7 @@ class CodeEnsmble(Operator):
|
|||
answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
|
||||
return shuffled_solutions, answer_mapping
|
||||
|
||||
async def __call__(self, solutions: List[str], problem_description: str):
|
||||
async def __call__(self, solutions: List[str], problem: str):
|
||||
all_responses = []
|
||||
|
||||
unique_structures = {}
|
||||
|
|
@ -263,7 +267,7 @@ class CodeEnsmble(Operator):
|
|||
f"{chr(65 + index)}: \n weight(proportion of occurrences in all solutions):{weight} \n{code}\n\n\n"
|
||||
)
|
||||
|
||||
prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
|
||||
prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
|
||||
|
|
@ -277,7 +281,7 @@ class CodeEnsmble(Operator):
|
|||
|
||||
most_frequent_index = Counter(all_responses).most_common(1)[0][0]
|
||||
final_answer = solutions[most_frequent_index]["code"]
|
||||
return {"final_solution": final_answer}
|
||||
return {"solution": final_answer} # {"final_solution": "xxx"}
|
||||
|
||||
|
||||
class ScEnsemble(Operator):
|
||||
|
|
@ -291,21 +295,21 @@ class ScEnsemble(Operator):
|
|||
def __init__(self, name: str = "ScEnsemble", llm: LLM = LLM()):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, solutions: List[str], problem_description: str):
|
||||
async def __call__(self, solutions: List[str], problem: str):
|
||||
answer_mapping = {}
|
||||
solution_text = ""
|
||||
for index, solution in enumerate(solutions):
|
||||
answer_mapping[chr(65 + index)] = index
|
||||
solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
|
||||
|
||||
prompt = SC_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
|
||||
prompt = SC_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(ScEnsembleOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
|
||||
answer = response.get("solution_letter", "")
|
||||
answer = answer.strip().upper()
|
||||
|
||||
return {"final_solution": solutions[answer_mapping[answer]]}
|
||||
return {"solution": solutions[answer_mapping[answer]]} # {"final_solution": "xxx"}
|
||||
|
||||
|
||||
class Rephrase(Operator):
|
||||
|
|
@ -319,11 +323,11 @@ class Rephrase(Operator):
|
|||
def __init__(self, name: str = "Rephrase", llm: LLM = LLM()):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem_description: str) -> str:
|
||||
prompt = REPHRASE_ON_PROBLEM_PROMPT.format(problem_description=problem_description)
|
||||
node = await ActionNode.from_pydantic(RephraseOp).fill(context=prompt, llm=self.llm)
|
||||
async def __call__(self, problem: str) -> str:
|
||||
prompt = REPHRASE_ON_PROBLEM_PROMPT.format(problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(RephraseOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
response = node.instruct_content.model_dump()
|
||||
return response["rephrased_problem"]
|
||||
return response # {"rephrased_problem": "xxx"}
|
||||
|
||||
|
||||
class Test(Operator):
|
||||
|
|
@ -391,4 +395,5 @@ class Test(Operator):
|
|||
node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
solution = response["refined_solution"]
|
||||
return {"final_solution": solution}
|
||||
|
||||
return {"solution": solution}
|
||||
|
|
|
|||
|
|
@ -7,19 +7,11 @@ from pydantic import BaseModel, Field
|
|||
|
||||
|
||||
class GenerateOp(BaseModel):
|
||||
solution: str = Field(default="", description="Your solution for this problem")
|
||||
response: str = Field(default="", description="Your solution for this problem")
|
||||
|
||||
|
||||
class CodeGenerateOp(BaseModel):
|
||||
code_solution: str = Field(default="", description="Your complete code solution for this problem")
|
||||
|
||||
|
||||
class GenerateCodeSolution(BaseModel):
|
||||
content: str = Field(default="", description="A description of the solution")
|
||||
thought: str = Field(
|
||||
default="",
|
||||
description="Shortly explain why this solution correctly solves the problem. Be specific and detailed regarding the problem rules and goals.",
|
||||
)
|
||||
code: str = Field(default="", description="Your complete code solution for this problem")
|
||||
|
||||
|
||||
class FormatOp(BaseModel):
|
||||
|
|
@ -38,7 +30,7 @@ class ReviewOp(BaseModel):
|
|||
|
||||
|
||||
class ReviseOp(BaseModel):
|
||||
revised_solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
|
||||
solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
|
||||
|
||||
|
||||
class FuEnsembleOp(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,550 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Date : 6/27/2024 17:36 PM
|
||||
# @Author : didi
|
||||
# @Desc : operator demo of ags
|
||||
import ast
|
||||
import random
|
||||
import sys
|
||||
import traceback
|
||||
from collections import Counter
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
from tenacity import retry, stop_after_attempt
|
||||
|
||||
from examples.ags.w_action_node.operator_an import (
|
||||
FormatOp,
|
||||
FuEnsembleOp,
|
||||
GenerateCodeBlockOp,
|
||||
GenerateCodeSolution,
|
||||
GenerateOp,
|
||||
MdEnsembleOp,
|
||||
ReflectionTestOp,
|
||||
RephraseOp,
|
||||
ReviewOp,
|
||||
ReviseOp,
|
||||
)
|
||||
from examples.ags.w_action_node.prompt import (
|
||||
DE_ENSEMBLE_ANGEL_PROMPT,
|
||||
DE_ENSEMBLE_CODE_FORMAT_PROMPT,
|
||||
DE_ENSEMBLE_DEVIL_PROMPT,
|
||||
DE_ENSEMBLE_JUDGE_FINAL_PROMPT,
|
||||
DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT,
|
||||
DE_ENSEMBLE_TXT_FORMAT_PROMPT,
|
||||
FORMAT_PROMPT,
|
||||
FU_ENSEMBLE_PROMPT,
|
||||
GENERATE_CODE_SOLUTION_PROMPT,
|
||||
GENERATE_CODEBLOCK_PROMPT,
|
||||
GENERATE_CODEBLOCK_REPHRASE_PROMPT,
|
||||
GENERATE_ON_CONTEXT_PROMPT,
|
||||
GENERATE_PROMPT,
|
||||
MATH_ANSWER_FORMAT_PROMPT,
|
||||
MATH_CORE_PROMPT,
|
||||
MATH_EXTRACT_PROMPT,
|
||||
MATH_GENERATE_PROMPT,
|
||||
MATH_REPHRASE_ON_PROBLEM_PROMPT,
|
||||
MD_ENSEMBLE_PROMPT,
|
||||
REFLECTION_ON_PUBLIC_TEST_PROMPT,
|
||||
REPHRASE_ON_CODE_PROMPT,
|
||||
REPHRASE_ON_PROBLEM_PROMPT,
|
||||
REVIEW_PROMPT,
|
||||
REVISE_PROMPT,
|
||||
)
|
||||
from examples.ags.w_action_node.utils import test_case_2_test_function
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.llm import LLM
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class Operator:
    """Base class for operators: holds a name and the LLM the operator uses."""

    def __init__(self, name, llm: LLM):
        """Store ``name`` and ``llm`` on the instance."""
        self.name, self.llm = name, llm

    def __call__(self, *args, **kwargs):
        """Not implemented on the base class; always raises ``NotImplementedError``."""
        raise NotImplementedError
|
||||
|
||||
|
||||
class Generate(Operator):
    """
    Generate operator built on the ActionNode fill function.
    """

    def __init__(self, name: str = "Generate", llm: LLM = None):
        # A ``LLM()`` default in the signature is evaluated once, when the
        # class body runs, so it would build an LLM at import time and share
        # that one object between all instances. ``None`` means "create a
        # fresh LLM for this instance".
        super().__init__(name, LLM() if llm is None else llm)

    async def _fill_node(self, op_class, prompt):
        """Fill ``op_class`` from ``prompt`` with this operator's LLM and return the dumped content."""
        node = await ActionNode.from_pydantic(op_class).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()

    async def __call__(self, problem_description):
        """Fill a ``GenerateOp`` node from ``GENERATE_PROMPT`` for the given problem."""
        prompt = GENERATE_PROMPT.format(problem_description=problem_description)
        return await self._fill_node(GenerateOp, prompt)

    async def math_generate(self, problem_description):
        """Fill a ``GenerateOp`` node from ``MATH_GENERATE_PROMPT`` for the given problem."""
        prompt = MATH_GENERATE_PROMPT.format(problem_description=problem_description)
        return await self._fill_node(GenerateOp, prompt)

    async def code_solution_generate(self, problem_description: str, rephrase_problem: str):
        """Fill a ``GenerateCodeSolution`` node from the problem and its rephrased form."""
        prompt = GENERATE_CODE_SOLUTION_PROMPT.format(
            problem_description=problem_description, rephrase_problem=rephrase_problem
        )
        return await self._fill_node(GenerateCodeSolution, prompt)

    async def context_solution_generate(self, question, context):
        """Fill a ``GenerateOp`` node from ``GENERATE_ON_CONTEXT_PROMPT`` using ``question`` and ``context``."""
        prompt = GENERATE_ON_CONTEXT_PROMPT.format(problem_description=question, context=context)
        return await self._fill_node(GenerateOp, prompt)
|
||||
|
||||
|
||||
class GenerateCodeBlock(Operator):
    """Operator that fills a ``GenerateCodeBlockOp`` node in ``code_fill`` mode."""

    def __init__(self, name: str = "GenerateCodeBlock", llm: LLM = None):
        # A ``LLM()`` default in the signature is evaluated once, when the
        # class body runs, so it would build an LLM at import time and share
        # that one object between all instances. ``None`` means "create a
        # fresh LLM for this instance".
        super().__init__(name, LLM() if llm is None else llm)

    @retry(stop=stop_after_attempt(3))
    async def __call__(self, problem_description, function_name):
        """Generate a code block for the problem; attempted at most three times.

        Args:
            problem_description: Text substituted into ``GENERATE_CODEBLOCK_PROMPT``.
            function_name: Forwarded unchanged to ``ActionNode.fill``.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = GENERATE_CODEBLOCK_PROMPT.format(problem_description=problem_description)
        node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(
            context=prompt, llm=self.llm, mode="code_fill", function_name=function_name
        )
        return node.instruct_content.model_dump()

    @retry(stop=stop_after_attempt(3))
    async def rephrase_generate(self, problem_description, thought, function_name):
        """Generate a code block from the problem plus a ``thought``; attempted at most three times.

        Args:
            problem_description: Text substituted into ``GENERATE_CODEBLOCK_REPHRASE_PROMPT``.
            thought: Additional text substituted into the same prompt.
            function_name: Forwarded unchanged to ``ActionNode.fill``.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = GENERATE_CODEBLOCK_REPHRASE_PROMPT.format(problem_description=problem_description, thought=thought)
        node = await ActionNode.from_pydantic(GenerateCodeBlockOp).fill(
            context=prompt, llm=self.llm, mode="code_fill", function_name=function_name
        )
        return node.instruct_content.model_dump()
|
||||
|
||||
|
||||
class Format(Operator):
    """Operator that fills a ``FormatOp`` node from a formatting prompt."""

    def __init__(self, name: str = "Format", llm: LLM = None):
        # A ``LLM()`` default in the signature is evaluated once, when the
        # class body runs, so it would build an LLM at import time and share
        # that one object between all instances. ``None`` means "create a
        # fresh LLM for this instance".
        super().__init__(name, LLM() if llm is None else llm)

    async def __call__(self, problem_description, solution):
        """Fill a ``FormatOp`` node from ``FORMAT_PROMPT`` using the problem and a solution.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = FORMAT_PROMPT.format(problem_description=problem_description, solution=solution)
        node = await ActionNode.from_pydantic(FormatOp).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()

    async def math_answer_format(self, problem_description: str) -> dict:
        """Fill a ``FormatOp`` node from ``MATH_ANSWER_FORMAT_PROMPT`` for the given problem.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = MATH_ANSWER_FORMAT_PROMPT.format(problem_description=problem_description)
        node = await ActionNode.from_pydantic(FormatOp).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()
|
||||
|
||||
|
||||
class Review(Operator):
    """Operator that fills a ``ReviewOp`` node for a solution against fixed criteria."""

    def __init__(self, criteria, name: str = "Review", llm: LLM = None):
        # Substituted into REVIEW_PROMPT on every call.
        self.criteria = criteria
        # A ``LLM()`` default in the signature is evaluated once, when the
        # class body runs, so it would build an LLM at import time and share
        # that one object between all instances. ``None`` means "create a
        # fresh LLM for this instance".
        super().__init__(name, LLM() if llm is None else llm)

    async def __call__(self, problem_description, solution):
        """Fill a ``ReviewOp`` node from ``REVIEW_PROMPT``.

        Args:
            problem_description: Problem text substituted into the prompt.
            solution: Solution text substituted into the prompt.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = REVIEW_PROMPT.format(
            problem_description=problem_description, solution=solution, criteria=self.criteria
        )
        node = await ActionNode.from_pydantic(ReviewOp).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()
|
||||
|
||||
|
||||
class Revise(Operator):
    """Operator that fills a ``ReviseOp`` node from a solution and feedback on it."""

    def __init__(self, name: str = "Revise", llm: LLM = None):
        # A ``LLM()`` default in the signature is evaluated once, when the
        # class body runs, so it would build an LLM at import time and share
        # that one object between all instances. ``None`` means "create a
        # fresh LLM for this instance".
        super().__init__(name, LLM() if llm is None else llm)

    async def __call__(self, problem_description, solution, feedback):
        """Fill a ``ReviseOp`` node from ``REVISE_PROMPT``.

        Args:
            problem_description: Problem text substituted into the prompt.
            solution: Current solution text substituted into the prompt.
            feedback: Feedback text substituted into the prompt.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        prompt = REVISE_PROMPT.format(problem_description=problem_description, solution=solution, feedback=feedback)
        node = await ActionNode.from_pydantic(ReviseOp).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()
|
||||
|
||||
|
||||
class FuEnsemble(Operator):
    """
    Function: Critically evaluating multiple solution candidates, synthesizing their strengths, and developing an enhanced, integrated solution.
    """

    def __init__(self, name: str = "FuEnsemble", llm: LLM = None):
        # A ``LLM()`` default in the signature is evaluated once, when the
        # class body runs, so it would build an LLM at import time and share
        # that one object between all instances. ``None`` means "create a
        # fresh LLM for this instance".
        super().__init__(name, LLM() if llm is None else llm)

    async def __call__(self, solutions: List, problem_description):
        """Fill a ``FuEnsembleOp`` node from all candidate solutions.

        Args:
            solutions: Candidate solutions; each is converted with ``str()``
                and written on its own line in the prompt.
            problem_description: Problem text substituted into the prompt.

        Returns:
            ``node.instruct_content.model_dump()`` of the filled node.
        """
        # One newline-terminated line per candidate, built with a single join
        # instead of repeated string concatenation.
        solution_text = "".join(str(solution) + "\n" for solution in solutions)
        prompt = FU_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
        node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()
|
||||
|
||||
|
||||
class MdEnsemble(Operator):
    """
    Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
    Link: https://arxiv.org/abs/2311.16452

    Asks the LLM `vote_count` times to pick the best candidate, shuffling the
    candidate order before every vote, and returns the most voted candidate.
    """

    def __init__(self, name: str = "MdEnsemble", llm: LLM = LLM(), vote_count: int = 3):
        super().__init__(name, llm)
        self.vote_count = vote_count

    @staticmethod
    def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, int]]:
        """Return a shuffled copy of `solutions` and a letter -> original-index map.

        Letters start at "A" and follow the shuffled order. Equal candidates map
        to the index of their first occurrence in `solutions`, so votes for
        duplicated candidates are pooled.
        """
        shuffled_solutions = solutions.copy()
        random.shuffle(shuffled_solutions)
        answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
        return shuffled_solutions, answer_mapping

    async def __call__(self, solutions: List[str], problem_description: str):
        """Vote among `solutions` and return the winner.

        Args:
            solutions: Candidate solutions; must not be empty.
            problem_description: Text substituted into the prompt's `problem_description` slot.

        Returns:
            `{"final_solution": <chosen candidate>}`.

        Raises:
            ValueError: If `solutions` is empty.
        """
        if not solutions:
            raise ValueError("MdEnsemble requires at least one solution to vote on")

        logger.info(f"solution count: {len(solutions)}")

        # With a single candidate every vote has the same outcome; skip the LLM calls.
        if len(solutions) == 1:
            return {"final_solution": solutions[0]}

        all_responses = []
        for _ in range(self.vote_count):
            shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
            solution_text = "".join(
                f"{chr(65 + index)}: \n{str(solution)}\n\n\n" for index, solution in enumerate(shuffled_solutions)
            )

            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
            node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
            response = node.instruct_content.model_dump()

            # A missing or None letter is treated as an invalid vote instead of crashing.
            answer = str(response.get("solution_letter") or "").strip().upper()
            if answer in answer_mapping:
                all_responses.append(answer_mapping[answer])

        if not all_responses:
            # No vote named a valid letter; previously this raised IndexError.
            # Fall back to the first candidate so the pipeline can continue.
            return {"final_solution": solutions[0]}

        most_frequent_index = Counter(all_responses).most_common(1)[0][0]
        return {"final_solution": solutions[most_frequent_index]}
|
||||
|
||||
|
||||
class CodeEnsmble(Operator):
    """Vote among code candidates after de-duplicating them by AST structure.

    Candidates that do not parse are discarded. Each remaining distinct structure
    is shown to the LLM with a weight equal to its share of the parseable candidates.
    """

    def __init__(self, name: str = "CodeEnsemble", llm: LLM = LLM(), vote_count: int = 3):
        super().__init__(name, llm)
        self.vote_count = vote_count

    @staticmethod
    def shuffle_answers(solutions: List[dict]) -> Tuple[List[dict], Dict[str, int]]:
        """Return a shuffled copy of `solutions` and a letter -> original-index map."""
        shuffled_solutions = solutions.copy()
        random.shuffle(shuffled_solutions)
        answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
        return shuffled_solutions, answer_mapping

    async def __call__(self, solutions: List[str], problem_description: str):
        """Pick the best code candidate.

        Args:
            solutions: Candidate source strings; must not be empty.
            problem_description: Text substituted into the prompt's `problem_description` slot.

        Returns:
            `{"final_solution": <chosen code>}`.

        Raises:
            ValueError: If `solutions` is empty.
        """
        if not solutions:
            raise ValueError("CodeEnsmble requires at least one solution to vote on")

        unique_structures = {}
        unique_structures_count = Counter()
        for solution in solutions:
            try:
                structure_key = ast.dump(ast.parse(solution), annotate_fields=False, include_attributes=False)
            except (SyntaxError, ValueError):
                # Drop candidates that are not valid Python source.
                continue
            unique_structures.setdefault(structure_key, solution)
            unique_structures_count[structure_key] += 1

        # Weights use the number of parseable candidates, not the number of inputs.
        valid_solutions_count = sum(unique_structures_count.values())
        candidates = [
            {"code": unique_structures[structure_key], "weight": count / valid_solutions_count}
            for structure_key, count in unique_structures_count.items()
        ]

        if not candidates:
            # Nothing parsed; previously this crashed with IndexError after the vote.
            # Return the first raw candidate so a later stage can report its error.
            return {"final_solution": solutions[0]}
        if len(candidates) == 1:
            return {"final_solution": candidates[0]["code"]}

        all_responses = []
        for _ in range(self.vote_count):
            shuffled_solutions, answer_mapping = self.shuffle_answers(candidates)

            solution_text = ""
            for index, solution in enumerate(shuffled_solutions):
                weight = str(solution["weight"])
                code = solution["code"]
                solution_text += (
                    f"{chr(65 + index)}: \n weight(proportion of occurrences in all solutions):{weight} \n{code}\n\n\n"
                )

            prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem_description)
            node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
            response = node.instruct_content.model_dump()

            # A missing or None letter is treated as an invalid vote instead of crashing.
            answer = str(response.get("solution_letter") or "").strip().upper()
            if answer in answer_mapping:
                all_responses.append(answer_mapping[answer])

        if not all_responses:
            # No valid vote: fall back to the most frequent structure (first one on ties).
            return {"final_solution": max(candidates, key=lambda item: item["weight"])["code"]}

        most_frequent_index = Counter(all_responses).most_common(1)[0][0]
        return {"final_solution": candidates[most_frequent_index]["code"]}
|
||||
|
||||
|
||||
class ScEnsemble(Operator):
    """
    Paper: Self-Consistency Improves Chain of Thought Reasoning in Language Models
    Link: https://arxiv.org/abs/2203.11171

    Not implemented yet: the class adds nothing to `Operator`.
    """
|
||||
|
||||
|
||||
class MADEnsemble(Operator):
    """
    Paper: Should we be going MAD? A Look at Multi-Agent Debate Strategies for LLMs
    Link: https://arxiv.org/abs/2311.17371

    Runs a debate between an "angel" (defends the original solution) and a
    "devil", with a "judge" that may stop the debate after each round.
    """

    def __init__(self, name: str = "DebateEnsemble", llm: LLM = LLM()):
        super().__init__(name, llm)
        self.agents = ["angel", "devil", "judge"]
        self.format_requirements = {"txt": DE_ENSEMBLE_TXT_FORMAT_PROMPT, "code": DE_ENSEMBLE_CODE_FORMAT_PROMPT}

    def get_system_prompt(self, name: str, mode: str = "txt"):
        """Return the system prompt for agent `name`.

        For "angel" and "devil", `mode == "code"` selects the code format
        requirement and anything else the text one. For "judge",
        `mode == "final"` selects the final prompt and anything else the
        universal one. Unknown names yield None.
        """
        if name == "angel":
            if mode == "code":
                return DE_ENSEMBLE_ANGEL_PROMPT + "\n" + DE_ENSEMBLE_CODE_FORMAT_PROMPT
            return DE_ENSEMBLE_ANGEL_PROMPT + "\n" + DE_ENSEMBLE_TXT_FORMAT_PROMPT
        elif name == "devil":
            if mode == "code":
                return DE_ENSEMBLE_DEVIL_PROMPT + "\n" + DE_ENSEMBLE_CODE_FORMAT_PROMPT
            return DE_ENSEMBLE_DEVIL_PROMPT + "\n" + DE_ENSEMBLE_TXT_FORMAT_PROMPT
        elif name == "judge":
            if mode == "final":
                return DE_ENSEMBLE_JUDGE_FINAL_PROMPT
            return DE_ENSEMBLE_JUDGE_UNIVERSAL_PROMPT
        return None

    def construct_messages(self, message_history_with_name, name, mode: str = "txt", phase: str = "universal"):
        """
        Build the system message from `name` and `mode`, then the conversation
        messages from `name`.

        For the judge, the system prompt is chosen by `phase`. Previously `phase`
        was ignored, so the final judge prompt could only be reached by passing
        `mode="final"`, which is still honoured.
        """
        if name == "judge":
            prompt_mode = "final" if "final" in (phase, mode) else mode
        else:
            prompt_mode = mode

        messages = [{"role": "system", "content": self.get_system_prompt(name, prompt_mode)}]

        if name in ["angel", "devil"]:
            messages = self._construct_debate(message_history_with_name, name, messages)
        elif name == "judge":
            messages = self._construct_judge(message_history_with_name, mode, messages)
        return messages

    def _construct_debate(self, message_history_with_name, name, messages):
        """Append the debate history as seen by debater `name`.

        The debater's own turns become assistant messages; everything the other
        debater said in between is merged into one user message. Judge turns
        are skipped.
        """
        user_message = ""

        for message in message_history_with_name:
            # History stores the judge as "judge"; the old check for "Judge" never matched.
            if str(message["name"]).lower() == "judge":
                continue
            elif message["name"] == name:
                if user_message:
                    messages.append({"role": "user", "name": "user", "content": user_message.strip("\n")})
                messages.append({"role": "assistant", "name": name, "content": message["content"]})
                user_message = ""
            else:
                # Newline-separate consecutive opponent turns; the strip("\n")
                # applied before sending removes the trailing one.
                user_message += message["content"] + "\n"

        if user_message:
            messages.append({"role": "user", "name": "user", "content": user_message.strip("\n")})

        return messages

    def _construct_judge(self, message_history_with_name, mode, messages):
        """Append the debate transcript for the judge and return `messages`.

        This method used to be a stub returning None, so the judge was called
        with `messages=None`.
        NOTE(review): the transcript layout (one "name: content" entry per
        debater turn, earlier judge turns omitted) is a minimal choice; confirm
        it matches what the judge prompts expect.
        """
        transcript = "\n\n".join(
            f"{message['name']}: {message['content']}"
            for message in message_history_with_name
            if str(message["name"]).lower() != "judge"
        )
        if transcript:
            messages.append({"role": "user", "name": "user", "content": transcript})
        return messages

    @staticmethod
    def _parse_judge_response(response) -> dict:
        """Best-effort conversion of the judge's reply into a dict ({} on failure)."""
        import json  # local import: this block does not rely on a file-level import

        if isinstance(response, dict):
            return response
        text = str(response)
        # Tolerate text or code fences around the JSON object.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            return {}
        try:
            parsed = json.loads(text[start : end + 1])
        except ValueError:
            return {}
        return parsed if isinstance(parsed, dict) else {}

    async def debate_answer(self, message_history: List, role: str = "angel", mode: str = "txt"):
        """Let debater `role` speak once; append the reply to `message_history`.

        Returns:
            `(message_history, response)` where `response` is the LLM text.
        """
        messages = self.construct_messages(message_history, role, mode=mode)
        response = await self.llm.acompletion_text(messages=messages)
        message_history.append({"role": "user", "name": role, "content": response})
        return message_history, response

    async def judge_answer(self, message_history: List, phase: str = "universal"):
        """Let the judge speak once; append the reply to `message_history`.

        Returns:
            `(message_history, response)` where `response` is the raw LLM text.
        """
        messages = self.construct_messages(message_history, "judge", phase=phase)
        response = await self.llm.acompletion_text(messages=messages)
        message_history.append({"role": "user", "name": "judge", "content": response})
        return message_history, response

    async def __call__(self, origin_solution: str, problem_description: str, max_round: int = 3, mode: str = "txt"):
        """Debate `origin_solution` and return the judge's result.

        Idea: the angel represents the original answer, the devil argues against
        it, and after each round the judge decides whether to stop. If
        `max_round` is reached without a stop, the judge gives a final summary.

        NOTE(review): `problem_description` is not injected into any message
        here; confirm whether the debate prompts are expected to carry it.
        NOTE(review): the original had a no-op `if index == 0: pass` before the
        angel's turn, possibly meant to skip it in the first round because the
        original solution is already in the history. The angel still speaks in
        every round, as before.

        Returns:
            `{"final_solution": ...}`.
        """
        message_history_with_name = [{"role": "user", "name": "angel", "content": origin_solution}]
        debate_closed = False

        for _ in range(max_round):
            for agent in self.agents:
                if agent in ("angel", "devil"):
                    # These coroutines were previously called without `await`,
                    # so unpacking their result raised TypeError.
                    message_history_with_name, _ = await self.debate_answer(
                        message_history_with_name, role=agent, mode=mode
                    )
                elif agent == "judge":
                    message_history_with_name, raw_verdict = await self.judge_answer(
                        message_history_with_name, phase="universal"
                    )
                    verdict = self._parse_judge_response(raw_verdict)
                    # An unparseable verdict is treated as "keep debating".
                    if not verdict.get("is_debating", True):
                        if "final_solution" in verdict:
                            return {"final_solution": verdict["final_solution"]}
                        # The judge ended the debate without a solution: go to the final phase.
                        debate_closed = True
                        break
            if debate_closed:
                break

        # Drop the last per-round judge turn before the final ruling, but never a
        # debater's message (the old unconditional pop could remove one).
        if message_history_with_name and message_history_with_name[-1]["name"] == "judge":
            message_history_with_name.pop(-1)
        message_history_with_name, raw_final = await self.judge_answer(message_history_with_name, phase="final")

        final_verdict = self._parse_judge_response(raw_final)
        # Fall back to the raw judge text when no structured answer is available.
        return {"final_solution": final_verdict.get("debate_answer", raw_final)}
|
||||
|
||||
|
||||
class Rephrase(Operator):
    """
    Paper: Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering
    Link: https://arxiv.org/abs/2401.08500
    Paper: Achieving >97% on GSM8K: Deeply Understanding the Problems Makes LLMs Better Solvers for Math Word Problems
    Link: https://arxiv.org/abs/2404.14963

    Every public method fills a `RephraseOp` node from a different prompt
    template and returns its `rephrased_problem` field.
    """

    def __init__(self, name: str = "Rephrase", llm: LLM = LLM()):
        super().__init__(name, llm)

    async def _rephrase_with(self, template: str, problem_description: str) -> str:
        """Format `template` with the problem, fill a `RephraseOp`, return `rephrased_problem`."""
        prompt = template.format(problem_description=problem_description)
        node = await ActionNode.from_pydantic(RephraseOp).fill(context=prompt, llm=self.llm)
        return node.instruct_content.model_dump()["rephrased_problem"]

    async def __call__(self, problem_description: str) -> str:
        """Rephrase using `REPHRASE_ON_PROBLEM_PROMPT`."""
        return await self._rephrase_with(REPHRASE_ON_PROBLEM_PROMPT, problem_description)

    async def code_rephrase(self, problem_description: str) -> str:
        """Rephrase using `REPHRASE_ON_CODE_PROMPT`."""
        return await self._rephrase_with(REPHRASE_ON_CODE_PROMPT, problem_description)

    async def math_rephrase(self, problem_description: str) -> str:
        """Rephrase using `MATH_REPHRASE_ON_PROBLEM_PROMPT`."""
        return await self._rephrase_with(MATH_REPHRASE_ON_PROBLEM_PROMPT, problem_description)

    async def math_core(self, problem_description: str) -> str:
        """Rephrase using `MATH_CORE_PROMPT`."""
        return await self._rephrase_with(MATH_CORE_PROMPT, problem_description)

    async def math_extract(self, problem_description: str) -> str:
        """Rephrase using `MATH_EXTRACT_PROMPT`."""
        return await self._rephrase_with(MATH_EXTRACT_PROMPT, problem_description)
|
||||
|
||||
|
||||
class Test(Operator):
    """Run a code solution against test cases and ask the LLM to repair it on failure."""

    def __init__(self, name: str = "Test", llm: LLM = LLM()):
        super().__init__(name, llm)

    def exec_code(self, solution, test_cases, problem_id, entry_point):
        """Execute `solution` against every test case.

        Returns:
            - `"no error"` when every test case passes;
            - a list of `{"test_fail_case": {...}}` dicts when assertions fail;
            - `{"exec_fail_case": <message>}` as soon as any other exception is raised.

        Side effects:
            Appends a line to `tester.txt` for every failing case.
        """
        fail_cases = []
        for test_case in test_cases:
            test_code = test_case_2_test_function(solution, test_case, entry_point)
            try:
                # SECURITY: this executes model-generated code in-process; run it
                # only in a sandboxed environment.
                # A copy of the module globals is used so the generated code sees
                # the same names as before but cannot overwrite this module's own.
                exec(test_code, dict(globals()))
            except AssertionError as e:
                tb_str = traceback.format_exception(type(e), e, e.__traceback__)
                with open("tester.txt", "a", encoding="utf-8") as f:
                    # str() keeps non-string problem ids from raising TypeError.
                    f.write("test_error" + str(problem_id) + "\n")
                error_infomation = {
                    "test_fail_case": {
                        "test_case": test_case,
                        "error_type": "AssertionError",
                        "error_message": str(e),
                        "traceback": tb_str,
                    }
                }
                fail_cases.append(error_infomation)
                logger.info(f"test error: {error_infomation}")
            except Exception as e:
                with open("tester.txt", "a", encoding="utf-8") as f:
                    f.write(str(problem_id) + "\n")
                return {"exec_fail_case": str(e)}

        return fail_cases if fail_cases else "no error"

    async def __call__(self, problem_id, problem, rephrase_problem, solution, test_cases, entry_point, test_loop):
        """Test `solution["final_solution"]` and refine it up to `test_loop` times.

        The solution is executed before each refinement; the refinement produced
        in the last iteration is returned without being executed again.

        Returns:
            `{"final_solution": <code>}`.
        """
        solution = solution["final_solution"]
        for _ in range(test_loop):
            result = self.exec_code(solution, test_cases, problem_id, entry_point)
            if result == "no error":
                return {"final_solution": solution}

            if isinstance(result, dict) and "exec_fail_case" in result:
                # The code raised something other than an AssertionError.
                exec_pass = f"executed unsuccessfully, error: \n {result['exec_fail_case']}"
                test_fail = "executed unsucessfully"
            else:
                # The code ran but failed at least one assertion.
                exec_pass = "executed successfully"
                test_fail = result

            prompt = REFLECTION_ON_PUBLIC_TEST_PROMPT.format(
                problem_description=problem,
                rephrase_problem=rephrase_problem,
                code_solution=solution,
                exec_pass=exec_pass,
                test_fail=test_fail,
            )
            node = await ActionNode.from_pydantic(ReflectionTestOp).fill(context=prompt, llm=self.llm)
            response = node.instruct_content.model_dump()
            solution = response["refined_solution"]

        return {"final_solution": solution}
|
||||
|
||||
|
||||
class FindFact(Operator):
    """Placeholder operator: it only forwards `name` and `llm` to the base initializer."""

    def __init__(self, name: str = "FindFact", llm: LLM = LLM()):
        super().__init__(name, llm)
|
||||
|
||||
|
||||
class SelfAsk(Operator):
    """Placeholder operator: it only forwards `name` and `llm` to the base initializer."""

    def __init__(self, name: str = "SelfAsk", llm: LLM = LLM()):
        super().__init__(name, llm)
|
||||
|
||||
|
||||
class Verify(Operator):
    """Placeholder operator: it only forwards `name` and `llm` to the base initializer."""

    def __init__(self, name: str = "Verify", llm: LLM = LLM()):
        super().__init__(name, llm)
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Date : 6/27/2024 22:07 PM
|
||||
# @Author : didi
|
||||
# @Desc : Basic Graph Class
|
||||
|
||||
# from typing import Literal
|
||||
|
||||
# from examples.ags.w_action_node.optimized.Gsm8K.operators.template.op_prompt import *
|
||||
# from examples.ags.w_action_node.optimized.Gsm8K.operators.template.operator import *
|
||||
# from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
# from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
# DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
|
||||
|
||||
|
||||
# class SolveGraph:
|
||||
# def __init__(
|
||||
# self,
|
||||
# name: str,
|
||||
# llm_config,
|
||||
# dataset: DatasetType,
|
||||
# ) -> None:
|
||||
# self.name = name
|
||||
# self.dataset = dataset
|
||||
# self.llm = create_llm_instance(llm_config)
|
||||
# self.llm.cost_manager = CostManager()
|
||||
# self.generate = Generate(self.llm)
|
||||
|
||||
# async def __call__(self, problem: str):
|
||||
# """
|
||||
# Implementation of the graph based on the generate operator, you can modify it to fit operators you want to use.
|
||||
# For Example, for Custom Operator, you can add self.custom = Custom(self.llm) and call it in the __call__ method
|
||||
# """
|
||||
# solution = await self.generate(problem)
|
||||
# return solution, self.llm.cost_manager.total_cost
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
# Prompt templates, each filled with `str.format`; the placeholders are listed above each one.

# Placeholders: problem_description
Generate_PROMPT = """
Generate Solution for the following problem: {problem_description}
"""

# Placeholders: problem_description, solution
FORMAT_PROMPT = """
For the question described as {problem_description},
please extract a short and concise answer contains only one word/few words from the following solution: {solution}.
Make sure there are no additional comments or explanations in your response.
"""


# Placeholders: problem_description, thought
ContextualGenerate_PROMPT = """
Generate Solution for the following problem:

## Problem Description
{problem_description}

## Thought
{thought}
"""

# Placeholders: problem_description, solution
REVIEW_PROMPT = """
For the question described as {problem_description},
please review the following solution: {solution}, and provide a review result in boolean format.
```
You will be reviewing the problem-solving process of another AI assistant that has answered a mathematical question. Your task is to evaluate the solution and provide a detailed review for refinement. Follow these steps:
<step1>
Carefully read through the original question and entire solution, paying close attention to the relevant concepts, thinking process, calculations, and final result. Assess whether the solution is clear, logical, and well-organized. Write your initial review in <initialReview> tags.
</step1>
<step2>
Evaluate the reasoning and logic behind the solution. Ensure that the thinking process is clear, coherent, and mathematically sound. If you find any areas that need clarification or improvement, provide your suggestions inside <reasoningFeedback> tags.
</step2>
<step3>
Re-do the calculations presented in the <calculation> section **carefully and step-by-step** to verify the accuracy. Break down the calculations into the simplest possible steps and check each step for errors. You must not be careless and treat every part with rigor. Don't neglect checking any calculation part of the solution process. If you find any mistakes, note them down inside <calculationErrors> tags.
</step3>
<step4>
Provide an overall assessment of the solution's thoroughness, accuracy, and clarity inside <overallAssessment> tags. Highlight the strengths and weaknesses of the solution and offer suggestions for improvement, if any.
</step4>
use XML tags to present your complete evaluation, including initial review, calculation errors, reasoning feedback, and overall assessment, in a well-organized and easy-to-follow format.
Remember to be thorough, constructive, and professional in your review. Your goal is to help improve the quality and accuracy of the mathematical problem-solving process.
```
If you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your comments
"""

# Placeholders: problem_description, solution, feedback
# (A stray double quote that followed `{feedback}.` has been removed.)
REVISE_PROMPT = """
For the question described as {problem_description},
please evaluate and revise the solution provided: {solution}, taking into account the review feedbacks: {feedback}.
Then output the revised solution.
"""

# Placeholders: problem_description, solutions
FU_ENSEMBLE_PROMPT = """
### Given problem

{problem_description}

### We've got a list of solutions

<solutions>
{solutions}
</solutions>

### Instructions
Based on the given problem and solution candidates:

1. Analyze the pros and cons of each candidate solution
2. Consider how to integrate reasonable parts from different solutions
3. Formulate a more comprehensive and effective solution
"""

# Placeholders: problem_description, solutions
MD_ENSEMBLE_PROMPT = """
You are given a coding problem:
{problem_description}

Here is a list of possible solutions to the problem:
{solutions}

Using the inputs above, your goal is to choose the best solution to the code contest problem.
Don't just pick the most efficient solution. The main consideration is that the solution can fully solve the problem in a correct and robust manner.
Provide your final decision by writing the chosen solution letter.

Please maintain the JSON format in your response.
"""

# Placeholders: problem_description, solutions
SC_ENSEMBLE_PROMPT = """
I have generated the following solutions to the question: {problem_description}

{solutions}

Evaluate these solutions.
Select the most consistent solution based on majority consensus.
Give your answer with a single id of solution (without anything else).
"""

# Placeholders: problem_description
# (The section heading typo "instrcutions" has been corrected.)
REPHRASE_ON_PROBLEM_PROMPT = """
You are given a code contest problem:

### problem
{problem_description}

### instructions
Given the problem, Your Goal is:
Reflect on the problem, and describe it in your own words, in bullet points. Pay attention to small details, nuances, notes and examples in the problem description.
"""
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
# import random
|
||||
# from collections import Counter
|
||||
# from typing import Dict, List, Tuple
|
||||
|
||||
# from tenacity import retry, stop_after_attempt
|
||||
|
||||
# from examples.ags.w_action_node.operator import Operator
|
||||
# from examples.ags.w_action_node.optimized.Gsm8K.operators.template.op_prompt import *
|
||||
# from examples.ags.w_action_node.optimized.Gsm8K.operators.template.operator_an import *
|
||||
# from metagpt.actions.action_node import ActionNode
|
||||
# from metagpt.llm import LLM
|
||||
|
||||
|
||||
# class Custom(Operator):
|
||||
# def __init__(self, llm: LLM, name: str = "Custom"):
|
||||
# super.__init__(name, llm)
|
||||
|
||||
# async def __call__(self, input, instruction):
|
||||
# prompt = input + instruction
|
||||
# node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response
|
||||
|
||||
|
||||
# class Generate(Operator):
|
||||
# def __init__(self, llm: LLM, name: str = "Generate"):
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# async def __call__(self, problem):
|
||||
# prompt = Generate_PROMPT.format(problem_description=problem)
|
||||
# node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response
|
||||
|
||||
|
||||
# class ContextualGenerate(Operator):
|
||||
# def __init__(self, llm: LLM, name: str = "ContextualGenerate"):
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# @retry(stop=stop_after_attempt(3))
|
||||
# async def __call__(self, problem, context):
|
||||
# prompt = ContextualGenerate_PROMPT.format(problem_description=problem, thought=context)
|
||||
# # prompt = CONTEXTUAL_GENERATE_PROMPT.format(problem_description=problem, thought=context)
|
||||
# node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response
|
||||
|
||||
|
||||
# class Format(Generate):
|
||||
# def __init__(self, name: str = "Format", llm: LLM = LLM()):
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# # 使用JSON MODE 输出 Formatted 的结果
|
||||
# async def __call__(self, problem, solution):
|
||||
# prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
|
||||
# node = await ActionNode.from_pydantic(FormatOp).fill(context=prompt, llm=self.llm)
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response # {"solution":"xxx"}
|
||||
|
||||
|
||||
# class Review(Operator):
|
||||
# def __init__(self, criteria: str = "accuracy", name: str = "Review", llm: LLM = LLM()):
|
||||
# self.criteria = criteria
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# async def __call__(self, problem, solution):
|
||||
# prompt = REVIEW_PROMPT.format(problem_description=problem, solution=solution, criteria=self.criteria)
|
||||
# node = await ActionNode.from_pydantic(ReviewOp).fill(context=prompt, llm=self.llm)
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response # {"review_result": True, "feedback": "xxx"}
|
||||
|
||||
|
||||
# class Revise(Operator):
|
||||
# def __init__(self, name: str = "Revise", llm: LLM = LLM()):
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# async def __call__(self, problem, solution, feedback):
|
||||
# prompt = REVISE_PROMPT.format(problem_description=problem, solution=solution, feedback=feedback)
|
||||
# node = await ActionNode.from_pydantic(ReviseOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response # {"solution": "xxx"}
|
||||
|
||||
|
||||
# class FuEnsemble(Operator):
|
||||
# """
|
||||
# Function: Critically evaluating multiple solution candidates, synthesizing their strengths, and developing an enhanced, integrated solution.
|
||||
# """
|
||||
|
||||
# def __init__(self, name: str = "FuEnsemble", llm: LLM = LLM()):
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# async def __call__(self, solutions: List, problem):
|
||||
# solution_text = ""
|
||||
# for solution in solutions:
|
||||
# solution_text += str(solution) + "\n"
|
||||
# prompt = FU_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
|
||||
# node = await ActionNode.from_pydantic(FuEnsembleOp).fill(context=prompt, llm=self.llm)
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return {"solution": response["final_solution"]} # {"final_solution": "xxx"}
|
||||
|
||||
|
||||
# class MdEnsemble(Operator):
|
||||
# """
|
||||
# Paper: Can Generalist Foundation Models Outcompete Special-Purpose Tuning? Case Study in Medicine
|
||||
# Link: https://arxiv.org/abs/2311.16452
|
||||
# """
|
||||
|
||||
# def __init__(self, name: str = "MdEnsemble", llm: LLM = LLM(), vote_count: int = 3):
|
||||
# super().__init__(name, llm)
|
||||
# self.vote_count = vote_count
|
||||
|
||||
# @staticmethod
|
||||
# def shuffle_answers(solutions: List[str]) -> Tuple[List[str], Dict[str, str]]:
|
||||
# shuffled_solutions = solutions.copy()
|
||||
# random.shuffle(shuffled_solutions)
|
||||
# answer_mapping = {chr(65 + i): solutions.index(solution) for i, solution in enumerate(shuffled_solutions)}
|
||||
# return shuffled_solutions, answer_mapping
|
||||
|
||||
# async def __call__(self, solutions: List[str], problem: str):
|
||||
# print(f"solution count: {len(solutions)}")
|
||||
# all_responses = []
|
||||
|
||||
# for _ in range(self.vote_count):
|
||||
# shuffled_solutions, answer_mapping = self.shuffle_answers(solutions)
|
||||
|
||||
# solution_text = ""
|
||||
# for index, solution in enumerate(shuffled_solutions):
|
||||
# solution_text += f"{chr(65 + index)}: \n{str(solution)}\n\n\n"
|
||||
|
||||
# prompt = MD_ENSEMBLE_PROMPT.format(solutions=solution_text, problem_description=problem)
|
||||
# node = await ActionNode.from_pydantic(MdEnsembleOp).fill(context=prompt, llm=self.llm)
|
||||
# response = node.instruct_content.model_dump()
|
||||
|
||||
# answer = response.get("solution_letter", "")
|
||||
# answer = answer.strip().upper()
|
||||
|
||||
# if answer in answer_mapping:
|
||||
# original_index = answer_mapping[answer]
|
||||
# all_responses.append(original_index)
|
||||
|
||||
# most_frequent_index = Counter(all_responses).most_common(1)[0][0]
|
||||
# final_answer = solutions[most_frequent_index]
|
||||
# return {"solution": final_answer} # {"final_solution": "xxx"}
|
||||
|
||||
|
||||
# class Rephrase(Operator):
|
||||
# """
|
||||
# Paper: Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering
|
||||
# Link: https://arxiv.org/abs/2404.14963
|
||||
# Paper: Achieving >97% on GSM8K: Deeply Understanding the Problems Makes LLMs Better Solvers for Math Word Problems
|
||||
# Link: https://arxiv.org/abs/2404.14963
|
||||
# """
|
||||
|
||||
# def __init__(self, name: str = "Rephrase", llm: LLM = LLM()):
|
||||
# super().__init__(name, llm)
|
||||
|
||||
# async def __call__(self, problem: str) -> str:
|
||||
# prompt = REPHRASE_ON_PROBLEM_PROMPT.format(problem_description=problem)
|
||||
# node = await ActionNode.from_pydantic(RephraseOp).fill(context=prompt, llm=self.llm, mode="single_fill")
|
||||
# response = node.instruct_content.model_dump()
|
||||
# return response # {"rephrased_problem": "xxx"}
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
# from typing import List
|
||||
|
||||
# from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# class GenerateOp(BaseModel):
|
||||
# response: str = Field(default="", description="Your solution for this problem")
|
||||
|
||||
|
||||
# class FormatOp(BaseModel):
|
||||
# solution: str = Field(default="", description="Your formatted answer for this problem")
|
||||
|
||||
|
||||
# class ReviewOp(BaseModel):
|
||||
# review_result: bool = Field(
|
||||
# default=False,
|
||||
# description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'",
|
||||
# )
|
||||
# feedback: str = Field(
|
||||
# default="",
|
||||
# description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.",
|
||||
# )
|
||||
|
||||
|
||||
# class ReviseOp(BaseModel):
|
||||
# solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
|
||||
|
||||
|
||||
# class FuEnsembleOp(BaseModel):
|
||||
# thought: str = Field(
|
||||
# default="",
|
||||
# description="Analyze the solutions and think how to combine the advantages of various solutions to form the best possible solution.",
|
||||
# )
|
||||
# final_solution: str = Field(default="", description="Output the final solution after analysis and integration")
|
||||
|
||||
|
||||
# class MdEnsembleOp(BaseModel):
|
||||
# thought: str = Field(
|
||||
# default="""Example thought process:
|
||||
# 1. Examined the 'compare_one' function.
|
||||
# 2. The function correctly handles both numeric and string inputs by converting strings to floats.
|
||||
# 3. It properly compares two values and returns the larger one.
|
||||
# 4. The function returns None if the values are equal, which might be useful in some contexts but could be improved by returning either value.
|
||||
# 5. The use of 'isinstance' for type checking is a good practice.
|
||||
# 6. The function handles decimal separators well by replacing ',' with '.'.
|
||||
# Overall, this solution effectively solves the problem of comparing two values, with good error handling and flexibility. It could be improved by specifying behavior for equal values, but it's a strong solution as is.""",
|
||||
# description="Step-by-step analysis of the solutions to determine the best one.",
|
||||
# )
|
||||
# solution_letter: str = Field(default="", description="The letter of the chosen best solution (only one letter).")
|
||||
|
||||
|
||||
# class RephraseOp(BaseModel):
|
||||
# rephrased_problem: str = Field(default="", description="Rephrased problem description for this problem")
|
||||
|
||||
|
||||
# class ScEnsembleOp(BaseModel):
|
||||
# solution_letter: str = Field(default="", description="The letter of most consistent solution.")
|
||||
|
||||
|
||||
# class StepByStepOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="A list of steps to solve the problem")
|
||||
|
||||
|
||||
# class DecomposeOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps to solve the problem")
|
||||
|
||||
|
||||
# class MathStepBreakdownOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps to solve the math problem")
|
||||
|
||||
|
||||
# class MathSolveOp(BaseModel):
|
||||
# solution: str = Field(default="", description="Step-by-step solution to the mathematical problem")
|
||||
|
||||
|
||||
# class MathProblemDecomposerOp(BaseModel):
|
||||
# decomposed_steps: List[str] = Field(default_factory=list, description="List of step-by-step subproblems")
|
||||
|
||||
|
||||
# class MathBreakdownOp(BaseModel):
|
||||
# breakdown: str = Field(default="", description="Detailed breakdown of the math problem solution")
|
||||
|
||||
|
||||
# class DecomposeOp(BaseModel):
|
||||
# sub_problems: List[str] = Field(
|
||||
# default_factory=list, description="List of sub-problems derived from the main problem"
|
||||
# )
|
||||
|
||||
|
||||
# class CriticalMathSolverOp(BaseModel):
|
||||
# solution: str = Field(default="", description="Detailed step-by-step solution with critical thinking applied")
|
||||
# validation: bool = Field(default=False, description="Whether the solution has been validated as correct")
|
||||
|
||||
|
||||
# class DecomposeOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of decomposed steps for solving the problem")
|
||||
|
||||
|
||||
# class SimplifyOp(BaseModel):
|
||||
# simplified_steps: List[str] = Field(
|
||||
# default_factory=list, description="List of simplified steps to solve the problem"
|
||||
# )
|
||||
|
||||
|
||||
# class MathDecomposeOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps to solve the math problem")
|
||||
|
||||
|
||||
# class DecomposeOp(BaseModel):
|
||||
# subproblems: List[str] = Field(default_factory=list, description="List of subproblems")
|
||||
|
||||
|
||||
# class StepByStepOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps to solve the problem")
|
||||
# final_solution: str = Field(default="", description="The final answer to the problem")
|
||||
|
||||
|
||||
# class MathStepByStepOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of step-by-step solutions")
|
||||
# final_answer: str = Field(default="", description="The final answer to the math problem")
|
||||
|
||||
|
||||
# class MathStepBreakdownOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps to solve the math problem")
|
||||
|
||||
|
||||
# class DecomposeOp(BaseModel):
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps or sub-problems")
|
||||
|
||||
|
||||
# class BreakdownAndSolveOp(BaseModel):
|
||||
# solution: str = Field(default="", description="The final solution to the problem")
|
||||
# steps: List[str] = Field(default_factory=list, description="List of steps taken to solve the problem")
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
class Gsm8kGraph(Graph):
    """Graph of operators that solves GSM8K-style math word problems.

    Every public coroutine is one solving strategy. Each of them ends by handing
    a solution text to ``self.format.math_answer_format`` and returns whatever
    that call returns.
    """

    def __init__(self, name: str, llm: LLM, criteria: str, vote_count: int = 5) -> None:
        """Create the operators shared by all strategies.

        Args:
            name: Graph name, forwarded to ``Graph.__init__``.
            llm: LLM handed to every operator.
            criteria: Review criteria passed to the ``Review`` operator.
            vote_count: Forwarded to ``MdEnsemble`` as ``vote_count``.
        """
        super().__init__(name, llm)
        self.criteria = criteria
        self.generate = Generate(llm=llm)
        self.rephrase = Rephrase(llm=llm)
        self.fuensemble = FuEnsemble(llm=llm)
        self.mdensemble = MdEnsemble(llm=llm, vote_count=vote_count)
        self.review = Review(llm=llm, criteria=criteria)
        self.revise = Revise(llm=llm)
        self.format = Format(llm=llm)

    async def __call__(self, problem: str):
        """Default strategy: rephrase the problem, generate once, format the answer."""
        rephrased_problem = await self.rephrase.math_rephrase(problem)
        solution = await self.generate.math_generate(rephrased_problem)
        return await self.format.math_answer_format(solution["solution"])

    async def baseline(self, problem: str):
        """Generate a solution for the raw problem and format it (no rephrasing)."""
        solution = await self.generate(problem)
        return await self.format.math_answer_format(solution["solution"])

    async def simple_ensemble(self, problem: str, ensemble_count: int = 3):
        """Generate ``ensemble_count`` solutions and reconcile them.

        When every formatted answer carries the same ``"solution"`` value the
        first formatted answer is returned directly; otherwise ``MdEnsemble``
        picks a solution, which is then formatted.
        """
        rephrased_problem = await self.rephrase.math_rephrase(problem)
        solution_list = []
        answer_list = []

        for _ in range(ensemble_count):
            generated = await self.generate.math_generate(rephrased_problem)
            solution = generated.get("solution")
            answer = await self.format.math_answer_format(solution)
            solution_list.append(solution)
            answer_list.append(answer)

        if len({answer.get("solution") for answer in answer_list}) == 1:
            return answer_list[0]

        # TODO: for math problems a self-consistency ensemble would probably work better.
        solution = await self.mdensemble("math", solution_list, problem)
        return await self.format.math_answer_format(solution["final_solution"])

    async def single_solve(self, problem: str, max_loop: int = 3):
        """Generate one solution and refine it through at most ``max_loop`` review/revise passes.

        The loop stops early as soon as a review reports ``review_result`` as truthy.
        """
        rephrased_problem = await self.rephrase.math_rephrase(problem)
        generated = await self.generate.math_generate(rephrased_problem)
        # Track the solution as plain text for the whole loop. The previous code
        # rebound one variable from dict to str, which broke the second pass and
        # sent a dict to the formatter when the first review succeeded.
        solution_text = generated["solution"]
        for _ in range(max_loop):
            review_feedback = await self.review(rephrased_problem, solution_text)
            if review_feedback["review_result"]:
                break
            revised = await self.revise(rephrased_problem, solution_text, review_feedback["feedback"])
            solution_text = revised.get("revised_solution")
        return await self.format.math_answer_format(solution_text)

    async def _build_cot_problem(self, problem: str) -> str:
        """Return the problem annotated with its extracted info and core question."""
        core = await self.rephrase.math_core(problem)
        extract = await self.rephrase.math_extract(problem)
        return f"### Problem\n{problem}\n### Problem-Solving Info\n{extract}\n### Core Question\n{core}\n"

    async def cot_ensemble(self, problem: str, ensemble_count: int = 1):
        """Generate ``ensemble_count`` solutions from annotated problems and fuse them."""
        solution_list = []
        for _ in range(ensemble_count):
            formatted_problem = await self._build_cot_problem(problem)
            generated = await self.generate.math_generate(formatted_problem)
            solution_list.append(generated.get("solution"))
        fused = await self.fuensemble(solution_list, problem)
        # Format the fused solution text; the whole response dict used to be passed here.
        return await self.format.math_answer_format(fused["solution"])

    async def cot(self, problem: str):
        """Generate a single solution from the annotated problem and format it."""
        formatted_problem = await self._build_cot_problem(problem)
        generated = await self.generate.math_generate(formatted_problem)
        # Format the solution text; the whole response dict used to be passed here.
        return await self.format.math_answer_format(generated["solution"])
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
# @Author : issac
|
||||
# @Desc : optimizer for graph
|
||||
|
||||
import ast
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
|
@ -19,12 +20,17 @@ from examples.ags.w_action_node.prompts.optimize_prompt import (
|
|||
GRAPH_INPUT,
|
||||
GRAPH_OPTIMIZE_PROMPT,
|
||||
GRAPH_TEMPLATE,
|
||||
OPERATOR_INPUT,
|
||||
OPERATOR_CODE_EXAMPLES,
|
||||
OPERATOR_EXTEND_INPUT_PROMPT,
|
||||
OPERATOR_EXTEND_PROMPT,
|
||||
OPERATOR_OPTIMIZE_GRAPH_EXAMPLE,
|
||||
OPERATOR_OPTIMIZE_INPUT_PROMPT,
|
||||
OPERATOR_OPTIMIZE_PROMPT,
|
||||
OPERATOR_SELECT_INPUT_PROMPT,
|
||||
OPERATOR_SELECT_PROMPT,
|
||||
OPERATOR_TEMPLATE,
|
||||
)
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.logs import logger
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
|
||||
config_iterate_path = "iterate"
|
||||
|
|
@ -32,11 +38,25 @@ config_iterate_path = "iterate"
|
|||
DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
|
||||
OptimizerType = Literal["Complete", "Graph", "Operator"]
|
||||
|
||||
evaluator = Evaluator(eval_path="eval")
|
||||
|
||||
# Schema of the structured response requested from the LLM when asking it to
# propose one new operator (filled through ActionNode.from_pydantic(OperatorExtend)).
class OperatorExtend(BaseModel):
    # Operator name; used as the key when the new operator is recorded.
    name: str = Field(default="", description="name")
    # Description stored for the operator in the template operator.json.
    description: str = Field(default="", description="description")
    # Interface stored next to the description in operator.json.
    interface: str = Field(default="", description="interface")
    # presumably the name of the module-level prompt variable for this operator - TODO confirm
    prompt_variable_name: str = Field(default="", description="prompt_name")
    # presumably the prompt text assigned to that variable - TODO confirm
    prompt: str = Field(default="", description="prompt")
    # Source code of the proposed operator.
    code: str = Field(default="", description="code")
|
||||
|
||||
|
||||
# Empty placeholder schema: declares no fields.
class OperatorOptimize(BaseModel):
    pass
|
||||
# Schema holding the operators chosen in a selection step.
# NOTE(review): the expected format of the string (e.g. comma-separated names)
# is not visible here - confirm against the select prompt.
class OperatorSelect(BaseModel):
    selected_operators: str = Field(default="", description="selected operators")
|
||||
|
||||
|
||||
# Schema with four free-text fields for an operator optimisation response.
# NOTE(review): the class name looks like a misspelling of "OperatorOptimize";
# it is left unchanged because other code may reference this spelling.
class OperatorOptimze(BaseModel):
    # presumably a summary of what was changed - TODO confirm
    modification: str = Field(default="", description="modification")
    # presumably the code of the graph that exercises the operator - TODO confirm
    solvegraph: str = Field(default="", description="solvegraph")
    # presumably the updated operator description - TODO confirm
    operator_description: str = Field(default="", description="operator_description")
    # presumably the updated operator prompt - TODO confirm
    prompt: str = Field(default="", description="prompt")
|
||||
|
||||
|
||||
class GraphOptimize(BaseModel):
|
||||
|
|
@ -60,6 +80,7 @@ class Optimizer:
|
|||
self.optimize_llm_config = opt_llm_config
|
||||
self.execute_llm_config = exec_llm_config
|
||||
self.optimize_llm = create_llm_instance(self.optimize_llm_config)
|
||||
# TODO 这里出错在哪里?
|
||||
self.dataset = dataset
|
||||
self.graph = None # 初始化为 None,稍后加载
|
||||
self.operators = operators
|
||||
|
|
@ -73,56 +94,27 @@ class Optimizer:
|
|||
self.type = q_type
|
||||
self.round = 1 # 起始轮次
|
||||
|
||||
def _initialize_oprimizer(self):
    """Placeholder hook; currently does nothing.

    NOTE(review): the name looks like a misspelling of ``_initialize_optimizer``;
    it is kept as-is so existing references to this spelling keep working.
    """
    pass
|
||||
|
||||
def _initialize_operator(self):
    """Placeholder for operator initialisation; currently does nothing."""
    # TODO @issac
    pass
|
||||
|
||||
def _initialize(self):
    """Initialise the optimize prompt, operators and graph for the dataset.

    The dataset counts as initialised when ``operator.py`` and ``prompt.py``
    both exist under ``<root_path>/graphs/round_1``. Otherwise the operator
    initialisation hook is run.

    Returns:
        True when the round-1 files already exist, False otherwise.
    """
    # The hook is defined under the spelling `_initialize_oprimizer`; the
    # previous call to `_initialize_optimizer` raised AttributeError.
    self._initialize_oprimizer()

    round_1_path = f"{self.root_path}/graphs/round_1"
    required_files = ["operator.py", "prompt.py"]

    # Decide on the list of missing files itself. The previous helper returned
    # a (bool, list) tuple, which is always truthy, so the "not initialized"
    # branch could never run.
    missing_files = [
        file_name for file_name in required_files if not os.path.exists(os.path.join(round_1_path, file_name))
    ]

    if not missing_files:
        logger.info(f"{self.dataset} has been initialized")
        return True

    logger.info(f"{self.dataset} has not been initialized")

    # Iteratively optimise the operators; the optimizer can see the prompt and the operator.
    self._initialize_operator()

    # Initialise the graph by taking it manually from the template (COT).
    return False
|
||||
|
||||
def optimize(self, mode: OptimizerType = "Complete", max_rounds: int = 100):
|
||||
"""
|
||||
Optimize the graph and operator for the dataset.
|
||||
"""
|
||||
if mode == "Complete":
|
||||
self._initialize() # Operator's Optimization
|
||||
# self._initialize() # 构造初始图,从Template中取出模板进行构建 # TODO 这个适合完整了之后再做
|
||||
self._optimize_operator() # 扩展Operator;优化Operator
|
||||
|
||||
if mode == "Operator":
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
score = loop.run_until_complete(self._optimize_operator(1))
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
return None
|
||||
|
||||
for opt_round in range(max_rounds):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
score = loop.run_until_complete(self._optimize_graph())
|
||||
finally:
|
||||
|
|
@ -138,43 +130,45 @@ class Optimizer:
|
|||
"""
|
||||
动态加载指定轮次的 Graph 类。
|
||||
"""
|
||||
graphs_path = graphs_path.replace("\\", ".").replace("/", ".")
|
||||
graph_module_name = f"{graphs_path}.round_{round_number}.graph"
|
||||
|
||||
try:
|
||||
graph_module = __import__(graph_module_name, fromlist=[""])
|
||||
# TODO 这里似乎有BUG
|
||||
graph_class = getattr(graph_module, f"{self.dataset}Graph")
|
||||
graph_class = getattr(graph_module, "SolveGraph")
|
||||
self.graph = graph_class
|
||||
except ImportError as e:
|
||||
print(f"Error loading graph for round {round_number}: {e}")
|
||||
raise
|
||||
|
||||
def _read_files(self, round_number, graphs_path):
|
||||
def _read_graph_files(self, round_number, graphs_path):
|
||||
"""
|
||||
动态读取指定轮次的 Prompt和Graph。
|
||||
"""
|
||||
# 构建 prompt.py 文件的相对路径
|
||||
# examples/ags/w_action_node/optimized/gsm8k/graphs/round_1
|
||||
prompt_file_path = os.path.join(graphs_path, "prompt.py")
|
||||
graph_file_path = os.path.join(graphs_path, "graph.py")
|
||||
operator_file_path = os.path.join(graphs_path, "operator.py")
|
||||
# examples/ags/w_action_node/optimized/Gsm8k/graphs/round_1
|
||||
prompt_file_path = os.path.join(graphs_path, f"round_{round_number}", "prompt.py")
|
||||
graph_file_path = os.path.join(graphs_path, f"round_{round_number}", "graph.py")
|
||||
|
||||
try:
|
||||
with open(prompt_file_path, "r", encoding="utf-8") as file:
|
||||
prompt_content = file.read()
|
||||
with open(graph_file_path, "r", encoding="utf-8") as file:
|
||||
graph_content = file.read()
|
||||
with open(operator_file_path, "r", encoding="utf-8") as file:
|
||||
operator_content = file.read()
|
||||
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: File not found for round {round_number}: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error loading prompt for round {round_number}: {e}")
|
||||
raise
|
||||
return prompt_content, graph_content, operator_content
|
||||
return prompt_content, graph_content
|
||||
|
||||
def _load_scores(self):
|
||||
rounds_dir = os.path.join(self.root_path, "graphs")
|
||||
def _load_scores(self, path=None, mode="Graph"):
|
||||
if mode == "Graph":
|
||||
rounds_dir = f"{self.root_path}/graphs"
|
||||
else:
|
||||
rounds_dir = path
|
||||
self.top_scores = []
|
||||
|
||||
# 遍历所有轮次的文件夹
|
||||
|
|
@ -189,6 +183,7 @@ class Optimizer:
|
|||
|
||||
if filename.endswith(".csv"):
|
||||
# 文件名就是分数
|
||||
# TODO 在这个版本里面,使用csv文件存储分数不太行
|
||||
score = float(filename[:-4]) # 去除.csv
|
||||
|
||||
self.top_scores.append(
|
||||
|
|
@ -234,11 +229,11 @@ class Optimizer:
|
|||
# 返回选定的条目
|
||||
return sorted_items[selected_index]
|
||||
|
||||
def _get_top_rounds(self):
|
||||
def _get_top_rounds(self, path=None, mode="Graph"):
|
||||
"""
|
||||
返回分数最高的 top_x 个轮次,并确保返回的轮次不重复。
|
||||
"""
|
||||
self._load_scores()
|
||||
self._load_scores(path, mode)
|
||||
# 创建一个集合来跟踪已包含的轮次
|
||||
unique_rounds = set()
|
||||
unique_top_scores = []
|
||||
|
|
@ -261,8 +256,11 @@ class Optimizer:
|
|||
|
||||
return unique_top_scores
|
||||
|
||||
def _load_experience(self):
|
||||
rounds_dir = os.path.join(self.root_path, "graphs")
|
||||
def _load_experience(self, path=None, mode: str = "Graph"):
|
||||
if mode == "Graph":
|
||||
rounds_dir = f"{self.root_path}/graphs"
|
||||
else:
|
||||
rounds_dir = path # 这个path对应的是具体的operator的路径
|
||||
experience_data = defaultdict(lambda: {"score": None, "success": [], "failure": []})
|
||||
|
||||
# 遍历所有轮次的文件夹
|
||||
|
|
@ -298,13 +296,42 @@ class Optimizer:
|
|||
experience_data = dict(experience_data)
|
||||
|
||||
# 保存为JSON文件
|
||||
output_path = os.path.join(self.root_path, "graphs", "processed_experience.json")
|
||||
# TODO 这里需要再check一下有没有冲突
|
||||
output_path = os.path.join(rounds_dir, round_dir, "processed_experience.json")
|
||||
print(output_path)
|
||||
|
||||
with open(output_path, "w", encoding="utf-8") as outfile: # 指定 UTF-8 编码
|
||||
json.dump(experience_data, outfile, indent=4, ensure_ascii=False) # ensure_ascii=False 以正确保存中文字符
|
||||
|
||||
print(f"Processed experience data saved to {output_path}")
|
||||
return experience_data
|
||||
|
||||
def _load_operator_description(self, id, operator_name, file_path):
|
||||
"""
|
||||
针对初始Operator,我们从最外层中读取
|
||||
对于修改后的Operator,我们从对应的round中读取
|
||||
"""
|
||||
with open(file_path, "r") as f:
|
||||
operator_data = json.load(f)
|
||||
matched_data = operator_data[operator_name]
|
||||
desc = matched_data["description"]
|
||||
interface = matched_data["interface"]
|
||||
operator_description = f"{id}. {operator_name}: {desc}, with interface {interface})."
|
||||
return operator_description
|
||||
|
||||
def _load_operators_description(self, mode: OptimizerType = "Graph", operators=None):
    """Concatenate the numbered descriptions of a list of operators.

    Args:
        mode: ``"Graph"`` reads ``<root_path>/graphs/template/operator.json``
            and always describes ``self.operators``; any other value reads
            ``<root_path>/operators/template/operator.json``.
        operators: Operator names to describe when ``mode`` is not
            ``"Graph"``. Falls back to ``self.operators`` when omitted.

    Returns:
        One description per line, numbered from 1, each followed by a newline.
    """
    if mode == "Graph":
        path = f"{self.root_path}/graphs/template/operator.json"
        operators = self.operators
    else:
        path = f"{self.root_path}/operators/template/operator.json"
        if operators is None:
            # Previously the default of None crashed in enumerate(None).
            operators = self.operators

    return "".join(
        f"{self._load_operator_description(index, operator, path)}\n"
        for index, operator in enumerate(operators, start=1)
    )
|
||||
|
||||
async def _optimize_graph(self):
|
||||
"""
|
||||
Optimize Graph's Structure and Prompt
|
||||
|
|
@ -322,7 +349,7 @@ class Optimizer:
|
|||
|
||||
print(top_rounds)
|
||||
|
||||
prompt, graph_load, operator = self._read_files(sample["round"])
|
||||
prompt, graph_load = self._read_graph_files(sample["round"], graph_path)
|
||||
score = sample["score"]
|
||||
|
||||
# 正则表达式匹配 SolveGraph 开始的内容
|
||||
|
|
@ -348,16 +375,23 @@ class Optimizer:
|
|||
else:
|
||||
experience = f"No experience data found for round {current_round}."
|
||||
|
||||
operator_description = self._load_operators_description("Graph")
|
||||
|
||||
graph_input = GRAPH_INPUT.format(
|
||||
experinece=experience, score=score, graph=graph[0], prompt=prompt, type=self.type
|
||||
experience=experience,
|
||||
score=score,
|
||||
graph=graph[0],
|
||||
prompt=prompt,
|
||||
operator_description=operator_description,
|
||||
type=self.type,
|
||||
)
|
||||
graph_system = GRAPH_OPTIMIZE_PROMPT.format(type=self.type)
|
||||
|
||||
graph_optimize_prompt = graph_system + graph_input # TODO 看一眼谁先谁后这个地方
|
||||
graph_optimize_prompt = graph_system + graph_input
|
||||
|
||||
# TODO 从这里开始,Graph Optimize 可以作为一个Operator放入 Operator.py 之中
|
||||
graph_optimize_node = await ActionNode.from_pydantic(GraphOptimize).fill(
|
||||
context=graph_optimize_prompt, mode="context_fill", llm=self.llm
|
||||
context=graph_optimize_prompt, mode="context_fill", llm=self.optimize_llm
|
||||
)
|
||||
|
||||
max_retries = 5
|
||||
|
|
@ -379,12 +413,10 @@ class Optimizer:
|
|||
time.sleep(5)
|
||||
|
||||
graph_match = response["graph"]
|
||||
prompt_match = response["prompt"]
|
||||
modification_match = response["modification"]
|
||||
prompt = response["prompt"]
|
||||
modification = response["modification"]
|
||||
|
||||
modification = modification_match.group(1)
|
||||
prompt = prompt_match.group(1)
|
||||
graph = GRAPH_TEMPLATE.format(graph=graph_match.group(1), round=self.round + 1)
|
||||
graph = GRAPH_TEMPLATE.format(graph=graph_match, round=self.round + 1)
|
||||
|
||||
# 将 graph.py 文件写入到目录中
|
||||
with open(os.path.join(directory, "graph.py"), "w", encoding="utf-8") as file:
|
||||
|
|
@ -405,127 +437,326 @@ class Optimizer:
|
|||
"after": None,
|
||||
"succeed": None,
|
||||
}
|
||||
|
||||
# TODO 把这个放到最后,这样succeed等参数才能被设置
|
||||
with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
|
||||
json.dump(experience, file, ensure_ascii=False, indent=4)
|
||||
|
||||
score = evaluator.validation_evaluate(
|
||||
self.dataset, self.graph, {"dataset": self.dataset, "llm_config": self.execute_llm_config}
|
||||
self._load_graph(self.round + 1, graph_path)
|
||||
|
||||
evaluator = Evaluator(eval_path=directory)
|
||||
|
||||
score = await evaluator.validation_evaluate(
|
||||
self.dataset, self.graph, {"dataset": self.dataset, "llm_config": self.execute_llm_config}, directory
|
||||
)
|
||||
experience["after"] = score
|
||||
experience["succeed"] = bool(score > experience["before"])
|
||||
return score
|
||||
|
||||
async def _optimize_operator(self):
|
||||
def _read_operator_files(self, operator, round_number, operator_path):
|
||||
def find_operator_prompt(operator, file_path):
|
||||
# 构建变量名
|
||||
target_var = f"{operator}_PROMPT" # -> 大写 Generate_PROMPT ->
|
||||
print(f"Target variable: {target_var}")
|
||||
|
||||
# 打开并读取文件内容
|
||||
with open(file_path, "r") as file:
|
||||
content = file.read()
|
||||
|
||||
# 使用正则表达式查找变量定义
|
||||
pattern = rf'{target_var}\s*=\s*"""\s*(.*?)\s*"""'
|
||||
print(f"Regex pattern: {pattern}")
|
||||
match = re.search(pattern, content, re.DOTALL)
|
||||
if match:
|
||||
# 返回变量的值
|
||||
return match.group(1).strip()
|
||||
else:
|
||||
return None
|
||||
|
||||
if round_number == 1:
|
||||
prompt_file_path = os.path.join(operator_path, "template", "op_prompt.py") # template path
|
||||
prompt_content = find_operator_prompt(operator, prompt_file_path)
|
||||
operator_file_path = os.path.join(operator_path, "template", "operator.py")
|
||||
with open(operator_file_path, "r") as file:
|
||||
content = file.read()
|
||||
pattern = rf"class\s+{re.escape(operator)}\(.*?\):\s*.*?(?=\nclass|\Z)"
|
||||
match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
|
||||
operator_content = match.group(0).strip()
|
||||
operator_content = OPERATOR_TEMPLATE.format(
|
||||
operator_name=operator, round_number=round_number, operator=operator_content
|
||||
)
|
||||
graph_file_path = os.path.join(operator_path, "template", "graph.py")
|
||||
with open(graph_file_path, "r", encoding="utf-8") as file:
|
||||
graph_content = file.read()
|
||||
return operator_content, prompt_content, graph_content
|
||||
|
||||
operator_file_path = os.path.join(operator_path, f"{operator}", f"round_{round_number-1}", "operator.py")
|
||||
prompt_file_path = os.path.join(operator_path, f"{operator}", f"round_{round_number-1}", "prompt.py")
|
||||
graph_file_path = os.path.join(operator_path, f"{operator}", f"round_{round_number-1}", "graph.py")
|
||||
|
||||
try:
|
||||
with open(operator_file_path, "r", encoding="utf-8") as file:
|
||||
operator_content = file.read()
|
||||
with open(prompt_file_path, "r", encoding="utf-8") as file:
|
||||
prompt_content = find_operator_prompt(operator, prompt_file_path)
|
||||
|
||||
with open(graph_file_path, "r", encoding="utf-8") as file:
|
||||
graph_content = file.read()
|
||||
|
||||
except FileNotFoundError as e:
|
||||
print(f"Error: File not found for round {round_number}: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error loading prompt for round {round_number}: {e}")
|
||||
raise
|
||||
return operator_content, prompt_content, graph_content
|
||||
|
||||
async def _optimize_operator(self, extend_rounds: int = 5):
|
||||
"""
|
||||
Optimize Graph's Structure and Prompt
|
||||
生成关系
|
||||
1. round_1/graph.py, round_1/prompt.py 是在operator优化完后生成的。从新的Optimizer.Operators 进行类属性的分配;Operator将优化后的Prompt放进prompt.py之中
|
||||
2. template 中 op_prompt, operator_an, 是为了支持operator.py, operator.json 是为了获取新的Operator描述
|
||||
关系应该是Operator优化自己玩自己的,然后取最后的最佳结果连接过去
|
||||
"""
|
||||
# 获取项目的根目录
|
||||
graph_path = f"{self.root_path}/operators"
|
||||
operators_path = f"{self.root_path}/operators"
|
||||
|
||||
# 创建文件夹(如果不存在)
|
||||
directory = os.path.join(graph_path, f"round_{self.round + 1}")
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
# 读取Template文件夹
|
||||
template_path = f"{self.root_path}/operators/template"
|
||||
template_json_path = f"{template_path}/operator.json"
|
||||
template_op_prompt_path = f"{template_path}/op_prompt.py"
|
||||
template_an_path = f"{template_path}/operator_an.py"
|
||||
template_operator_path = f"{template_path}/operator.py"
|
||||
|
||||
top_rounds = self._get_top_rounds()
|
||||
# Read the Template information and run the Operator Extend step
|
||||
extend_operators_name = []
|
||||
extend_operators_codes = {} # 保存扩展后的Operator Code
|
||||
extend_operators_prompts = {}
|
||||
|
||||
sample = self._select_round(top_rounds)
|
||||
# 扩展阶段
|
||||
# TODO 现在扩展阶段,出现第二段直接啥也没有的状况
|
||||
for extend_round in range(extend_rounds):
|
||||
current_operators = self.operators + extend_operators_name
|
||||
operators_descriptions = self._load_operators_description("Operator", current_operators)
|
||||
operator_extend_system_prompt = OPERATOR_EXTEND_PROMPT.format(type=self.type)
|
||||
operator_extend_input = OPERATOR_EXTEND_INPUT_PROMPT.format(
|
||||
operators=operators_descriptions, code=OPERATOR_CODE_EXAMPLES
|
||||
)
|
||||
extend_prompt = operator_extend_system_prompt + operator_extend_input
|
||||
operator_extend_node = await ActionNode.from_pydantic(OperatorExtend).fill(
|
||||
context=extend_prompt, mode="context_fill", llm=self.optimize_llm
|
||||
)
|
||||
extend_response = operator_extend_node.instruct_content.model_dump()
|
||||
extend_description = {
|
||||
"description": extend_response["description"],
|
||||
"interface": extend_response["interface"],
|
||||
}
|
||||
# 读取并更新JSON文件
|
||||
if os.path.exists(template_json_path):
|
||||
with open(template_json_path, "r") as json_file:
|
||||
operator_data = json.load(json_file)
|
||||
else:
|
||||
operator_data = []
|
||||
|
||||
print(top_rounds)
|
||||
operator_data[extend_response["name"]] = extend_description
|
||||
|
||||
prompt, graph_load, operator_load = self._read_files(sample["round"])
|
||||
score = sample["score"]
|
||||
with open(template_json_path, "w") as json_file:
|
||||
json.dump(operator_data, json_file, indent=4)
|
||||
|
||||
# 正则表达式匹配 SolveGraph 开始的内容
|
||||
operator_pattern = rf"class {self.op}(Operator):.+"
|
||||
extend_operators_codes[extend_response["name"]] = extend_response["code"]
|
||||
extend_operators_prompts[extend_response["name"]] = {
|
||||
"name": extend_response["prompt_variable_name"],
|
||||
"content": extend_response["prompt"],
|
||||
}
|
||||
extend_operators_name.append(extend_response["name"])
|
||||
|
||||
graph_pattern = r"class SolveGraph:.+"
|
||||
|
||||
# 使用re.findall找到所有匹配项
|
||||
operator = re.findall(operator_pattern, operator_load, re.DOTALL)
|
||||
graph = re.findall(graph_pattern, graph_load, re.DOTALL)
|
||||
|
||||
# 加载处理过的 experience 数据
|
||||
processed_experience = self._load_experience()
|
||||
|
||||
# 获取当前轮次的 experience 数据
|
||||
current_round = int(sample["round"]) # cast the round id to int before the experience lookup
|
||||
experience_data = processed_experience.get(current_round)
|
||||
|
||||
if experience_data:
|
||||
# 构建 experience 字符串
|
||||
experience = f"Original Score: {experience_data['score']}\n"
|
||||
experience += "Failed modifications:\n"
|
||||
for mod in experience_data["failure"]:
|
||||
experience += f"- {mod['modification']} (Score: {mod['score']})\n"
|
||||
experience += "\n\nNote: Reference failed experiences, avoid trying failed approaches again, attempt to change your thinking, not limited to using more advanced Python syntax like for, if, else, etc., or modifying the Prompt part"
|
||||
else:
|
||||
experience = f"No experience data found for round {current_round}."
|
||||
|
||||
operator_input = OPERATOR_INPUT.format(
|
||||
experinece=experience, score=score, operator=operator[0], prompt=prompt, type=self.type, graph=graph[0]
|
||||
# 筛选阶段
|
||||
operator_select_prompt = OPERATOR_SELECT_PROMPT.format(type=self.type, count=1)
|
||||
operator_select_input_prompt = OPERATOR_SELECT_INPUT_PROMPT.format(
|
||||
fixed_operators=self._load_operators_description("Operator", self.operators),
|
||||
candidate_operators=self._load_operators_description("Operator", extend_operators_name),
|
||||
)
|
||||
operator_system = OPERATOR_OPTIMIZE_PROMPT.format(type=self.type)
|
||||
|
||||
node_prompt = operator_system + operator_input # TODO 看一眼谁先谁后这个地方
|
||||
|
||||
node = await ActionNode.from_pydantic(GraphOptimize).fill(
|
||||
context=node_prompt, mode="context_fill", llm=self.llm
|
||||
select_prompt = operator_select_prompt + operator_select_input_prompt
|
||||
operator_select_node = await ActionNode.from_pydantic(OperatorSelect).fill(
|
||||
context=select_prompt, mode="context_fill", llm=self.optimize_llm
|
||||
)
|
||||
select_response = operator_select_node.instruct_content.model_dump()
|
||||
|
||||
max_retries = 5
|
||||
retries = 0
|
||||
select_operators = ast.literal_eval(select_response["selected_operators"])
|
||||
self.operators = self.operators + select_operators
|
||||
|
||||
while retries < max_retries:
|
||||
try:
|
||||
# TODO 需要和评测的模型分开(传入模型或其它方法),如果能实现Temperature调整更好
|
||||
response = node.instruct_content.model_dump()
|
||||
break
|
||||
# 筛选后修改数据
|
||||
with open(template_json_path, "r") as json_file:
|
||||
operator_data = json.load(json_file)
|
||||
|
||||
except Exception as e:
|
||||
retries += 1
|
||||
print(f"Error generating prediction: {e}. Retrying... ({retries}/{max_retries})")
|
||||
filtered_operator_data = {key: operator_data[key] for key in self.operators if key in operator_data}
|
||||
|
||||
if retries == max_retries:
|
||||
print("Maximum retries reached. Skipping this sample.")
|
||||
break
|
||||
time.sleep(5)
|
||||
with open(template_json_path, "w") as json_file:
|
||||
json.dump(filtered_operator_data, json_file, indent=4)
|
||||
|
||||
# TODO 这里其实可以省去
|
||||
operator_match = response["operator"]
|
||||
prompt_match = response["prompt"]
|
||||
modification_match = response["modification"]
|
||||
for operator_name in select_operators:
|
||||
if operator_name in extend_operators_codes.keys():
|
||||
code = extend_operators_codes[operator_name]
|
||||
|
||||
modification = modification_match.group(1)
|
||||
prompt = prompt_match.group(1)
|
||||
operator = OPERATOR_TEMPLATE.format(operator=operator_match.group(1), round=self.round + 1)
|
||||
# 正则表达式匹配类定义
|
||||
action_node_pattern = r"class\s+\w+\(BaseModel\):[\s\S]*?(?=\nclass|\Z)"
|
||||
operator_pattern = r"class\s+\w+\(Operator\):[\s\S]*?(?=\nclass|\Z)"
|
||||
|
||||
# 将 operator.py 文件写入到目录中
|
||||
with open(os.path.join(directory, "operator.py"), "w", encoding="utf-8") as file:
|
||||
file.write(operator)
|
||||
# 提取类定义
|
||||
action_node_class = re.findall(action_node_pattern, code)
|
||||
operator_class = re.findall(operator_pattern, code)
|
||||
|
||||
# 将 prompt.py 文件写入到目录中
|
||||
with open(os.path.join(directory, "prompt.py"), "w", encoding="utf-8") as file:
|
||||
file.write(prompt)
|
||||
# 追加写入到对应的文件中
|
||||
if action_node_class:
|
||||
with open(template_an_path, "a") as an_file:
|
||||
for class_def in action_node_class:
|
||||
an_file.write(f"\n\n{class_def}\n")
|
||||
|
||||
# Write an empty __init__.py into the directory
|
||||
with open(os.path.join(directory, "__init__.py"), "w", encoding="utf-8") as file:
|
||||
file.write("")
|
||||
if operator_class:
|
||||
with open(template_operator_path, "a") as operator_file:
|
||||
for class_def in operator_class:
|
||||
operator_file.write(f"\n\n{class_def}\n")
|
||||
|
||||
experience = {
|
||||
"father node": sample["round"],
|
||||
"modification": modification,
|
||||
"before": sample["score"],
|
||||
"after": None,
|
||||
"succeed": None,
|
||||
}
|
||||
# 将 prompt 写入到 template_op_prompt_path 文件中
|
||||
with open(template_op_prompt_path, "a") as prompt_file:
|
||||
prompt_name = extend_operators_prompts[operator_name]["name"]
|
||||
prompt = extend_operators_prompts[operator_name]["content"]
|
||||
prompt_file.write(f'\n\n{prompt_name} = """{prompt}"""\n\n')
|
||||
|
||||
with open(os.path.join(directory, "experience.json"), "w", encoding="utf-8") as file:
|
||||
json.dump(experience, file, ensure_ascii=False, indent=4)
|
||||
# 优化阶段
|
||||
for operator in self.operators:
|
||||
# Fixed Prompt
|
||||
if operator == "Format" or operator == "Custom":
|
||||
continue
|
||||
optimize_operator_path = f"{operators_path}/{operator}"
|
||||
cur_operator_score_dict = {}
|
||||
|
||||
score = evaluator.validation_evaluate(self.dataset, self.graph)
|
||||
experience["after"] = score
|
||||
experience["succeed"] = bool(score > experience["before"])
|
||||
# 3轮优化,是与你Graph的优化一致 -> Review Revise 辅助性Operator优化
|
||||
for cur_round in range(1, 4):
|
||||
optimize_directory = os.path.join(optimize_operator_path, f"round_{cur_round}")
|
||||
os.makedirs(optimize_directory, exist_ok=True)
|
||||
if cur_operator_score_dict == {}:
|
||||
sample = {
|
||||
"score": 0.8, # 在这里设定Baseline 的优点不太合适,可能还是要先自己跑一轮
|
||||
}
|
||||
sample_round = 0
|
||||
else:
|
||||
sample_round, sample = max(
|
||||
cur_operator_score_dict.items(), key=lambda item: item[1]["score"], default=None
|
||||
)
|
||||
|
||||
operator_code, prompt, graph_load = self._read_operator_files(
|
||||
operator, cur_round, operators_path
|
||||
) # TODO 需要修改
|
||||
operator_desc = self._load_operator_description(0, operator, template_json_path)
|
||||
score = sample["score"]
|
||||
|
||||
# 使用re.findall找到所有匹配项
|
||||
graph_pattern = r"class SolveGraph:.+"
|
||||
graph = re.findall(graph_pattern, graph_load, re.DOTALL)[0]
|
||||
|
||||
# 加载处理过的 experience 数据
|
||||
processed_experience = self._load_experience(path=optimize_operator_path, mode="Operator") # TODO 需要修改
|
||||
# 获取当前轮次的 experience 数据
|
||||
experience_data = processed_experience.get(cur_round)
|
||||
|
||||
if experience_data:
|
||||
# 构建 experience 字符串
|
||||
experience = f"Original Score: {experience_data['score']}\n"
|
||||
experience += "Failed modifications:\n"
|
||||
for mod in experience_data["failure"]:
|
||||
experience += f"- {mod['modification']} (Score: {mod['score']})\n"
|
||||
experience += "\n\nNote: Reference failed experiences, avoid trying failed approaches again, attempt to change your thinking, not limited to using more advanced Python syntax like for, if, else, etc., or modifying the Prompt part"
|
||||
else:
|
||||
experience = f"No experience data found for round {cur_round}."
|
||||
|
||||
operator_input = OPERATOR_OPTIMIZE_INPUT_PROMPT.format(
|
||||
experience=experience,
|
||||
score=score,
|
||||
solvegraph=graph,
|
||||
operator_description=operator_desc,
|
||||
prompt=prompt,
|
||||
)
|
||||
operator_system = OPERATOR_OPTIMIZE_PROMPT.format(type=self.type) # TODO 需要修改
|
||||
|
||||
operator_node_prompt = operator_system + operator_input
|
||||
|
||||
print("-----------operator_node_prompt-----------")
|
||||
print(operator_node_prompt)
|
||||
|
||||
operator_node = await ActionNode.from_pydantic(OperatorOptimze).fill(
|
||||
context=operator_node_prompt, mode="context_fill", llm=self.optimize_llm
|
||||
)
|
||||
|
||||
max_retries = 5
|
||||
retries = 0
|
||||
|
||||
while retries < max_retries:
|
||||
try:
|
||||
# TODO 需要和评测的模型分开(传入模型或其它方法),如果能实现Temperature调整更好
|
||||
response = operator_node.instruct_content.model_dump()
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
retries += 1
|
||||
print(f"Error generating prediction: {e}. Retrying... ({retries}/{max_retries})")
|
||||
|
||||
if retries == max_retries:
|
||||
print("Maximum retries reached. Skipping this sample.")
|
||||
break
|
||||
time.sleep(5)
|
||||
|
||||
operator_description = response["operator_description"]
|
||||
prompt = response["prompt"]
|
||||
modification = response["modification"]
|
||||
graph = response["solvegraph"]
|
||||
|
||||
# TODO 估计就是这里有问题了
|
||||
graph = OPERATOR_OPTIMIZE_GRAPH_EXAMPLE.format(graph=graph, round=cur_round, operator_name=operator)
|
||||
|
||||
cur_operator_score_dict[cur_round] = {
|
||||
"score": score,
|
||||
"operator_description": operator_description,
|
||||
"prompt": prompt,
|
||||
}
|
||||
|
||||
# Write operator.py, prompt.py, graph.py and an empty __init__.py into the round directory
|
||||
with open(os.path.join(optimize_directory, "operator.py"), "w", encoding="utf-8") as file:
|
||||
file.write(operator_code)
|
||||
|
||||
with open(os.path.join(optimize_directory, "prompt.py"), "w", encoding="utf-8") as file:
|
||||
file.write(f'\n\n{operator}_PROMPT = """{prompt}"""\n\n')
|
||||
|
||||
with open(os.path.join(optimize_directory, "graph.py"), "w", encoding="utf-8") as file:
|
||||
file.write(graph)
|
||||
|
||||
with open(os.path.join(optimize_directory, "__init__.py"), "w", encoding="utf-8") as file:
|
||||
file.write("")
|
||||
|
||||
experience = {
|
||||
"father node": sample_round,
|
||||
"modification": modification,
|
||||
"before": sample["score"],
|
||||
"after": None,
|
||||
"succeed": None,
|
||||
}
|
||||
|
||||
self._load_graph(cur_round, optimize_operator_path)
|
||||
print("--------")
|
||||
print(type(self.graph))
|
||||
print("--------")
|
||||
with open(os.path.join(optimize_directory, "experience.json"), "w", encoding="utf-8") as file:
|
||||
json.dump(experience, file, ensure_ascii=False, indent=4)
|
||||
|
||||
evaluator = Evaluator(eval_path=optimize_directory)
|
||||
|
||||
score = await evaluator.validation_evaluate(
|
||||
self.dataset,
|
||||
self.graph,
|
||||
{"dataset": self.dataset, "llm_config": self.execute_llm_config},
|
||||
optimize_directory,
|
||||
) # TODO 这里的Graph需要修改
|
||||
experience["after"] = score
|
||||
experience["succeed"] = bool(score > experience["before"])
|
||||
|
||||
def test(self, graph_path: str):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,122 +1,152 @@
|
|||
INITIALIZE_OPERATOR_PROMPT = """
|
||||
您正在处理一个名为{dataset_name}的数据集。该数据集{dataset_description}。
|
||||
OPERATOR_EXTEND_PROMPT = """
|
||||
You are tasked with developing an additional operator and the corresponding prompts to collaboratively solve {type} problems.
|
||||
|
||||
输入特征包括:
|
||||
{input_features}
|
||||
While we typically address these issues using the provided list of operators, I'd like you to apply critical thinking principles (questioning, validating, and self-inquiry) to generate more operators capable of solving this problem.
|
||||
|
||||
输出特征为:
|
||||
{output_features}
|
||||
|
||||
请根据以上信息,优化用途为{operator_name}的prompt以便更好地处理这个数据集:
|
||||
|
||||
{initial_prompt}
|
||||
|
||||
您的任务是:
|
||||
1. 分析数据集的特点和结构
|
||||
2. 考虑输入和输出特征之间的关系
|
||||
3. 调整initial_prompt以更好地利用数据集信息
|
||||
4. 提供一个经过优化的prompt版本
|
||||
|
||||
请提供您优化后的prompt,并简要解释您所做的更改及其原因。
|
||||
Please present your newly created operator description, prompt, and its associated code within XML tags in your response. These will be utilized as new operator for problem-solving. Ensure that the operator is comprehensive and accurate to avoid potential runtime errors.
|
||||
Keep the prompt variable name consistent with the operator's name, and append _PROMPT to it.
|
||||
"""
|
||||
|
||||
# TODO 这里也需要自适应的完成针对不同数据集的GRAPH OPTIMIZE PROMPT
|
||||
OPERATOR_EXTEND_INPUT_PROMPT = """
|
||||
Below is a list of operators and two examples of operator's code:
|
||||
<sample>
|
||||
<operators>{operators}</operators>
|
||||
<code_examples>{code}</code_examples>
|
||||
</sample>
|
||||
"""
|
||||
|
||||
GRAPH_OPTIMIZE_PROMPT = """You are building a Graph and corresponding Prompt to jointly solve {type} problems.
|
||||
OPERATOR_SELECT_PROMPT = """
|
||||
You are tasked with selecting {count} operators from the list of candidate operators to address {type} problems.
|
||||
|
||||
You will see a list of Fixed Operators that provide guidelines for your selection:
|
||||
|
||||
1. The selected operators should complement the Fixed Operators.
|
||||
2. The selected operators should be able to collaborate with other operators within a graph represented by code.
|
||||
3. The selected operators should be the most effective in solving {type} problems.
|
||||
|
||||
Please provide the names of the operators you have selected in the format of List in python within XML tags in your response. These operators will be used to solve the problem. Ensure that the selected operator names match those in the candidate list.
|
||||
"""
|
||||
|
||||
OPERATOR_SELECT_INPUT_PROMPT = """
|
||||
Below is the list of Fixed Operators and the list of candidate operators awaiting selection:
|
||||
<sample>
|
||||
<fixed_operators>{fixed_operators}</fixed_operators>
|
||||
<candidate_operators>{candidate_operators}</candidate_operators>
|
||||
</sample>
|
||||
"""
|
||||
|
||||
OPERATOR_CODE_EXAMPLES = """
|
||||
class GenerateOp(BaseModel):
|
||||
# The Op restricts the keys of the output dictionary, which should be consistent with the Prompt you provide.
|
||||
solution: str = Field(default="", description="Your solution for this problem")
|
||||
|
||||
class Generate(Operator):
|
||||
def __init__(self, llm: LLM, name: str = "Generate"):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem):
|
||||
prompt = GENERATE_PROMPT.format(problem_description=problem)
|
||||
node = await ActionNode.from_pydantic(GenerateOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
|
||||
class FormatOp(BaseModel):
|
||||
solution: str = Field(default="", description="Your formatted answer for this problem")
|
||||
|
||||
class Format(Operator):
|
||||
def __init__(self, name: str = "Format", llm: LLM = LLM()):
|
||||
super().__init__(name, llm)
|
||||
|
||||
async def __call__(self, problem, solution):
|
||||
prompt = FORMAT_PROMPT.format(problem_description=problem, solution=solution)
|
||||
node = await ActionNode.from_pydantic(FormatOp).fill(context=prompt, llm=self.llm)
|
||||
response = node.instruct_content.model_dump()
|
||||
return response
|
||||
"""
|
||||
|
||||
OPERATOR_OPTIMIZE_PROMPT = """
|
||||
Your task is to optimize an Operator that executes within a SolveGraph to collaboratively address {type} problems.
|
||||
|
||||
When optimizing, you should preserve the original function of the Operator, focusing on enhancement rather than complete reconstruct.
|
||||
|
||||
Given an example SolveGraph or the current SolveGraph, along with the Operator description and its corresponding prompt, please execute the Operator within the SolveGraph, refining the prompt to improve performance. Remember, in the solvengraph you can only use the current operator.
|
||||
|
||||
In your response, make only one modification (e.g., a single sentence) and provide the updated Operator_description, Prompt, and SolveGraph enclosed within XML tags. These will be used as the new Prompt for the Operator in subsequent computations and iterations. Ensure that your modifications are complete and accurate to prevent any potential runtime errors.
|
||||
|
||||
Ensure the SolveGraph output is formatted correctly so that it can be directly used in code execution. Ensure that each prompt 's placeholder is consistent with SolveGraph.
|
||||
"""
|
||||
|
||||
|
||||
# TODO 这里的输入可能还要看一下graph的代码吧,不然不是很好弄;同时对应的Operator的参数也不是很好配置,这些最好都要成为优化的一部分
|
||||
OPERATOR_OPTIMIZE_INPUT_PROMPT = """
|
||||
Below is an operator and its corresponding solevgraph, prompt that demonstrated exceptional performance in a previous iteration (maximum score is 1, ):
|
||||
|
||||
<sample>
|
||||
<experience>{experience}</experience>
|
||||
<score>{score}</score>
|
||||
<solvegraph>{solvegraph}</solvegraph>
|
||||
<operator_description>{operator_description}</operator_description>
|
||||
<prompt>{prompt}</prompt>
|
||||
</sample>
|
||||
"""
|
||||
|
||||
OPERATOR_OPTIMIZE_GRAPH_EXAMPLE = """from typing import Literal
|
||||
from examples.ags.w_action_node.optimized.Gsm8K.operators.{operator_name}.round_{round}.operator import *
|
||||
from examples.ags.w_action_node.optimized.Gsm8K.operators.{operator_name}.round_{round}.prompt import *
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
|
||||
|
||||
{graph}
|
||||
|
||||
"""
|
||||
|
||||
|
||||
GRAPH_OPTIMIZE_PROMPT = """You are building a SolveGraph and corresponding Prompt to jointly solve {type} problems.
|
||||
Referring to the given combination of graph and prompt, which forms a basic example of a {type} solution approach, please reconstruct and optimize the Prompt and Graph. You can add, modify, or delete nodes and parameters in the graph, as well as modify, delete, or add new Prompts.
|
||||
Put your modification (only make one point of change, i.e., one sentence), and the modified Prompt and Graph in XML tags in your reply. They will be used as new Prompt and Graph for calculation and iteration. Please ensure they are complete and correct, otherwise it may lead to runtime failures.
|
||||
Only modify the parts in Prompt and Graph within /async def __call__(self, problem: str):/, otherwise it will cause parsing failure.
|
||||
|
||||
When optimizing, you can refer to critical thinking, and can incorporate methods such as Review, Revise, Ensemble, selfAsk, etc. Don't be limited to the previous format.You can consider Python's built-in loops (like for, while, and list comprehensions) or conditional statements (such as if-elif-else and ternary operators), or even machine learning methods ranging from basic supervised learning techniques (e.g., linear regression, decision trees) to more advanced approaches like neural networks and clustering algorithms. However, you must ensure that each call to the Graph internally involves at most 10 interactions, i.e., the complexity of the graph does not exceed 15."""
|
||||
All prompts can and must contain only the `input` placeholder.
|
||||
When optimizing, you can refer to critical thinking, and can incorporate methods such as Review, Revise, Ensemble, selfAsk, etc. Don't be limited to the previous format.You can consider Python's built-in loops (like for, while, and list comprehensions) or conditional statements (such as if-elif-else and ternary operators), or even machine learning methods ranging from basic supervised learning techniques (e.g., linear regression, decision trees) to more advanced approaches like neural networks and clustering algorithms. the complexity of the graph does not exceed 10."""
|
||||
|
||||
GRAPH_INPUT = """
|
||||
Here is a Graph and corresponding Prompt that performed excellently in a previous iteration (maximum score is 1):\n
|
||||
All prompts can and must contain only the `input` placeholder.
|
||||
<sample>
|
||||
<experience>{experience}</experience>
|
||||
<modification>None</modification>
|
||||
<score>{score}</score>
|
||||
<graph>{graph}</graph>
|
||||
<solvegraph>{graph}</solvegraph>
|
||||
<prompt>{prompt}</prompt>
|
||||
<operator_description>{operator_description}</operator_description>
|
||||
</sample>
|
||||
First provide optimization ideas. Note that ANSWER_FORMAT_PROMPT must exist and cannot be modified. Only add/modify/delete one detail point, extensive modifications are prohibited.\n\n"
|
||||
**"In all cases, the `self.generate` method only accepts `input` as the input information and passes it to the `input` placeholder within the `prompt`; the `prompt` can only contain this single `input` placeholder, and no others are valid."**
|
||||
First provide optimization ideas. Only add/modify/delete one detail point, extensive modifications are prohibited.\n\n"
|
||||
"""
|
||||
|
||||
GRAPH_TEMPLATE = """import os
|
||||
from agentG.llm import LLM
|
||||
import logging
|
||||
from agentG.graphs.gsm8k.round_{round}.prompt import *
|
||||
from logging.handlers import RotatingFileHandler
|
||||
GRAPH_TEMPLATE = """from typing import Literal
|
||||
from examples.ags.w_action_node.optimized.Gsm8K.graphs.template.operator import *
|
||||
from examples.ags.w_action_node.optimized.Gsm8K.graphs.round_{round}.prompt import *
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from metagpt.utils.cost_manager import CostManager
|
||||
|
||||
# 获取项目的根目录
|
||||
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LOG_FILE = os.path.join(ROOT_DIR, 'app.log')
|
||||
DatasetType = Literal["HumanEval", "MMBP", "Gsm8K", "MATH", "HotpotQa", "MMLU"]
|
||||
|
||||
# 创建一个RotatingFileHandler
|
||||
file_handler = RotatingFileHandler(LOG_FILE, maxBytes=1024*1024, backupCount=5, encoding='utf-8')
|
||||
file_handler.setLevel(logging.INFO)
|
||||
|
||||
# 创建一个格式化器
|
||||
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
file_handler.setFormatter(formatter)
|
||||
|
||||
# 配置根日志记录器
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[file_handler])
|
||||
|
||||
# 获取当前模块的logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
{graph}
|
||||
{graph}
|
||||
"""
|
||||
|
||||
OPERATOR_OPTIMIZE_PROMPT = """You are building a Operator and corresponding Prompt to jointly solve {type} problems.
|
||||
Referring to the given combination of Operator and prompt, which forms a basic example of a {type} solution approach, please reconstruct and optimize the Prompt and Operator. You can add, modify, or delete nodes and parameters in the Operator, as well as modify, delete, or add new Prompts.
|
||||
Put your modification (only make one point of change, i.e., one sentence), and the modified Prompt and Operator in XML tags in your reply. They will be used as new Prompt and Operator for calculation and iteration. Please ensure they are complete and correct, otherwise it may lead to runtime failures.
|
||||
Only modify the parts in Prompt and Operator.
|
||||
|
||||
Don't be limited to the previous format.You can consider Python's built-in loops (like for, while, and list comprehensions) or conditional statements (such as if-elif-else and ternary operators), or even machine learning methods ranging from basic supervised learning techniques (e.g., linear regression, decision trees) to more advanced approaches like neural networks and clustering algorithms. However, you must ensure that each call to the Operator internally involves at most 10 interactions, i.e., the complexity of the Operator does not exceed 15."""
|
||||
|
||||
|
||||
OPERATOR_INPUT = """
|
||||
Here is a Operator and corresponding Prompt that performed excellently in a previous iteration (maximum score is 1), Graph calls the Operator:\n
|
||||
<sample>
|
||||
<experience>{experience}</experience>
|
||||
<modification>None</modification>
|
||||
<score>{score}</score>
|
||||
<operator>{operator}</operator>
|
||||
<prompt>{prompt}</prompt>
|
||||
<graph>{graph}</graph>
|
||||
</sample>
|
||||
First provide optimization ideas. Note that ANSWER_FORMAT_PROMPT must exist and cannot be modified. Only add/modify/delete one detail point, extensive modifications are prohibited.\n\n"
|
||||
"""
|
||||
|
||||
|
||||
OPERATOR_TEMPLATE = """
|
||||
import ast
|
||||
import random
|
||||
import sys
|
||||
import traceback
|
||||
from collections import Counter
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
OPERATOR_TEMPLATE = """from typing import Literal, List, Dict
|
||||
from pydantic import BaseModel, Field
|
||||
from tenacity import retry, stop_after_attempt
|
||||
from examples.ags.w_action_node.optimized.gsm8k.operators.round_{round}.prompt import *
|
||||
from examples.ags.w_action_node.operator_an import (
|
||||
GenerateOp,
|
||||
)
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
|
||||
from metagpt.llm import LLM
|
||||
from metagpt.logs import logger
|
||||
from metagpt.provider.llm_provider_registry import create_llm_instance
|
||||
from examples.ags.w_action_node.operator import Operator
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from examples.ags.w_action_node.optimized.Gsm8K.operators.template.operator_an import *
|
||||
from examples.ags.w_action_node.optimized.Gsm8K.operators.{operator_name}.round_{round_number}.prompt import *
|
||||
|
||||
|
||||
class Operator:
|
||||
def __init__(self, name, llm: LLM):
|
||||
self.name = name
|
||||
self.llm = llm
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
raise NotImplementedError
|
||||
|
||||
{operator}
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ class ReviseMode(Enum):
|
|||
TAG = "CONTENT"
|
||||
MODE_CODE_FILL = "code_fill"
|
||||
CONTEXT_FILL = "context_fill"
|
||||
SINGLE_FILL = "single_fill"
|
||||
|
||||
LANGUAGE_CONSTRAINT = "Language: Please use the same language as Human INPUT."
|
||||
FORMAT_CONSTRAINT = f"Format: output wrapped inside [{TAG}][/{TAG}] like format example, nothing else."
|
||||
|
|
@ -491,6 +492,8 @@ class ActionNode:
|
|||
return model_class.model_fields.keys()
|
||||
|
||||
def xml_compile(self, context):
|
||||
# TODO 再来一版
|
||||
|
||||
field_names = self.get_field_names()
|
||||
# Construct the example using the field names
|
||||
examples = []
|
||||
|
|
@ -501,7 +504,7 @@ class ActionNode:
|
|||
example_str = "\n".join(examples)
|
||||
# Add the example to the context
|
||||
context += f"""
|
||||
### format example (must be strictly followed) (do not include any other formats except for the given XML format)
|
||||
### response format (must be strictly followed) (do not include any other formats except for the given XML format): \n
|
||||
{example_str}
|
||||
"""
|
||||
return context
|
||||
|
|
@ -517,6 +520,13 @@ class ActionNode:
|
|||
result = {field_name: extracted_code}
|
||||
return result
|
||||
|
||||
async def single_fill(self, context):
    """Fill this node's field with the raw LLM reply to ``context``.

    The context is sent to ``self.llm.aask`` unchanged, and the reply is
    stored verbatim (no tag extraction or other post-processing) under the
    name returned by ``self.get_field_name()``.

    Args:
        context: Prompt text passed to the LLM as-is.

    Returns:
        A one-entry dict mapping the field name to the LLM reply.
    """
    # Resolve the field name first, matching the original call order.
    field_name = self.get_field_name()
    content = await self.llm.aask(context)
    return {field_name: content}
|
||||
|
||||
async def context_fill(self, context):
|
||||
"""
|
||||
Fill Context with XML TAG
|
||||
|
|
@ -525,12 +535,15 @@ class ActionNode:
|
|||
extracted_data = {}
|
||||
content = await self.llm.aask(context)
|
||||
|
||||
# TODO 自动解析类型标注的功能
|
||||
|
||||
for field_name in field_names:
|
||||
# Use regex to find content within XML tags matching the field name
|
||||
pattern = rf"<{field_name}>(.*?)</{field_name}>"
|
||||
match = re.search(pattern, content, re.DOTALL)
|
||||
if match:
|
||||
extracted_data[field_name] = match.group(1).strip()
|
||||
|
||||
return extracted_data
|
||||
|
||||
async def fill(
|
||||
|
|
@ -584,6 +597,11 @@ class ActionNode:
|
|||
self.instruct_content = self.create_class()(**result)
|
||||
return self
|
||||
|
||||
elif mode == SINGLE_FILL:
|
||||
result = await self.single_fill(context)
|
||||
self.instruct_content = self.create_class()(**result)
|
||||
return self
|
||||
|
||||
if strgy == "simple":
|
||||
return await self.simple_fill(schema=schema, mode=mode, images=images, timeout=timeout, exclude=exclude)
|
||||
elif strgy == "complex":
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
# @Author : didi
|
||||
# @Desc : Experiment of graph optimization
|
||||
|
||||
|
||||
from examples.ags.w_action_node.optimizer import Optimizer
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
|
||||
|
|
@ -15,10 +14,11 @@ optimized_path = "examples/ags/w_action_node/optimized" # 优化结果保存路
|
|||
|
||||
# 初始化LLM模型
|
||||
deepseek_llm_config = ModelsConfig.default().get("deepseek-coder")
|
||||
claude_llm_config = ModelsConfig.default().get("claude-3.5-sonnet")
|
||||
|
||||
claude_llm_config = ModelsConfig.default().get("claude-3-5-sonnet-20240620")
|
||||
# claude_llm_config = ModelsConfig.default().get("deepseek-coder")
|
||||
# 初始化操作符列表
|
||||
gsm8k_operators = [
|
||||
"Custom",
|
||||
"Generate",
|
||||
"ContextualGenerate",
|
||||
"Format",
|
||||
|
|
@ -42,4 +42,4 @@ optimizer = Optimizer(
|
|||
)
|
||||
|
||||
# 运行优化器
|
||||
optimizer.optimize("Graph")
|
||||
optimizer.optimize("Operator")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue