更新了xml-compile方法,更新了剩余Baseline

This commit is contained in:
didi 2024-09-11 15:21:33 +08:00
parent f691c5f439
commit b9a2d94da2
7 changed files with 236 additions and 2 deletions

View file

@ -64,7 +64,7 @@ async def evaluate_problem(input: str, graph: Callable, expected_output: str) ->
prompt = input
max_retries = 5
retries = 0
while retries < max_retries:
try:
prediction = await graph(prompt)

View file

@ -0,0 +1,119 @@
from examples.ags.scripts.operator import Operator
from examples.ags.scripts.graph import SolveGraph
from examples.ags.benchmark.gsm8k import gsm8k_evaluation
from metagpt.actions.action_node import ActionNode
from metagpt.configs.models_config import ModelsConfig
from metagpt.llm import LLM
from pydantic import BaseModel, Field
from typing import List
# Prompt templates for the multi-persona debate baseline.
# These strings are sent to the LLM verbatim — do not edit casually.

# Round 1: each persona solves the task independently.
DEBATE_INITIAL_PROMPT = """
{question}
Please think step by step and then solve this task.
"""

# Later rounds: each persona updates its answer; the peers' contributions
# are appended to this prompt by the caller (DebateAgent.__call__).
DEBATE_PROMPT = """
{question}
Considering the solutions provided by other agents as additional suggestions. Please think carefully and provide an updated answer.
"""

# Final aggregation: the judge reads every persona's thinking and answers
# and must emit only the final answer, without intermediate steps.
FINAL_DECISION_PROMPT = """
{question}
Considering all the thinking processes and answers:
{all_thinking}
{all_answers}
Please reason carefully and provide the final answer. To ensure accuracy, only provide the answer in the solution, without any steps.
"""
class DebateOp(BaseModel):
    """Structured output of one debate turn: reasoning trace plus an answer.

    NOTE(review): the Field descriptions are presumably used by ActionNode
    when building the response schema shown to the LLM — treat them as
    runtime behavior, not documentation; verify before changing.
    """

    # Step-by-step reasoning produced by the persona.
    thinking: str = Field(default="", description="thinking process")
    # The persona's answer for this round.
    answer: str = Field(default="", description="answer")
class FinalDecisionOp(BaseModel):
    """Structured output of the final-decision (judge) step.

    NOTE(review): the Field descriptions are presumably used by ActionNode
    when building the response schema shown to the LLM — treat them as
    runtime behavior; verify before changing.
    """

    # The judge's aggregated reasoning over all personas.
    thinking: str = Field(default="", description="final thinking process")
    # The final answer, expected without intermediate steps.
    solution: str = Field(default="", description="final answer")
class DebateAgent(Operator):
    """A role-played debater: answers independently in round 1, then revises
    its answer given the other agents' output in later rounds."""

    def __init__(self, llm: LLM, name: str, role: str):
        super().__init__(name, llm)
        self.role = role  # persona injected into every prompt

    async def __call__(self, problem: str, context: List[str] = None, mode: str = None):
        """Return the DebateOp fields (``thinking``/``answer``) as a plain dict."""
        persona = f"You are a {self.role}. Based on your professional knowledge and thinking style,"
        if context is None:
            # First round: no peer output to consider yet.
            full_prompt = persona + DEBATE_INITIAL_PROMPT.format(question=problem)
        else:
            # Later rounds: append the peers' contributions verbatim.
            full_prompt = persona + DEBATE_PROMPT.format(question=problem) + "\n".join(context)
        kwargs = dict(context=full_prompt, llm=self.llm)
        if mode:
            kwargs["mode"] = mode
        filled = await ActionNode.from_pydantic(DebateOp).fill(**kwargs)
        return filled.instruct_content.model_dump()
class FinalDecisionAgent(Operator):
    """Judge agent: reads every persona's thinking and answers and produces
    the single final answer."""

    def __init__(self, llm: LLM, name: str = "FinalDecision"):
        super().__init__(name, llm)

    async def __call__(self, problem: str, all_thinking: List[str], all_answers: List[str], mode: str = None):
        """Return the FinalDecisionOp fields (``thinking``/``solution``) as a dict."""
        joined_thinking = "\n".join(all_thinking)
        joined_answers = "\n".join(all_answers)
        decision_prompt = FINAL_DECISION_PROMPT.format(
            question=problem,
            all_thinking=joined_thinking,
            all_answers=joined_answers,
        )
        kwargs = dict(context=decision_prompt, llm=self.llm)
        if mode:
            kwargs["mode"] = mode
        filled = await ActionNode.from_pydantic(FinalDecisionOp).fill(**kwargs)
        return filled.instruct_content.model_dump()
class MultiPersonaGraph(SolveGraph):
    """Multi-persona debate baseline.

    Four role-played agents debate the problem for a fixed number of rounds;
    a judge agent then aggregates the final round into one answer.
    """

    # The personas debating each problem; each string is interpolated into
    # the agents' prompts, so the exact wording is part of runtime behavior.
    ROLES = [
        "Math Competition Champion",
        "Elementary School Math Teacher",
        "Math Professor",
        "Computer Scientist",
    ]

    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        self.debate_agents = [
            DebateAgent(self.llm, f"Debate Agent {i}", role)
            for i, role in enumerate(self.ROLES)
        ]
        self.final_decision_agent = FinalDecisionAgent(self.llm)

    async def __call__(self, problem, max_round: int = 2):
        """Run the debate and return ``(final_result_dict, total_llm_cost)``.

        Args:
            problem: the question to solve.
            max_round: number of debate rounds. Was hard-coded to 2; kept as
                the default so existing callers behave identically.

        Raises:
            ValueError: if ``max_round`` is less than 1 (the judge needs at
                least one completed round to aggregate).
        """
        if max_round < 1:
            raise ValueError("max_round must be >= 1")
        all_thinking = [[] for _ in range(max_round)]
        all_answers = [[] for _ in range(max_round)]
        for r in range(max_round):
            for i, agent in enumerate(self.debate_agents):
                if r == 0:
                    # First round: independent answers, no peer context.
                    result = await agent(problem, mode="context_fill")
                else:
                    # Each agent sees its own and its peers' previous-round thinking.
                    context = [f"{agent.role}'s previous round thinking: {all_thinking[r-1][i]}"] + \
                        [f"{self.debate_agents[j].role}'s thinking: {all_thinking[r-1][j]}" for j in range(len(self.debate_agents)) if j != i]
                    result = await agent(problem, context, mode="context_fill")
                all_thinking[r].append(result["thinking"])
                all_answers[r].append(result["answer"])
        # The judge aggregates only the LAST round's thinking and answers.
        final_result = await self.final_decision_agent(
            problem,
            [f"{agent.role}'s final thinking: {thinking}" for agent, thinking in zip(self.debate_agents, all_thinking[-1])],
            [f"{agent.role}'s final answer: {answer}" for agent, answer in zip(self.debate_agents, all_answers[-1])],
            mode="context_fill",
        )
        return final_result, self.llm.cost_manager.total_cost
if __name__ == "__main__":
    import asyncio

    async def main():
        """Evaluate the multi-persona baseline on one GSM8K sample."""
        llm_config = ModelsConfig.default().get("deepseek-coder")
        graph = MultiPersonaGraph(name="multi-persona", llm_config=llm_config, dataset="Gsm8K")
        data_file = "examples/ags/data/gsm8k.jsonl"
        result_dir = "examples/ags/data/baselines/general"
        # One sample only — quick smoke run of the full pipeline.
        return await gsm8k_evaluation(graph, data_file, 1, result_dir)

    asyncio.run(main())

View file

@ -0,0 +1,115 @@
from examples.ags.scripts.operator import Operator
from examples.ags.scripts.graph import SolveGraph
from examples.ags.benchmark.gsm8k import gsm8k_evaluation
from metagpt.actions.action_node import ActionNode
from metagpt.configs.models_config import ModelsConfig
from metagpt.llm import LLM
from pydantic import BaseModel, Field
from typing import Dict, Any
# Prompt templates for the self-refine baseline.
# These strings are sent to the LLM verbatim — do not edit casually.

# Generation prompt (GPT variant): answer wrapped in XML tags, matching the
# ActionNode xml-compile response format.
GSM8K_PROMPT_GPT = """
{question}\nPlease reason step by step, and put your final answer in the end. Wrap content using xml tags.
"""

# Generation prompt (DeepSeek variant): final answer inside \boxed{}.
# NOTE(review): currently unused by CoTGenerate below — confirm it is kept
# deliberately for switching models.
GSM8K_PROMPT_DS = """
{question}\nPlease reason step by step, and put your final answer within \\boxed{{}}.
"""

# Asks the reviewer for a boolean verdict plus feedback on a candidate solution.
REVIEW_PROMPT = """
For the question described as {question},
please review the following solution: {solution}, and criticize on where might be wrong. You should provide a review result in boolean format.
If you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your feedback.
"""

# Asks for a refined solution given the previous attempt and reviewer feedback.
REVISE_PROMPT = """
For the question described as {question}, \nand an error solution: {solution}, \nwith the feedback: {feedback},
Given the previous solution and feedback, carefully refine the solution to solve the question and ensure it aligns with the original format.
"""
class GenerateOp(BaseModel):
    """Structured output of the CoT generation step.

    NOTE(review): the Field description is presumably shown to the LLM via
    ActionNode's response schema — treat it as runtime behavior.
    """

    # The full generated solution text.
    solution: str = Field(default="", description="solution for the problem")
class ReviewOp(BaseModel):
    """Structured output of the review step: verdict plus feedback.

    NOTE(review): the Field descriptions are presumably shown to the LLM via
    ActionNode's response schema — treat them as runtime behavior.
    """

    # True = solution accepted (stops the refine loop); False = needs revision.
    review_result: bool = Field(
        default=False,
        description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'",
    )
    # Criticism used by the Revise step when review_result is False.
    feedback: str = Field(
        default="",
        description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.",
    )
class ReviseOp(BaseModel):
    """Structured output of the revise step.

    NOTE(review): the Field description is presumably shown to the LLM via
    ActionNode's response schema — treat it as runtime behavior.
    """

    # The revised solution produced from the feedback.
    solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
class CoTGenerate(Operator):
    """Chain-of-thought generator: prompts the LLM to reason step by step.

    NOTE(review): always uses GSM8K_PROMPT_GPT (XML-tag format) even when the
    configured model is deepseek — confirm this is intended.
    """

    def __init__(self, llm: LLM, name: str = "Generate"):
        super().__init__(name, llm)

    async def __call__(self, problem, mode: str = None):
        """Return the GenerateOp fields (``solution``) as a plain dict."""
        generation_prompt = GSM8K_PROMPT_GPT.format(question=problem)
        kwargs = dict(context=generation_prompt, llm=self.llm)
        if mode:
            kwargs["mode"] = mode
        filled = await ActionNode.from_pydantic(GenerateOp).fill(**kwargs)
        return filled.instruct_content.model_dump()
class Review(Operator):
    """Asks the LLM to accept or reject a candidate solution, with feedback."""

    def __init__(self, llm: LLM, name: str = "Review"):
        super().__init__(name, llm)

    async def __call__(self, problem, solution, mode: str = None):
        """Return the ReviewOp fields (``review_result``/``feedback``) as a dict."""
        review_prompt = REVIEW_PROMPT.format(question=problem, solution=solution)
        kwargs = dict(context=review_prompt, llm=self.llm)
        if mode:
            kwargs["mode"] = mode
        filled = await ActionNode.from_pydantic(ReviewOp).fill(**kwargs)
        return filled.instruct_content.model_dump()
class Revise(Operator):
    """Refines a rejected solution using the reviewer's feedback.

    Bug fix: the original signature was ``__init__(self, name="Revise",
    llm=LLM())``, which (a) eagerly instantiated a default ``LLM()`` at
    import time (default args are evaluated when the ``def`` runs) and
    (b) reversed the parameter order relative to the sibling operators, so
    the positional call ``Revise(self.llm)`` bound the configured LLM to
    ``name`` and silently ran revisions on a fresh default LLM. The
    signature now matches ``Review``/``CoTGenerate``.
    """

    def __init__(self, llm: LLM, name: str = "Revise"):
        super().__init__(name, llm)

    async def __call__(self, problem, solution, feedback, mode: str = None):
        """Return the ReviseOp fields (``solution``) as a plain dict."""
        prompt = REVISE_PROMPT.format(question=problem, solution=solution, feedback=feedback)
        fill_kwargs = {"context": prompt, "llm": self.llm}
        if mode:
            fill_kwargs["mode"] = mode
        node = await ActionNode.from_pydantic(ReviseOp).fill(**fill_kwargs)
        return node.instruct_content.model_dump()
class SelfRefineGraph(SolveGraph):
    """Self-refine baseline: generate once, then review/revise up to 5 times."""

    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        self.cot_generate = CoTGenerate(self.llm)
        self.review = Review(self.llm)
        self.revise = Revise(self.llm)

    async def __call__(self, problem):
        """Return ``(solution_dict, total_llm_cost)`` for one problem."""
        # NOTE(review): `solution` is the dict returned by the operator
        # (e.g. {"solution": ...}); its repr is what gets interpolated into
        # the review/revise prompts — confirm this is intended.
        solution = await self.cot_generate(problem, mode="context_fill")
        for _ in range(5):  # at most five refine rounds
            verdict = await self.review(problem, solution)
            if verdict["review_result"]:
                break  # reviewer accepted the solution
            solution = await self.revise(problem, solution, verdict["feedback"])
        return solution, self.llm.cost_manager.total_cost
if __name__ == "__main__":
    import asyncio

    async def main():
        """Evaluate the self-refine baseline on 10 GSM8K samples."""
        llm_config = ModelsConfig.default().get("deepseek-coder")
        # llm_config = ModelsConfig.default().get("gpt-4o-mini")
        # llm_config = ModelsConfig.default().get("gpt-35-turbo-1106")
        graph = SelfRefineGraph(name="self-refine", llm_config=llm_config, dataset="Gsm8K")
        data_file = "examples/ags/data/gsm8k.jsonl"
        result_dir = "examples/ags/data/baselines/general"
        return await gsm8k_evaluation(graph, data_file, 10, result_dir)

    asyncio.run(main())

View file

@ -511,7 +511,7 @@ class ActionNode:
example_str = "\n".join(examples)
# Add the example to the context
context += f"""
### response format (must be strictly followed) (do not include any other formats except for the given XML format): \n
### Response format (must be strictly followed): All content must be enclosed in the given XML tags, ensuring each opening <tag> has a corresponding closing </tag>, with no incomplete or self-closing tags allowed.\n
{example_str}
"""
return context