mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-05 13:52:38 +02:00
Updated the xml-compile method; updated the remaining baselines (更新了xml-compile方法,更新了剩余Baseline)
This commit is contained in:
parent
f691c5f439
commit
b9a2d94da2
7 changed files with 236 additions and 2 deletions
|
|
@ -64,7 +64,7 @@ async def evaluate_problem(input: str, graph: Callable, expected_output: str) ->
|
|||
prompt = input
|
||||
max_retries = 5
|
||||
retries = 0
|
||||
|
||||
|
||||
while retries < max_retries:
|
||||
try:
|
||||
prediction = await graph(prompt)
|
||||
|
|
|
|||
119
examples/ags/experiments/baselines/multi_persona_gsm8k.py
Normal file
119
examples/ags/experiments/baselines/multi_persona_gsm8k.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
from examples.ags.scripts.operator import Operator
|
||||
from examples.ags.scripts.graph import SolveGraph
|
||||
from examples.ags.benchmark.gsm8k import gsm8k_evaluation
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.llm import LLM
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List
|
||||
|
||||
# Prompt for the first debate round: each persona solves the task from scratch.
DEBATE_INITIAL_PROMPT = """
{question}
Please think step by step and then solve this task.
"""

# Prompt for later rounds: the agent reconsiders its answer given peers' solutions
# (the peer context is appended after this template by the caller).
DEBATE_PROMPT = """
{question}
Considering the solutions provided by other agents as additional suggestions. Please think carefully and provide an updated answer.
"""

# Prompt for the aggregation step that merges all agents' reasoning and answers
# into one final, steps-free solution.
FINAL_DECISION_PROMPT = """
{question}
Considering all the thinking processes and answers:
{all_thinking}
{all_answers}
Please reason carefully and provide the final answer. To ensure accuracy, only provide the answer in the solution, without any steps.
"""
|
||||
|
||||
class DebateOp(BaseModel):
    """Structured output for one debate turn: a reasoning trace plus the answer it yields."""

    thinking: str = Field(default="", description="thinking process")
    answer: str = Field(default="", description="answer")
||||
|
||||
class FinalDecisionOp(BaseModel):
    """Structured output for the aggregation step that settles on a single final answer."""

    thinking: str = Field(default="", description="final thinking process")
    solution: str = Field(default="", description="final answer")
||||
|
||||
class DebateAgent(Operator):
    """A persona-conditioned debater.

    Produces a {"thinking": ..., "answer": ...} dict for a problem; when given
    peer context it revises its answer in light of the other agents' reasoning.
    """

    def __init__(self, llm: LLM, name: str, role: str):
        super().__init__(name, llm)
        self.role = role  # persona used to condition every prompt

    async def __call__(self, problem: str, context: List[str] = None, mode: str = None):
        # Persona preamble shared by both the opening and the revision prompt.
        persona = f"You are a {self.role}. Based on your professional knowledge and thinking style,"
        if context is None:
            # Opening round: no peer solutions to consider yet.
            full_prompt = persona + DEBATE_INITIAL_PROMPT.format(question=problem)
        else:
            # Revision round: peer contributions are appended after the task prompt.
            full_prompt = persona + DEBATE_PROMPT.format(question=problem) + "\n".join(context)

        fill_args = {"context": full_prompt, "llm": self.llm}
        if mode:
            fill_args["mode"] = mode
        filled = await ActionNode.from_pydantic(DebateOp).fill(**fill_args)
        return filled.instruct_content.model_dump()
|
||||
|
||||
class FinalDecisionAgent(Operator):
    """Merges every debater's reasoning and answers into one final solution dict."""

    def __init__(self, llm: LLM, name: str = "FinalDecision"):
        super().__init__(name, llm)

    async def __call__(self, problem: str, all_thinking: List[str], all_answers: List[str], mode: str = None):
        # Fold the collected reasoning traces and answers into the decision prompt.
        thinking_text = "\n".join(all_thinking)
        answers_text = "\n".join(all_answers)
        decision_prompt = FINAL_DECISION_PROMPT.format(
            question=problem,
            all_thinking=thinking_text,
            all_answers=answers_text,
        )
        fill_args = {"context": decision_prompt, "llm": self.llm}
        if mode:
            fill_args["mode"] = mode
        filled = await ActionNode.from_pydantic(FinalDecisionOp).fill(**fill_args)
        return filled.instruct_content.model_dump()
|
||||
|
||||
class MultiPersonaGraph(SolveGraph):
    """Multi-persona debate baseline.

    Four role-conditioned DebateAgents first answer independently, then revise
    while seeing each other's previous-round reasoning; a FinalDecisionAgent
    aggregates the last round into a single solution.
    """

    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        roles = [
            'Math Competition Champion',
            'Elementary School Math Teacher',
            'Math Professor',
            'Computer Scientist',
        ]
        self.debate_agents = [
            DebateAgent(self.llm, f"Debate Agent {i}", role)
            for i, role in enumerate(roles)
        ]
        self.final_decision_agent = FinalDecisionAgent(self.llm)

    async def __call__(self, problem):
        max_round = 2
        # Per-round records, indexed [round][agent].
        all_thinking = [[] for _ in range(max_round)]
        all_answers = [[] for _ in range(max_round)]

        for rnd in range(max_round):
            for idx, agent in enumerate(self.debate_agents):
                if rnd == 0:
                    # Opening round: every agent solves the problem independently.
                    result = await agent(problem, mode="context_fill")
                else:
                    # Show the agent its own previous reasoning first, then every
                    # other agent's reasoning from the last round.
                    prev = all_thinking[rnd - 1]
                    context = [f"{agent.role}'s previous round thinking: {prev[idx]}"]
                    context += [
                        f"{self.debate_agents[j].role}'s thinking: {prev[j]}"
                        for j in range(len(self.debate_agents))
                        if j != idx
                    ]
                    result = await agent(problem, context, mode="context_fill")
                all_thinking[rnd].append(result["thinking"])
                all_answers[rnd].append(result["answer"])

        # Aggregate only the final round's contributions into one answer.
        final_result = await self.final_decision_agent(
            problem,
            [f"{agent.role}'s final thinking: {thinking}"
             for agent, thinking in zip(self.debate_agents, all_thinking[-1])],
            [f"{agent.role}'s final answer: {answer}"
             for agent, answer in zip(self.debate_agents, all_answers[-1])],
            mode="context_fill",
        )
        return final_result, self.llm.cost_manager.total_cost
|
||||
|
||||
if __name__ == "__main__":
    import asyncio

    async def main():
        """Run the multi-persona debate baseline on a small GSM8K sample."""
        llm_config = ModelsConfig.default().get("deepseek-coder")
        solver = MultiPersonaGraph(name="multi-persona", llm_config=llm_config, dataset="Gsm8K")
        data_file = "examples/ags/data/gsm8k.jsonl"
        sample_count = 1
        result_dir = "examples/ags/data/baselines/general"
        score, cost = await gsm8k_evaluation(solver, data_file, sample_count, result_dir)
        return score, cost

    asyncio.run(main())
|
||||
115
examples/ags/experiments/baselines/self_refine_gsm8k.py
Normal file
115
examples/ags/experiments/baselines/self_refine_gsm8k.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
from examples.ags.scripts.operator import Operator
|
||||
from examples.ags.scripts.graph import SolveGraph
|
||||
from examples.ags.benchmark.gsm8k import gsm8k_evaluation
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
from metagpt.configs.models_config import ModelsConfig
|
||||
from metagpt.llm import LLM
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Dict, Any
|
||||
|
||||
# Prompt variant used by CoTGenerate: asks for the final answer wrapped in XML tags.
GSM8K_PROMPT_GPT = """
{question}\nPlease reason step by step, and put your final answer in the end. Wrap content using xml tags.
"""

# Alternate prompt variant: asks for the final answer inside \boxed{}.
# NOTE(review): currently unused in this file — presumably intended for
# DeepSeek-style models; confirm before removing.
GSM8K_PROMPT_DS = """
{question}\nPlease reason step by step, and put your final answer within \\boxed{{}}.
"""

# Asks the model to judge a candidate solution: boolean verdict plus feedback.
REVIEW_PROMPT = """
For the question described as {question},
please review the following solution: {solution}, and criticize on where might be wrong. You should provide a review result in boolean format.
If you believe the solution is capable of resolving the issue, return True; otherwise, return False, and include your feedback.
"""

# Asks the model to repair a rejected solution using the reviewer's feedback.
REVISE_PROMPT = """
For the question described as {question}, \nand an error solution: {solution}, \nwith the feedback: {feedback},
Given the previous solution and feedback, carefully refine the solution to solve the question and ensure it aligns with the original format.
"""
|
||||
|
||||
class GenerateOp(BaseModel):
    """Structured output holding a single candidate solution."""

    solution: str = Field(default="", description="solution for the problem")
|
||||
|
||||
class ReviewOp(BaseModel):
    """Structured output of a review pass: a boolean verdict and free-form feedback."""

    review_result: bool = Field(
        default=False,
        description="The Review Result (Bool). If you think this solution looks good for you, return 'true'; If not, return 'false'",
    )
    feedback: str = Field(
        default="",
        description="Your FeedBack for this problem based on the criteria. If the review result is true, you can put it 'nothing here'.",
    )
|
||||
|
||||
|
||||
class ReviseOp(BaseModel):
    """Structured output holding the solution revised according to review feedback."""

    solution: str = Field(default="", description="Based on the feedback, revised solution for this problem")
|
||||
|
||||
|
||||
class CoTGenerate(Operator):
    """Produces a chain-of-thought candidate solution for a GSM8K problem."""

    def __init__(self, llm: LLM, name: str = "Generate"):
        super().__init__(name, llm)

    async def __call__(self, problem, mode: str = None):
        # Uses the XML-tag prompt variant (GSM8K_PROMPT_GPT).
        generation_prompt = GSM8K_PROMPT_GPT.format(question=problem)
        fill_args = {"context": generation_prompt, "llm": self.llm}
        if mode:
            fill_args["mode"] = mode
        filled = await ActionNode.from_pydantic(GenerateOp).fill(**fill_args)
        return filled.instruct_content.model_dump()
|
||||
|
||||
class Review(Operator):
    """Judges a candidate solution, returning a boolean verdict and feedback."""

    def __init__(self, llm: LLM, name: str = "Review"):
        super().__init__(name, llm)

    async def __call__(self, problem, solution, mode: str = None):
        review_prompt = REVIEW_PROMPT.format(question=problem, solution=solution)
        fill_args = {"context": review_prompt, "llm": self.llm}
        if mode:
            fill_args["mode"] = mode
        filled = await ActionNode.from_pydantic(ReviewOp).fill(**fill_args)
        return filled.instruct_content.model_dump()
|
||||
|
||||
class Revise(Operator):
    """Refines a rejected solution using the reviewer's feedback.

    Fix: the constructor previously was `__init__(self, name="Revise", llm=LLM())`.
    That default `LLM()` was evaluated once at import time, and — because the
    visible caller passes the LLM positionally (`Revise(self.llm)`) — the
    configured LLM was bound to `name` while an unconfigured default instance
    did the actual work. The signature now takes `llm` first, matching
    CoTGenerate and Review and the caller's positional call.
    """

    def __init__(self, llm: LLM, name: str = "Revise"):
        super().__init__(name, llm)

    async def __call__(self, problem, solution, feedback, mode: str = None):
        prompt = REVISE_PROMPT.format(question=problem, solution=solution, feedback=feedback)
        fill_kwargs = {"context": prompt, "llm": self.llm}
        if mode:
            fill_kwargs["mode"] = mode
        node = await ActionNode.from_pydantic(ReviseOp).fill(**fill_kwargs)
        response = node.instruct_content.model_dump()
        return response
|
||||
|
||||
class SelfRefineGraph(SolveGraph):
    """Self-refine baseline: generate a solution, then loop review -> revise
    (at most 5 rounds) until the reviewer accepts it."""

    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        self.cot_generate = CoTGenerate(self.llm)
        self.review = Review(self.llm)
        self.revise = Revise(self.llm)

    async def __call__(self, problem):
        # `solution` is the model_dump() dict (e.g. {"solution": ...}); it is
        # passed whole into the review/revise prompts via str formatting.
        solution = await self.cot_generate(problem, mode="context_fill")
        for i in range(5):  # bounded number of refinement rounds
            # NOTE(review): review/revise are called without mode="context_fill",
            # unlike cot_generate — confirm whether that asymmetry is deliberate.
            review = await self.review(problem, solution)
            if review["review_result"]:
                # Reviewer accepted the current solution; stop refining.
                break
            solution = await self.revise(problem, solution, review["feedback"])
        return solution, self.llm.cost_manager.total_cost
|
||||
|
||||
if __name__ == "__main__":
    import asyncio

    async def main():
        """Run the self-refine baseline on a small GSM8K sample."""
        llm_config = ModelsConfig.default().get("deepseek-coder")
        # llm_config = ModelsConfig.default().get("gpt-4o-mini")
        # llm_config = ModelsConfig.default().get("gpt-35-turbo-1106")
        solver = SelfRefineGraph(name="self-refine", llm_config=llm_config, dataset="Gsm8K")
        data_file = "examples/ags/data/gsm8k.jsonl"
        sample_count = 10
        result_dir = "examples/ags/data/baselines/general"
        score, cost = await gsm8k_evaluation(solver, data_file, sample_count, result_dir)
        return score, cost

    asyncio.run(main())
|
||||
|
|
@ -511,7 +511,7 @@ class ActionNode:
|
|||
example_str = "\n".join(examples)
|
||||
# Add the example to the context
|
||||
context += f"""
|
||||
### response format (must be strictly followed) (do not include any other formats except for the given XML format): \n
|
||||
### Response format (must be strictly followed): All content must be enclosed in the given XML tags, ensuring each opening <tag> has a corresponding closing </tag>, with no incomplete or self-closing tags allowed.\n
|
||||
{example_str}
|
||||
"""
|
||||
return context
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue