class ThoughtTree(RenderTree):
    """Renderable tree whose nodes are ``ThoughtNode`` objects."""

    @property
    def all_nodes(self) -> List[ThoughtNode]:
        """Return every node produced by iterating over this tree."""
        # Iteration yields three-item rows; only the last item (the node) is kept.
        return [node for _, _, node in self]

    def update_node(self, thought: List[dict] = None, current_node: ThoughtNode = None) -> List[ThoughtNode]:
        """Create one child node per thought and attach it under ``current_node``.

        Args:
            thought: Dicts carrying a ``"node_state_instruction"`` entry (used as
                the node name) and a ``"node_id"`` entry (converted with ``int``).
                ``None`` is treated as "no thoughts".
            current_node: Node passed as ``parent`` for every created node.

        Returns:
            The newly created nodes, in the order of ``thought``.

        Raises:
            KeyError: If a thought lacks one of the two required entries.
            ValueError: If a ``"node_id"`` cannot be converted to ``int``.
        """
        # ``None`` replaces the former ``[]`` default so that no mutable object
        # is shared between calls.
        return [
            ThoughtNode(
                name=node_info["node_state_instruction"],
                parent=current_node,
                id=int(node_info["node_id"]),
            )
            for node_info in (thought or [])
        ]

    def parse_node_path(self, node) -> List[str]:
        """Return the node names on the path from the topmost ancestor to ``node``.

        Args:
            node: Node to start from; ``None`` yields an empty list.
        """
        full_node_path = []
        # Walk upwards through ``parent`` links, then flip the order so the
        # topmost ancestor comes first.
        while node is not None:
            full_node_path.append(node.name)
            node = node.parent
        full_node_path.reverse()
        return full_node_path

    def show(self) -> None:
        """Print every node together with its value and validity status."""
        print("\nUpdated Tree:")
        for pre, _, node in self:
            print(f"{pre}{node.name}, value: {node.value}, valid_status: {node.valid_status}")
class TextGenEvaluator(BaseEvaluator):
    """Turn an LLM vote ("The best choice is N") into a score for one node."""

    value_map = {'impossible': 0.001, 'likely': 1, 'sure': 20}  # TODO: ad hoc
    status_map = {val: key for key, val in value_map.items()}

    def __call__(self, evaluation: str, **kwargs) -> float:
        """Score a node by checking whether the evaluation voted for it.

        Args:
            evaluation: Raw evaluation text returned by the LLM.
            **kwargs: ``node_id`` is the id of the node being scored
                (defaults to ``"0"``).

        Returns:
            ``1`` when the last "best choice is <number>" statement names this
            node, otherwise ``0``.
        """
        node_id = kwargs.get("node_id", "0")
        try:
            # ``\D*`` skips filler such as " Choice " and ``(\d+)`` then captures
            # the whole number. The previous pattern put a greedy ``.*`` in
            # front of ``(\d+)``, which left only the final digit to capture
            # (id 12 was read as 2).
            votes = re.findall(r"best choice is\D*(\d+)", evaluation)
            # The conclusion is expected on the last line, so the last vote wins.
            if votes and int(votes[-1]) == int(node_id):
                return 1
        except (TypeError, ValueError):
            # A non-string evaluation or a non-numeric node id counts as
            # "no vote" instead of aborting the search.
            pass
        return 0

    def status_verify(self, value):
        """Return ``True`` when ``value`` maps to a label other than "impossible".

        Values that are not keys of ``status_map`` are reported as ``False``.
        """
        status = False
        if value in self.status_map:
            status_value = self.status_map[value]
            if status_value != "impossible":
                status = True
        return status
class Game24Evaluator(BaseEvaluator):
    """Map a sure/likely/impossible judgement from the LLM to a numeric value."""

    value_map = {'impossible': 0.001, 'likely': 1, 'sure': 20}  # TODO: ad hoc
    status_map = {val: key for key, val in value_map.items()}

    def __call__(self, evaluation: str, **kwargs) -> float:
        """Return the value of the first judgement word found in ``evaluation``.

        Args:
            evaluation: Raw evaluation text returned by the LLM.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The ``value_map`` entry for the first of "impossible", "sure" or
            "likely" that occurs as a whole word, or ``0.001`` when none is
            found or ``evaluation`` is not text.
        """
        try:
            found = re.search(r'\b(impossible|sure|likely)\b', evaluation)
        except TypeError:
            # Non-string input (e.g. ``None``) is scored like "impossible"
            # rather than propagating; this replaces a former bare ``except``.
            return 0.001
        if found is None:
            return 0.001
        return self.value_map.get(found.group(1), 0.001)

    def status_verify(self, value):
        """Return ``True`` when ``value`` maps to a label other than "impossible".

        Values that are not keys of ``status_map`` are reported as ``False``.
        """
        status = False
        if value in self.status_map:
            status_value = self.status_map[value]
            if status_value != "impossible":
                status = True
        return status
+''' + +score_prompt = '''Analyze the following passage, then at the last line conclude "Thus the coherency score is {s}", where s is an integer from 1 to 10. +''' \ No newline at end of file diff --git a/metagpt/strategy/prompt_templates/game24.py b/metagpt/strategy/prompt_templates/game24.py new file mode 100644 index 000000000..20b00fed0 --- /dev/null +++ b/metagpt/strategy/prompt_templates/game24.py @@ -0,0 +1,139 @@ +# 5-shot +standard_prompt = '''Use numbers and basic arithmetic operations (+ - * /) to obtain 24. +Input: 4 4 6 8 +Answer: (4 + 8) * (6 - 4) = 24 +Input: 2 9 10 12 +Answer: 2 * 12 * (10 - 9) = 24 +Input: 4 9 10 13 +Answer: (13 - 9) * (10 - 4) = 24 +Input: 1 4 8 8 +Answer: (8 / 4 + 1) * 8 = 24 +Input: 5 5 5 9 +Answer: 5 + 5 + 5 + 9 = 24 +Input: {input} +''' + +# 5-shot +cot_prompt = '''Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number. +Input: 4 4 6 8 +Steps: +4 + 8 = 12 (left: 4 6 12) +6 - 4 = 2 (left: 2 12) +2 * 12 = 24 (left: 24) +Answer: (6 - 4) * (4 + 8) = 24 +Input: 2 9 10 12 +Steps: +12 * 2 = 24 (left: 9 10 24) +10 - 9 = 1 (left: 1 24) +24 * 1 = 24 (left: 24) +Answer: (12 * 2) * (10 - 9) = 24 +Input: 4 9 10 13 +Steps: +13 - 10 = 3 (left: 3 4 9) +9 - 3 = 6 (left: 4 6) +4 * 6 = 24 (left: 24) +Answer: 4 * (9 - (13 - 10)) = 24 +Input: 1 4 8 8 +Steps: +8 / 4 = 2 (left: 1 2 8) +1 + 2 = 3 (left: 3 8) +3 * 8 = 24 (left: 24) +Answer: (1 + 8 / 4) * 8 = 24 +Input: 5 5 5 9 +Steps: +5 + 5 = 10 (left: 5 9 10) +10 + 5 = 15 (left: 9 15) +15 + 9 = 24 (left: 24) +Answer: ((5 + 5) + 5) + 9 = 24 +Input: {input} +''' + +# 1-shot +propose_prompt = '''Here is an Example for 1 input and 8 possible thoughts: +Input: 2 8 8 14 +Possible next steps: +2 + 8 = 10 (left: 8 10 14) +8 / 2 = 4 (left: 4 8 14) +14 + 2 = 16 (left: 8 8 16) +2 * 8 = 16 (left: 8 14 16) +8 - 2 = 6 (left: 6 8 14) +14 - 8 = 6 (left: 2 6 8) +14 / 2 = 7 (left: 7 8 8) +14 - 2 = 12 (left: 8 8 
12) + +Here is my task for 1 input and {n_generate_sample} possible thoughts: +Input: {input} +Possible next steps: + + +''' + +value_prompt = '''Evaluate if given numbers can reach 24 (sure/likely/impossible) +10 14 +10 + 14 = 24 +sure +11 12 +11 + 12 = 23 +12 - 11 = 1 +11 * 12 = 132 +11 / 12 = 0.91 +impossible +4 4 10 +4 + 4 + 10 = 8 + 10 = 18 +4 * 10 - 4 = 40 - 4 = 36 +(10 - 4) * 4 = 6 * 4 = 24 +sure +4 9 11 +9 + 11 + 4 = 20 + 4 = 24 +sure +5 7 8 +5 + 7 + 8 = 12 + 8 = 20 +(8 - 5) * 7 = 3 * 7 = 21 +I cannot obtain 24 now, but numbers are within a reasonable range +likely +5 6 6 +5 + 6 + 6 = 17 +(6 - 5) * 6 = 1 * 6 = 6 +I cannot obtain 24 now, but numbers are within a reasonable range +likely +10 10 11 +10 + 10 + 11 = 31 +(11 - 10) * 10 = 10 +10 10 10 are all too big +impossible +1 3 3 +1 * 3 * 3 = 9 +(1 + 3) * 3 = 12 +1 3 3 are all too small +impossible +{input} +''' + +value_last_step_prompt = '''Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Given an input and an answer, give a judgement (sure/impossible) if the answer is correct, i.e. it uses each input exactly once and no other numbers, and reach 24. 
class ThoughtSolverBase(BaseModel):
    """Shared machinery for tree-of-thought solvers.

    Provides thought generation, node evaluation, node selection and best-path
    extraction; concrete search strategies implement ``solve``.
    """

    # NOTE(review): annotated as ``str`` although the solvers in this module
    # assign a ThoughtTree instance to it inside ``solve`` -- confirm and retype.
    thought_tree: str = ""
    llm: BaseGPTAPI = Field(default_factory=LLM, exclude=True)
    config: ThoughtSolverConfig = Field(default_factory=ThoughtSolverConfig)

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)
        self.llm.use_system_prompt = False

    async def solve(self, init_prompt):
        """
        Solve method for subclasses to implement.
        """
        raise NotImplementedError("Subclasses must implement the solve method")

    @staticmethod
    def _parse_thoughts(raw: str) -> List[dict]:
        """Decode the thought list emitted by the LLM without executing code.

        Strict JSON is tried first. Python-literal syntax (single quotes,
        trailing commas) is accepted as a fallback through
        ``ast.literal_eval``, which only evaluates literals.

        Raises:
            ValueError, SyntaxError: If ``raw`` is neither JSON nor a literal.
        """
        import ast

        try:
            return json.loads(raw)
        except ValueError:
            return ast.literal_eval(raw)

    async def generate_thoughts(self, current_state="", current_node=None) -> List[ThoughtNode]:
        """
        Generate children thoughts based on the current state.

        Args:
            current_state (str): The current state for which thoughts are generated.
            current_node (ThoughtNode): The current node in the thought tree.

        Returns:
            List[ThoughtNode]: List of nodes representing the generated thoughts.
        """
        state_prompt = self.config.parser.propose(
            current_state=current_state, n_generate_sample=self.config.n_generate_sample
        )
        rsp = await self.llm.aask(msg=state_prompt + "\n" + OUTPUT_FORMAT)
        code_text = CodeParser.parse_code(block=None, text=rsp)
        # The model reply is untrusted text, so it is decoded as data; it was
        # previously passed to ``eval``.
        thoughts = self._parse_thoughts(code_text)
        # TODO: cap the number of thoughts at ``config.n_generate_sample`` in
        # case the model does not follow the instruction and returns too many.
        return self.thought_tree.update_node(thoughts, current_node=current_node)

    async def evaluate_node(self, node, parent_value) -> None:
        """
        Evaluate a node and update its status and value.

        Args:
            node (ThoughtNode): The node to be evaluated.
            parent_value (float): The parent node's value.

        Returns:
            None
        """
        eval_prompt = self.config.parser.value(input=node.name, node_id=node.id)
        evaluation = await self.llm.aask(msg=eval_prompt)

        value = self.config.evaluator(evaluation, node_id=node.id)
        status = self.config.evaluator.status_verify(value)

        node.update_valid_status(status=status)
        # Scores accumulate along the path: parent's value plus this node's own.
        node.update_value(parent_value + value)

    def select_nodes(self, thought_nodes: List[ThoughtNode]) -> List[ThoughtNode]:
        """
        Select nodes based on the configured selection method.

        Args:
            thought_nodes (List[ThoughtNode]): List of nodes to be selected.

        Returns:
            List[ThoughtNode]: List of selected nodes.

        Raises:
            NotImplementedError: If the configured method is not GREEDY.
            ValueError: If the configured method is not a known MethodSelect value.
        """
        # Accept both enum members and their string values ("greedy"/"sample");
        # a plain string used to fall through both branches and end in an
        # unbound-local error.
        method = MethodSelect(self.config.method_select)
        if method != MethodSelect.GREEDY:
            raise NotImplementedError

        ranked = sorted(thought_nodes, key=lambda x: x.value, reverse=True)
        selected = ranked[:self.config.n_select_sample]
        for node in thought_nodes:
            if node not in selected:
                node.parent = None  # detach the rejected node from the tree
        return selected

    def update_solution(self):
        """
        Select the result with the highest score.

        Returns:
            - List[ThoughtNode]: List of nodes representing the best solution.
            - List[str]: List of node names forming the best solution path.
        """
        best_node = max(self.thought_tree.all_nodes, key=lambda x: x.value, default=None)
        best_solution_path = self.thought_tree.parse_node_path(best_node)
        return [best_node], best_solution_path
async def _dfs(self, root_node):
    """
    Perform Depth-First Search (DFS) on the thought tree (DFSSolver method).

    At every step the children of the current node are generated, only the
    first child is evaluated, and the walk descends into it. The walk stops
    after ``config.max_steps`` steps, when a second invalid child is met, or
    when no child is generated.

    Args:
        root_node (ThoughtNode): The root node of the thought tree.

    Returns:
        List[str]: The solution path obtained through DFS.
    """
    impossible_state_cnt = 0
    node = root_node
    # The step limit lives on the solver config; the solver itself declares no
    # ``max_steps`` attribute, so ``self.max_steps`` raised AttributeError.
    for _ in range(self.config.max_steps):
        current_state = self.config.parser(node.name)
        current_value = node.value
        thought_nodes = await self.generate_thoughts(current_state, current_node=node)
        if not thought_nodes:
            # Nothing to descend into; indexing the first child would fail.
            logger.info("no thoughts generated, break")
            break

        first_child = thought_nodes[0]
        await self.evaluate_node(first_child, parent_value=current_value)
        if first_child.valid_status is False:
            impossible_state_cnt += 1
        if impossible_state_cnt >= 2:
            logger.info("impossible state reached, break")
            break
        node = first_child

    _solution_path = self.thought_tree.parse_node_path(node)
    self.thought_tree.show()

    return _solution_path
class TreeofThought(BaseModel):
    """Facade that builds the solver for a strategy and delegates ``solve`` to it."""

    config: ThoughtSolverConfig = Field(default_factory=ThoughtSolverConfig)
    # NOTE(review): this default builds a ThoughtSolverBase that ``__init__``
    # immediately replaces with the strategy-specific solver.
    solver: ThoughtSolverBase = Field(default_factory=ThoughtSolverBase)
    strategy: Strategy = Field(default=Strategy.BFS)

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)
        self._initialize_solver(self.strategy)

    def _initialize_solver(self, strategy):
        """
        Initialize the solver based on the chosen strategy.

        The created solver is stored on ``self.solver``; nothing is returned.

        Args:
            strategy (Strategy): The strategy to use for solving.

        Raises:
            NotImplementedError: If ``strategy`` is not BFS, DFS or MCTS.
        """
        solver_classes = {
            Strategy.BFS: BFSSolver,
            Strategy.DFS: DFSSolver,
            Strategy.MCTS: MCTSSolver,
        }
        solver_cls = solver_classes.get(strategy)
        if solver_cls is None:
            raise NotImplementedError(f"Invalid strategy: {strategy}, only support BFS/DFS/MCTS currently!")
        self.solver = solver_cls(config=self.config)

    async def solve(self, init_prompt=""):
        """
        Solve the problem using the configured strategy.

        Args:
            init_prompt (str): The initial prompt for the solver.

        Returns:
            Any: Whatever the selected solver's ``solve`` returns.
        """
        # The solver's result used to be awaited and then discarded, so callers
        # always received ``None``.
        return await self.solver.solve(init_prompt)