diff --git a/examples/spo/README.md b/examples/spo/README.md
index fe22fc80f..602ad9faa 100644
--- a/examples/spo/README.md
+++ b/examples/spo/README.md
@@ -15,7 +15,7 @@
 ## πŸš€ Quick Start
 
 ### 1. Configure Your API Key βš™οΈ
-Configure LLM parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
+Configure LLM parameters in `config/config2.yaml` (see `examples/spo/config2.example.yaml` for reference)
 
 ### 2. Define Your Iteration template πŸ“
 Create an Iteration template file `metagpt/ext/spo/settings/task_name.yaml`:
@@ -48,39 +48,39 @@ ### 2. Define Your Iteration template πŸ“
 - `question`: Questions from the dataset used for iteration
 - `answer`: Corresponding answers. Can contain desired thinking patterns or responses instead of actual answers, or can be left empty. See `metagpt/ext/spo/settings/Navigate.yaml` for reference
 
-### 3. Implement the Optimizer πŸ”§
+### 3. Implement the PromptOptimizer πŸ”§
 
 Use `metagpt/ext/spo/optimize.py` to execute:
 
 ```python
-from metagpt.ext.spo.components.optimizer import Optimizer
+from metagpt.ext.spo.components.optimizer import PromptOptimizer
 from metagpt.ext.spo.utils.llm_client import SPO_LLM
 
 if __name__ == "__main__":
-# Initialize LLM settings
-SPO_LLM.initialize(
-    optimize_kwargs={"model": "claude-3-5-sonnet-20240620", "temperature": 0.7},
-    evaluate_kwargs={"model": "gpt-4o-mini", "temperature": 0.3},
-    execute_kwargs={"model": "gpt-4o-mini", "temperature": 0}
-)
+    # Initialize LLM settings
+    SPO_LLM.initialize(
+        optimize_kwargs={"model": "claude-3-5-sonnet-20240620", "temperature": 0.7},
+        evaluate_kwargs={"model": "gpt-4o-mini", "temperature": 0.3},
+        execute_kwargs={"model": "gpt-4o-mini", "temperature": 0}
+    )
 
-# Create and run optimizer
-optimizer = Optimizer(
-    optimized_path="workspace",  # Output directory
-    initial_round=1,  # Starting round
-    max_rounds=10,  # Maximum optimization rounds
-    template="Poem.yaml",  # Template file
-    name="Poem",  # Project name
-    iteration=True,  # Enable iteration mode
-)
+    # Create and run optimizer
+    optimizer = PromptOptimizer(
+        optimized_path="workspace",  # Output directory
+        initial_round=1,  # Starting round
+        max_rounds=10,  # Maximum optimization rounds
+        template="Poem.yaml",  # Template file
+        name="Poem",  # Project name
+        iteration=True,  # Enable iteration mode
+    )
 
-optimizer.optimize()
+    optimizer.optimize()
 ```
 
 Or you can use the command line interface:
 
 ```bash
-python optimize.py [options]
+python -m examples.spo.optimize
 ```
 
 Available command line options:
diff --git a/examples/spo/optimize.py b/examples/spo/optimize.py
index 102df0c06..0fa110f65 100644
--- a/examples/spo/optimize.py
+++ b/examples/spo/optimize.py
@@ -1,10 +1,10 @@
 import argparse
 
-from metagpt.ext.spo.components.optimizer import Optimizer
+from metagpt.ext.spo.components.optimizer import PromptOptimizer
 from metagpt.ext.spo.utils.llm_client import SPO_LLM
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(description='SPO Optimizer CLI')
+    parser = argparse.ArgumentParser(description='SPO PromptOptimizer CLI')
 
     # LLM parameter
     parser.add_argument('--opt-model', type=str, default='claude-3-5-sonnet-20240620',
@@ -20,7 +20,7 @@ def parse_args():
     parser.add_argument('--exec-temp', type=float, default=0,
                         help='Temperature for execution')
 
-    # Optimizer parameter
+    # PromptOptimizer parameter
     parser.add_argument('--workspace', type=str, default='workspace',
                         help='Path for optimized output')
     parser.add_argument('--initial-round', type=int, default=1,
@@ -55,7 +55,7 @@ def main():
         }
     )
 
-    optimizer = Optimizer(
+    optimizer = PromptOptimizer(
         optimized_path=args.workspace,
         initial_round=args.initial_round,
         max_rounds=args.max_rounds,
diff --git a/metagpt/ext/spo/components/evaluator.py b/metagpt/ext/spo/components/evaluator.py
index cca5159c0..f59213996 100644
--- a/metagpt/ext/spo/components/evaluator.py
+++ b/metagpt/ext/spo/components/evaluator.py
@@ -4,11 +4,11 @@
 # @Desc    : Evaluation for different datasets
 import asyncio
 from typing import Dict, Any
-
 from metagpt.ext.spo.utils import load
 from metagpt.ext.spo.prompts.evaluate_prompt import EVALUATE_PROMPT
 import random
 from metagpt.ext.spo.utils.llm_client import SPO_LLM, extract_content
+from metagpt.logs import logger
 
 
 class QuickExecute:
@@ -28,7 +28,7 @@ class QuickExecute:
         async def fetch_answer(q: str) -> Dict[str, Any]:
             messages = [{"role": "user", "content": f"{self.prompt}\n\n{q}"}]
             try:
-                answer = await self.llm.responser(role="execute", messages=messages)
+                answer = await self.llm.responser(type="execute", messages=messages)
                 return {'question': q, 'answer': answer}
             except Exception as e:
                 return {'question': q, 'answer': str(e)}
@@ -47,37 +47,34 @@ class QuickEvaluate:
     def __init__(self):
         self.llm = SPO_LLM.get_instance()
 
-    async def prompt_evaluate(self, sample: list, new_sample: list) -> bool:
+    async def prompt_evaluate(self, samples: list, new_samples: list) -> bool:
         _, requirement, qa, _ = load.load_meta_data()
 
         if random.random() < 0.5:
-            sample, new_sample = new_sample, sample
+            samples, new_samples = new_samples, samples
             is_swapped = True
         else:
             is_swapped = False
 
         messages = [{"role": "user", "content": EVALUATE_PROMPT.format(
             requirement=requirement,
-            sample=sample,
-            new_sample=new_sample,
+            sample=samples,
+            new_sample=new_samples,
             answers=str(qa))}]
 
         try:
-            response = await self.llm.responser(role="evaluate", messages=messages)
+            response = await self.llm.responser(type="evaluate", messages=messages)
             choose = extract_content(response, 'choose')
-
-            if is_swapped:
-                return choose == "A"
-            return choose == "B"
+            return choose == "A" if is_swapped else choose == "B"
         except Exception as e:
-            print(e)
+            logger.error(e)
             return False
 
 
 if __name__ == "__main__":
-    execute = QuickExecute(prompt="Answer the Question,{question}")
-    answers = asyncio.run(execute.prompt_evaluate())
+    execute = QuickExecute(prompt="Answer the Question")
+    answers = asyncio.run(execute.prompt_execute())
     print(answers)
diff --git a/metagpt/ext/spo/components/optimizer.py b/metagpt/ext/spo/components/optimizer.py
index 301b5fd54..7c5585158 100644
--- a/metagpt/ext/spo/components/optimizer.py
+++ b/metagpt/ext/spo/components/optimizer.py
@@ -14,7 +14,7 @@
 from metagpt.ext.spo.utils.llm_client import extract_content, SPO_LLM
 
 
-class Optimizer:
+class PromptOptimizer:
     def __init__(
         self,
         optimized_path: str = None,
@@ -39,7 +39,7 @@ class Optimizer:
         self.llm = SPO_LLM.get_instance()
 
     def optimize(self):
-        if self.iteration is True:
+        if self.iteration:
             for opt_round in range(self.max_rounds):
                 loop = asyncio.new_event_loop()
@@ -68,8 +68,9 @@ class Optimizer:
         prompt, _, _, _ = load.load_meta_data()
         self.prompt = prompt
         self.prompt_utils.write_prompt(directory, prompt=self.prompt)
-        new_sample = await self.evaluation_utils.execute_prompt(self, directory, initial=True)
-        _, answers = await self.evaluation_utils.evaluate_prompt(self, None, new_sample, path=prompt_path, data=data, initial=True)
+        new_samples = await self.evaluation_utils.execute_prompt(self, directory, initial=True)
+        _, answers = await self.evaluation_utils.evaluate_prompt(self, None, new_samples, path=prompt_path,
+                                                                 data=data, initial=True)
 
         self.prompt_utils.write_answers(directory, answers=answers)
@@ -79,20 +80,20 @@ class Optimizer:
 
         top_round = self.data_utils.get_best_round()
 
-        sample = top_round
+        samples = top_round
 
-        logger.info(f"choose {sample['round']}")
+        logger.info(f"choose {samples['round']}")
 
         golden_answer = self.data_utils.list_to_markdown(qa)
-        best_answer = self.data_utils.list_to_markdown(sample["answers"])
+        best_answer = self.data_utils.list_to_markdown(samples["answers"])
 
         optimize_prompt = PROMPT_OPTIMIZE_PROMPT.format(
-            prompt=sample["prompt"], answers=best_answer,
+            prompt=samples["prompt"], answers=best_answer,
             requirements=requirements, golden_answers=golden_answer, count=count)
 
-        response = await self.llm.responser(role="optimize", messages=[{"role": "user", "content": optimize_prompt}])
+        response = await self.llm.responser(type="optimize", messages=[{"role": "user", "content": optimize_prompt}])
 
         modification = extract_content(response, "modification")
@@ -105,19 +106,16 @@ class Optimizer:
         else:
             self.prompt = ""
 
-        logger.info(directory)
-
         self.prompt_utils.write_prompt(directory, prompt=self.prompt)
-        new_sample = await self.evaluation_utils.execute_prompt(self, directory, data)
+        new_samples = await self.evaluation_utils.execute_prompt(self, directory, data)
 
-        success, answers = await self.evaluation_utils.evaluate_prompt(self, sample, new_sample,
-                                                                       path=prompt_path,
+        success, answers = await self.evaluation_utils.evaluate_prompt(self, samples, new_samples, path=prompt_path,
                                                                        data=data, initial=False)
 
         self.prompt_utils.write_answers(directory, answers=answers)
 
-        logger.info(success)
+        logger.info(f"Current round optimization successful: {success}")
 
         logger.info(f"now is {self.round + 1}")
diff --git a/metagpt/ext/spo/optimize.py b/metagpt/ext/spo/optimize.py
index fa6d7e792..b2fd814c3 100644
--- a/metagpt/ext/spo/optimize.py
+++ b/metagpt/ext/spo/optimize.py
@@ -1,4 +1,4 @@
-from metagpt.ext.spo.components.optimizer import Optimizer
+from metagpt.ext.spo.components.optimizer import PromptOptimizer
 from metagpt.ext.spo.utils.llm_client import SPO_LLM
 
 
@@ -10,7 +10,7 @@ if __name__ == "__main__":
         execute_kwargs={"model": "gpt-4o-mini", "temperature": 0}
     )
 
-    optimizer = Optimizer(
+    optimizer = PromptOptimizer(
         optimized_path="workspace",
         initial_round=1,
         max_rounds=10,
diff --git a/metagpt/ext/spo/prompts/optimize_prompt.py b/metagpt/ext/spo/prompts/optimize_prompt.py
index 09e20acbc..f6ca81e33 100644
--- a/metagpt/ext/spo/prompts/optimize_prompt.py
+++ b/metagpt/ext/spo/prompts/optimize_prompt.py
@@ -1,5 +1,5 @@
 PROMPT_OPTIMIZE_PROMPT = """
-You are building a prompt to address user requirement.Based on the given prompt,
+You are building a prompt to address user requirement. Based on the given prompt,
 please reconstruct and optimize it. You can add, modify, or delete prompts. Please include a single modification in XML tags in your reply. During the optimization, you can incorporate any thinking models.
 
 This is a prompt that performed excellently in a previous iteration. You must make further optimizations and improvements based on this prompt. The modified prompt must differ from the provided example.
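
Reviewer note: both prompt templates rely on the model wrapping the part that gets parsed in XML tags (`<choose>...</choose>` in `EVALUATE_PROMPT`, `<modification>...</modification>` here), which the hunks above read back via `extract_content(response, "choose")` and `extract_content(response, "modification")`. The helper itself lives in `metagpt/ext/spo/utils/llm_client.py` and is not touched by this patch; a minimal regex-based sketch of the behavior the call sites assume:

```python
import re
from typing import Optional


def extract_content(text: str, tag: str) -> Optional[str]:
    """Return the text inside <tag>...</tag>, or None if the tag is absent.

    This is an illustrative sketch, not the repo's implementation.
    """
    match = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)
    return match.group(1).strip() if match else None


# e.g. parsing the optimizer's reply:
reply = "<modification>Ask for step-by-step reasoning.</modification>"
assert extract_content(reply, "modification") == "Ask for step-by-step reasoning."
```

Under this reading, a missing tag yields `None`, which in `prompt_evaluate` would make `choose == "A"` / `choose == "B"` evaluate to `False`, i.e. an unparseable reply degrades to a failed comparison.
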
diff --git a/metagpt/ext/spo/utils/data_utils.py b/metagpt/ext/spo/utils/data_utils.py
index 7814959a7..cb75504b0 100644
--- a/metagpt/ext/spo/utils/data_utils.py
+++ b/metagpt/ext/spo/utils/data_utils.py
@@ -4,10 +4,8 @@
 import os
 import random
 from typing import Union, List, Dict
 import pandas as pd
-import yaml
+from metagpt.logs import logger
 
-FILE_NAME = ''
-SAMPLE_K = 3
 
 
 class DataUtils:
@@ -52,21 +50,38 @@ class DataUtils:
             json.dump(data, file, default=str, indent=4)
 
     def _load_scores(self):
         rounds_dir = os.path.join(self.root_path, "prompts")
         result_file = os.path.join(rounds_dir, "results.json")
         self.top_scores = []
 
-        with open(result_file, "r", encoding="utf-8") as file:
-            data = json.load(file)
-            df = pd.DataFrame(data)
+        try:
+            if not os.path.exists(result_file):
+                logger.warning(f"Results file not found at {result_file}")
+                return self.top_scores
 
-            for index, row in df.iterrows():
-                self.top_scores.append(
-                    {"round": row["round"], "succeed": row["succeed"], "prompt": row["prompt"], "answers": row['answers']})
+            with open(result_file, "r", encoding="utf-8") as file:
+                data = json.load(file)
 
-            self.top_scores.sort(key=lambda x: x["round"], reverse=True)
+            df = pd.DataFrame(data)
+
+            for index, row in df.iterrows():
+                self.top_scores.append({
+                    "round": row["round"],
+                    "succeed": row["succeed"],
+                    "prompt": row["prompt"],
+                    "answers": row['answers']
+                })
+
+            self.top_scores.sort(key=lambda x: x["round"], reverse=True)
+
+        except FileNotFoundError:
+            logger.error(f"Could not find results file: {result_file}")
+        except json.JSONDecodeError:
+            logger.error(f"Invalid JSON format in file: {result_file}")
+        except Exception as e:
+            logger.error(f"Unexpected error loading scores: {str(e)}")
 
         return self.top_scores
diff --git a/metagpt/ext/spo/utils/evaluation_utils.py b/metagpt/ext/spo/utils/evaluation_utils.py
index 2e2dd8016..5b598070b 100644
--- a/metagpt/ext/spo/utils/evaluation_utils.py
+++ b/metagpt/ext/spo/utils/evaluation_utils.py
@@ -2,6 +2,7 @@ from metagpt.ext.spo.components.evaluator import QuickEvaluate, QuickExecute
 from metagpt.logs import logger
 import tiktoken
 
+EVALUATION_REPETITION = 4
 
 
 def count_tokens(sample):
     if sample is None:
@@ -17,9 +18,9 @@ class EvaluationUtils:
     async def execute_prompt(self, optimizer, prompt_path, initial=False):
         optimizer.prompt = optimizer.prompt_utils.load_prompt(optimizer.round, prompt_path)
-        evaluator = QuickExecute(prompt=optimizer.prompt)
+        executor = QuickExecute(prompt=optimizer.prompt)
 
-        answers = await evaluator.prompt_execute()
+        answers = await executor.prompt_execute()
 
         cur_round = optimizer.round + 1 if not initial else optimizer.round
 
@@ -27,17 +28,17 @@ class EvaluationUtils:
 
         return new_data
 
-    async def evaluate_prompt(self, optimizer, sample, new_sample, path, data, initial=False):
+    async def evaluate_prompt(self, optimizer, samples, new_samples, path, data, initial=False):
         evaluator = QuickEvaluate()
-        new_token = count_tokens(new_sample)
+        new_token = count_tokens(new_samples)
 
         if initial is True:
             succeed = True
         else:
             evaluation_results = []
-            for _ in range(4):
-                result = await evaluator.prompt_evaluate(sample=sample, new_sample=new_sample)
+            for _ in range(EVALUATION_REPETITION):
+                result = await evaluator.prompt_evaluate(samples=samples, new_samples=new_samples)
                 evaluation_results.append(result)
 
             logger.info(evaluation_results)
@@ -46,8 +47,8 @@ class EvaluationUtils:
             false_count = evaluation_results.count(False)
             succeed = true_count > false_count
 
-        new_data = optimizer.data_utils.create_result_data(new_sample['round'], new_sample['answers'],
-                                                           new_sample['prompt'], succeed, new_token)
+        new_data = optimizer.data_utils.create_result_data(new_samples['round'], new_samples['answers'],
+                                                           new_samples['prompt'], succeed, new_token)
 
         data.append(new_data)
 
@@ -55,6 +56,6 @@
         optimizer.data_utils.save_results(result_path, data)
 
-        answers = new_sample['answers']
+        answers = new_samples['answers']
 
         return succeed, answers
diff --git a/metagpt/ext/spo/utils/llm_client.py b/metagpt/ext/spo/utils/llm_client.py
index 007ef1bb8..3ad31fcd5 100644
--- a/metagpt/ext/spo/utils/llm_client.py
+++ b/metagpt/ext/spo/utils/llm_client.py
@@ -15,20 +15,33 @@ class SPO_LLM:
     def _load_llm_config(self, kwargs: dict):
         model = kwargs.get('model')
-        config = ModelsConfig.default().get(model).model_copy()
+        if not model:
+            raise ValueError("'model' parameter is required")
 
-        for key, value in kwargs.items():
-            if hasattr(config, key):
-                setattr(config, key, value)
+        try:
+            model_config = ModelsConfig.default().get(model)
+            if model_config is None:
+                raise ValueError(f"Model '{model}' not found in configuration")
 
-        return config
+            config = model_config.model_copy()
 
-    async def responser(self, role: str, messages):
-        if role == "optimize":
+            for key, value in kwargs.items():
+                if hasattr(config, key):
+                    setattr(config, key, value)
+
+            return config
+
+        except AttributeError as e:
+            raise ValueError(f"Model '{model}' not found in configuration") from e
+        except Exception as e:
+            raise ValueError(f"Error loading configuration for model '{model}': {str(e)}") from e
+
+    async def responser(self, type: str, messages):
+        if type == "optimize":
             response = await self.optimize_llm.acompletion(messages)
-        elif role == "evaluate":
+        elif type == "evaluate":
             response = await self.evaluate_llm.acompletion(messages)
-        elif role == "execute":
+        elif type == "execute":
             response = await self.execute_llm.acompletion(messages)
         else:
             raise ValueError("Please set the correct name: optimize, evaluate or execute")
@@ -66,11 +79,11 @@ async def spo():
     # test messages
     hello_msg = [{"role": "user", "content": "hello"}]
 
-    response = await llm.responser(role='execute', messages=hello_msg)
+    response = await llm.responser(type='execute', messages=hello_msg)
     print(f"AI: {response}")
 
-    response = await llm.responser(role='optimize', messages=hello_msg)
+    response = await llm.responser(type='optimize', messages=hello_msg)
     print(f"AI: {response}")
 
-    response = await llm.responser(role='evaluate', messages=hello_msg)
+    response = await llm.responser(type='evaluate', messages=hello_msg)
     print(f"AI: {response}")
diff --git a/metagpt/ext/spo/utils/load.py b/metagpt/ext/spo/utils/load.py
index 2e931a615..14dee74c6 100644
--- a/metagpt/ext/spo/utils/load.py
+++ b/metagpt/ext/spo/utils/load.py
@@ -2,7 +2,7 @@ import yaml
 import random
 import os
 
-FILE_NAME = 'meta.yaml'
+FILE_NAME = ''
 SAMPLE_K = 3
 
 def set_file_name(name):
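
Reviewer note: taken together, the public surface after this patch is the renamed `PromptOptimizer` class plus `responser(type=...)` in place of `responser(role=...)`. A rough end-to-end smoke test of that surface, assembled from the README example and the `spo()` test above — a sketch that assumes a configured `config/config2.yaml`; the model names and template values are the README's examples, not requirements:

```python
import asyncio

from metagpt.ext.spo.components.optimizer import PromptOptimizer
from metagpt.ext.spo.utils.llm_client import SPO_LLM


async def smoke_test() -> None:
    # Initialize the three LLM roles first, as the README example does.
    SPO_LLM.initialize(
        optimize_kwargs={"model": "claude-3-5-sonnet-20240620", "temperature": 0.7},
        evaluate_kwargs={"model": "gpt-4o-mini", "temperature": 0.3},
        execute_kwargs={"model": "gpt-4o-mini", "temperature": 0},
    )
    llm = SPO_LLM.get_instance()

    # `responser` now dispatches on `type`; any other value raises ValueError.
    hello = [{"role": "user", "content": "hello"}]
    for kind in ("optimize", "evaluate", "execute"):
        print(kind, "->", await llm.responser(type=kind, messages=hello))


if __name__ == "__main__":
    asyncio.run(smoke_test())

    # The renamed entry point; keyword arguments are the README's example values.
    optimizer = PromptOptimizer(
        optimized_path="workspace",
        initial_round=1,
        max_rounds=10,
        template="Poem.yaml",
        name="Poem",
        iteration=True,
    )
    optimizer.optimize()
```
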