mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-25 00:36:55 +02:00
update SPO code
This commit is contained in:
parent
5140804f75
commit
8888cf9652
10 changed files with 112 additions and 90 deletions
|
|
@ -1,4 +1,4 @@
|
|||
# SPO 🤖 | Self-Supervised Prompt Optimizer
|
||||
# SPO 🤖 | Self-Supervised Prompt Optimizer
|
||||
|
||||
An automated prompt engineering tool for Large Language Models (LLMs), designed for universal domain adaptation.
|
||||
|
||||
|
|
@ -15,7 +15,7 @@ ## 🚀 Quick Start
|
|||
|
||||
### 1. Configure Your API Key ⚙️
|
||||
|
||||
Configure LLM parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
|
||||
Configure LLM parameters in `config/config2.yaml` (see `examples/spo/config2.example.yaml` for reference)
|
||||
### 2. Define Your Iteration template 📝
|
||||
|
||||
Create an Iteration template file `metagpt/ext/spo/settings/task_name.yaml`:
|
||||
|
|
@ -48,39 +48,39 @@ ### 2. Define Your Iteration template 📝
|
|||
- `question`: Questions from the dataset used for iteration
|
||||
- `answer`: Corresponding answers. Can contain desired thinking patterns or responses instead of actual answers, or can be left empty. See `metagpt/ext/spo/settings/Navigate.yaml` for reference
|
||||
|
||||
### 3. Implement the Optimizer 🔧
|
||||
### 3. Implement the PromptOptimizer 🔧
|
||||
|
||||
Use `metagpt/ext/spo/optimize.py` to execute:
|
||||
|
||||
```python
|
||||
from metagpt.ext.spo.components.optimizer import Optimizer
|
||||
from metagpt.ext.spo.components.optimizer import PromptOptimizer
|
||||
from metagpt.ext.spo.utils.llm_client import SPO_LLM
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Initialize LLM settings
|
||||
SPO_LLM.initialize(
|
||||
optimize_kwargs={"model": "claude-3-5-sonnet-20240620", "temperature": 0.7},
|
||||
evaluate_kwargs={"model": "gpt-4o-mini", "temperature": 0.3},
|
||||
execute_kwargs={"model": "gpt-4o-mini", "temperature": 0}
|
||||
)
|
||||
# Initialize LLM settings
|
||||
SPO_LLM.initialize(
|
||||
optimize_kwargs={"model": "claude-3-5-sonnet-20240620", "temperature": 0.7},
|
||||
evaluate_kwargs={"model": "gpt-4o-mini", "temperature": 0.3},
|
||||
execute_kwargs={"model": "gpt-4o-mini", "temperature": 0}
|
||||
)
|
||||
|
||||
# Create and run optimizer
|
||||
optimizer = Optimizer(
|
||||
optimized_path="workspace", # Output directory
|
||||
initial_round=1, # Starting round
|
||||
max_rounds=10, # Maximum optimization rounds
|
||||
template="Poem.yaml", # Template file
|
||||
name="Poem", # Project name
|
||||
iteration=True, # Enable iteration mode
|
||||
)
|
||||
# Create and run optimizer
|
||||
optimizer = PromptOptimizer(
|
||||
optimized_path="workspace", # Output directory
|
||||
initial_round=1, # Starting round
|
||||
max_rounds=10, # Maximum optimization rounds
|
||||
template="Poem.yaml", # Template file
|
||||
name="Poem", # Project name
|
||||
iteration=True, # Enable iteration mode
|
||||
)
|
||||
|
||||
optimizer.optimize()
|
||||
optimizer.optimize()
|
||||
```
|
||||
|
||||
Or you can use the command line interface:
|
||||
|
||||
```bash
|
||||
python optimize.py [options]
|
||||
python -m examples.spo.optimize
|
||||
```
|
||||
|
||||
Available command line options:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import argparse
|
||||
from metagpt.ext.spo.components.optimizer import Optimizer
|
||||
from metagpt.ext.spo.components.optimizer import PromptOptimizer
|
||||
from metagpt.ext.spo.utils.llm_client import SPO_LLM
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='SPO Optimizer CLI')
|
||||
parser = argparse.ArgumentParser(description='SPO PromptOptimizer CLI')
|
||||
|
||||
# LLM parameter
|
||||
parser.add_argument('--opt-model', type=str, default='claude-3-5-sonnet-20240620',
|
||||
|
|
@ -20,7 +20,7 @@ def parse_args():
|
|||
parser.add_argument('--exec-temp', type=float, default=0,
|
||||
help='Temperature for execution')
|
||||
|
||||
# Optimizer parameter
|
||||
# PromptOptimizer parameter
|
||||
parser.add_argument('--workspace', type=str, default='workspace',
|
||||
help='Path for optimized output')
|
||||
parser.add_argument('--initial-round', type=int, default=1,
|
||||
|
|
@ -55,7 +55,7 @@ def main():
|
|||
}
|
||||
)
|
||||
|
||||
optimizer = Optimizer(
|
||||
optimizer = PromptOptimizer(
|
||||
optimized_path=args.workspace,
|
||||
initial_round=args.initial_round,
|
||||
max_rounds=args.max_rounds,
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@
|
|||
# @Desc : Evaluation for different datasets
|
||||
import asyncio
|
||||
from typing import Dict, Any
|
||||
|
||||
from metagpt.ext.spo.utils import load
|
||||
from metagpt.ext.spo.prompts.evaluate_prompt import EVALUATE_PROMPT
|
||||
import random
|
||||
from metagpt.ext.spo.utils.llm_client import SPO_LLM, extract_content
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class QuickExecute:
|
||||
|
|
@ -28,7 +28,7 @@ class QuickExecute:
|
|||
async def fetch_answer(q: str) -> Dict[str, Any]:
|
||||
messages = [{"role": "user", "content": f"{self.prompt}\n\n{q}"}]
|
||||
try:
|
||||
answer = await self.llm.responser(role="execute", messages=messages)
|
||||
answer = await self.llm.responser(type="execute", messages=messages)
|
||||
return {'question': q, 'answer': answer}
|
||||
except Exception as e:
|
||||
return {'question': q, 'answer': str(e)}
|
||||
|
|
@ -47,37 +47,34 @@ class QuickEvaluate:
|
|||
def __init__(self):
|
||||
self.llm = SPO_LLM.get_instance()
|
||||
|
||||
async def prompt_evaluate(self, sample: list, new_sample: list) -> bool:
|
||||
async def prompt_evaluate(self, samples: list, new_samples: list) -> bool:
|
||||
_, requirement, qa, _ = load.load_meta_data()
|
||||
|
||||
if random.random() < 0.5:
|
||||
sample, new_sample = new_sample, sample
|
||||
samples, new_samples = new_samples, samples
|
||||
is_swapped = True
|
||||
else:
|
||||
is_swapped = False
|
||||
|
||||
messages = [{"role": "user", "content": EVALUATE_PROMPT.format(
|
||||
requirement=requirement,
|
||||
sample=sample,
|
||||
new_sample=new_sample,
|
||||
sample=samples,
|
||||
new_sample=new_samples,
|
||||
answers=str(qa))}]
|
||||
|
||||
try:
|
||||
response = await self.llm.responser(role="evaluate", messages=messages)
|
||||
response = await self.llm.responser(type="evaluate", messages=messages)
|
||||
choose = extract_content(response, 'choose')
|
||||
|
||||
if is_swapped:
|
||||
return choose == "A"
|
||||
return choose == "B"
|
||||
return choose == "A" if is_swapped else choose == "B"
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
return False
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
execute = QuickExecute(prompt="Answer the Question,{question}")
|
||||
execute = QuickExecute(prompt="Answer the Question")
|
||||
|
||||
answers = asyncio.run(execute.prompt_evaluate())
|
||||
print(answers)
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from metagpt.ext.spo.utils.llm_client import extract_content, SPO_LLM
|
|||
|
||||
|
||||
|
||||
class Optimizer:
|
||||
class PromptOptimizer:
|
||||
def __init__(
|
||||
self,
|
||||
optimized_path: str = None,
|
||||
|
|
@ -39,7 +39,7 @@ class Optimizer:
|
|||
self.llm = SPO_LLM.get_instance()
|
||||
|
||||
def optimize(self):
|
||||
if self.iteration is True:
|
||||
if self.iteration:
|
||||
|
||||
for opt_round in range(self.max_rounds):
|
||||
loop = asyncio.new_event_loop()
|
||||
|
|
@ -68,8 +68,9 @@ class Optimizer:
|
|||
prompt, _, _, _ = load.load_meta_data()
|
||||
self.prompt = prompt
|
||||
self.prompt_utils.write_prompt(directory, prompt=self.prompt)
|
||||
new_sample = await self.evaluation_utils.execute_prompt(self, directory, initial=True)
|
||||
_, answers = await self.evaluation_utils.evaluate_prompt(self, None, new_sample, path=prompt_path, data=data, initial=True)
|
||||
new_samples = await self.evaluation_utils.execute_prompt(self, directory, initial=True)
|
||||
_, answers = await self.evaluation_utils.evaluate_prompt(self, None, new_samples, path=prompt_path,
|
||||
data=data, initial=True)
|
||||
self.prompt_utils.write_answers(directory, answers=answers)
|
||||
|
||||
|
||||
|
|
@ -79,20 +80,20 @@ class Optimizer:
|
|||
|
||||
top_round = self.data_utils.get_best_round()
|
||||
|
||||
sample = top_round
|
||||
samples = top_round
|
||||
|
||||
logger.info(f"choose {sample['round']}")
|
||||
logger.info(f"choose {samples['round']}")
|
||||
|
||||
golden_answer = self.data_utils.list_to_markdown(qa)
|
||||
best_answer = self.data_utils.list_to_markdown(sample["answers"])
|
||||
best_answer = self.data_utils.list_to_markdown(samples["answers"])
|
||||
|
||||
optimize_prompt = PROMPT_OPTIMIZE_PROMPT.format(
|
||||
prompt=sample["prompt"], answers=best_answer,
|
||||
prompt=samples["prompt"], answers=best_answer,
|
||||
requirements=requirements,
|
||||
golden_answers=golden_answer,
|
||||
count=count)
|
||||
|
||||
response = await self.llm.responser(role="optimize", messages=[{"role": "user", "content": optimize_prompt}])
|
||||
response = await self.llm.responser(type="optimize", messages=[{"role": "user", "content": optimize_prompt}])
|
||||
|
||||
modification = extract_content(response, "modification")
|
||||
|
||||
|
|
@ -105,19 +106,16 @@ class Optimizer:
|
|||
else:
|
||||
self.prompt = ""
|
||||
|
||||
logger.info(directory)
|
||||
|
||||
self.prompt_utils.write_prompt(directory, prompt=self.prompt)
|
||||
|
||||
new_sample = await self.evaluation_utils.execute_prompt(self, directory, data)
|
||||
new_samples = await self.evaluation_utils.execute_prompt(self, directory, data)
|
||||
|
||||
success, answers = await self.evaluation_utils.evaluate_prompt(self, sample, new_sample,
|
||||
path=prompt_path,
|
||||
success, answers = await self.evaluation_utils.evaluate_prompt(self, samples, new_samples, path=prompt_path,
|
||||
data=data, initial=False)
|
||||
|
||||
self.prompt_utils.write_answers(directory, answers=answers)
|
||||
|
||||
logger.info(success)
|
||||
logger.info(f"Current round optimization successful:{success}")
|
||||
|
||||
logger.info(f"now is {self.round + 1}")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from metagpt.ext.spo.components.optimizer import Optimizer
|
||||
from metagpt.ext.spo.components.optimizer import PromptOptimizer
|
||||
from metagpt.ext.spo.utils.llm_client import SPO_LLM
|
||||
|
||||
|
||||
|
|
@ -10,7 +10,7 @@ if __name__ == "__main__":
|
|||
execute_kwargs={"model": "gpt-4o-mini", "temperature": 0}
|
||||
)
|
||||
|
||||
optimizer = Optimizer(
|
||||
optimizer = PromptOptimizer(
|
||||
optimized_path="workspace",
|
||||
initial_round=1,
|
||||
max_rounds=10,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
PROMPT_OPTIMIZE_PROMPT = """
|
||||
You are building a prompt to address user requirement.Based on the given prompt,
|
||||
You are building a prompt to address user requirement. Based on the given prompt,
|
||||
please reconstruct and optimize it. You can add, modify, or delete prompts. Please include a single modification in
|
||||
XML tags in your reply. During the optimization, you can incorporate any thinking models.
|
||||
This is a prompt that performed excellently in a previous iteration. You must make further optimizations and improvements based on this prompt. The modified prompt must differ from the provided example.
|
||||
|
|
|
|||
|
|
@ -4,10 +4,8 @@ import os
|
|||
import random
|
||||
from typing import Union, List, Dict
|
||||
import pandas as pd
|
||||
import yaml
|
||||
from metagpt.logs import logger
|
||||
|
||||
FILE_NAME = ''
|
||||
SAMPLE_K = 3
|
||||
|
||||
|
||||
class DataUtils:
|
||||
|
|
@ -52,21 +50,36 @@ class DataUtils:
|
|||
json.dump(data, file, default=str, indent=4)
|
||||
|
||||
def _load_scores(self):
|
||||
|
||||
rounds_dir = os.path.join(self.root_path, "prompts")
|
||||
|
||||
result_file = os.path.join(rounds_dir, "results.json")
|
||||
self.top_scores = []
|
||||
|
||||
with open(result_file, "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
df = pd.DataFrame(data)
|
||||
try:
|
||||
if not os.path.exists(result_file):
|
||||
logger.warning(f"Results file not found at {result_file}")
|
||||
return self.top_scores
|
||||
|
||||
for index, row in df.iterrows():
|
||||
self.top_scores.append(
|
||||
{"round": row["round"], "succeed": row["succeed"], "prompt": row["prompt"], "answers": row['answers']})
|
||||
with open(result_file, "r", encoding="utf-8") as file:
|
||||
data = json.load(file)
|
||||
|
||||
self.top_scores.sort(key=lambda x: x["round"], reverse=True)
|
||||
df = pd.DataFrame(data)
|
||||
|
||||
for index, row in df.iterrows():
|
||||
self.top_scores.append({
|
||||
"round": row["round"],
|
||||
"succeed": row["succeed"],
|
||||
"prompt": row["prompt"],
|
||||
"answers": row['answers']
|
||||
})
|
||||
|
||||
self.top_scores.sort(key=lambda x: x["round"], reverse=True)
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Could not find results file: {result_file}")
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"Invalid JSON format in file: {result_file}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error loading scores: {str(e)}")
|
||||
|
||||
return self.top_scores
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ from metagpt.ext.spo.components.evaluator import QuickEvaluate, QuickExecute
|
|||
from metagpt.logs import logger
|
||||
import tiktoken
|
||||
|
||||
EVALUATION_REPETITION = 4
|
||||
|
||||
def count_tokens(sample):
|
||||
if sample is None:
|
||||
|
|
@ -17,9 +18,9 @@ class EvaluationUtils:
|
|||
async def execute_prompt(self, optimizer, prompt_path, initial=False):
|
||||
|
||||
optimizer.prompt = optimizer.prompt_utils.load_prompt(optimizer.round, prompt_path)
|
||||
evaluator = QuickExecute(prompt=optimizer.prompt)
|
||||
executor = QuickExecute(prompt=optimizer.prompt)
|
||||
|
||||
answers = await evaluator.prompt_execute()
|
||||
answers = await executor.prompt_execute()
|
||||
|
||||
cur_round = optimizer.round + 1 if not initial else optimizer.round
|
||||
|
||||
|
|
@ -27,17 +28,17 @@ class EvaluationUtils:
|
|||
|
||||
return new_data
|
||||
|
||||
async def evaluate_prompt(self, optimizer, sample, new_sample, path, data, initial=False):
|
||||
async def evaluate_prompt(self, optimizer, samples, new_samples, path, data, initial=False):
|
||||
|
||||
evaluator = QuickEvaluate()
|
||||
new_token = count_tokens(new_sample)
|
||||
new_token = count_tokens(new_samples)
|
||||
|
||||
if initial is True:
|
||||
succeed = True
|
||||
else:
|
||||
evaluation_results = []
|
||||
for _ in range(4):
|
||||
result = await evaluator.prompt_evaluate(sample=sample, new_sample=new_sample)
|
||||
for _ in range(EVALUATION_REPETITION):
|
||||
result = await evaluator.prompt_evaluate(samples=samples, new_samples=new_samples)
|
||||
evaluation_results.append(result)
|
||||
|
||||
logger.info(evaluation_results)
|
||||
|
|
@ -46,8 +47,8 @@ class EvaluationUtils:
|
|||
false_count = evaluation_results.count(False)
|
||||
succeed = true_count > false_count
|
||||
|
||||
new_data = optimizer.data_utils.create_result_data(new_sample['round'], new_sample['answers'],
|
||||
new_sample['prompt'], succeed, new_token)
|
||||
new_data = optimizer.data_utils.create_result_data(new_samples['round'], new_samples['answers'],
|
||||
new_samples['prompt'], succeed, new_token)
|
||||
|
||||
data.append(new_data)
|
||||
|
||||
|
|
@ -55,6 +56,6 @@ class EvaluationUtils:
|
|||
|
||||
optimizer.data_utils.save_results(result_path, data)
|
||||
|
||||
answers = new_sample['answers']
|
||||
answers = new_samples['answers']
|
||||
|
||||
return succeed, answers
|
||||
|
|
|
|||
|
|
@ -15,20 +15,33 @@ class SPO_LLM:
|
|||
|
||||
def _load_llm_config(self, kwargs: dict):
|
||||
model = kwargs.get('model')
|
||||
config = ModelsConfig.default().get(model).model_copy()
|
||||
if not model:
|
||||
raise ValueError("'model' parameter is required")
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(config, key):
|
||||
setattr(config, key, value)
|
||||
try:
|
||||
model_config = ModelsConfig.default().get(model)
|
||||
if model_config is None:
|
||||
raise ValueError(f"Model '{model}' not found in configuration")
|
||||
|
||||
return config
|
||||
config = model_config.model_copy()
|
||||
|
||||
async def responser(self, role: str, messages):
|
||||
if role == "optimize":
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(config, key):
|
||||
setattr(config, key, value)
|
||||
|
||||
return config
|
||||
|
||||
except AttributeError as e:
|
||||
raise ValueError(f"Model '{model}' not found in configuration")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Error loading configuration for model '{model}': {str(e)}")
|
||||
|
||||
async def responser(self, type: str, messages):
|
||||
if type == "optimize":
|
||||
response = await self.optimize_llm.acompletion(messages)
|
||||
elif role == "evaluate":
|
||||
elif type == "evaluate":
|
||||
response = await self.evaluate_llm.acompletion(messages)
|
||||
elif role == "execute":
|
||||
elif type == "execute":
|
||||
response = await self.execute_llm.acompletion(messages)
|
||||
else:
|
||||
raise ValueError("Please set the correct name: optimize, evaluate or execute")
|
||||
|
|
@ -66,11 +79,11 @@ async def spo():
|
|||
|
||||
# test messages
|
||||
hello_msg = [{"role": "user", "content": "hello"}]
|
||||
response = await llm.responser(role='execute', messages=hello_msg)
|
||||
response = await llm.responser(type='execute', messages=hello_msg)
|
||||
print(f"AI: {response}")
|
||||
response = await llm.responser(role='optimize', messages=hello_msg)
|
||||
response = await llm.responser(type='optimize', messages=hello_msg)
|
||||
print(f"AI: {response}")
|
||||
response = await llm.responser(role='evaluate', messages=hello_msg)
|
||||
response = await llm.responser(type='evaluate', messages=hello_msg)
|
||||
print(f"AI: {response}")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import yaml
|
|||
import random
|
||||
import os
|
||||
|
||||
FILE_NAME = 'meta.yaml'
|
||||
FILE_NAME = ''
|
||||
SAMPLE_K = 3
|
||||
|
||||
def set_file_name(name):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue