diff --git a/examples/spo/README.md b/examples/spo/README.md index 273bbc2df..e52309c2f 100644 --- a/examples/spo/README.md +++ b/examples/spo/README.md @@ -16,7 +16,7 @@ ## ✨ Core Advantages - ⚡ **Universal Adaptation** - _Closed & open-ended tasks supported_ - 🔄 **Self-Evolving** - _Auto-optimization via LLM-as-judge mechanism_ -[Read our paper](./Self-Supervised Prompt Optimization.pdf) +[Read our paper on arXiv](https://arxiv.org/pdf/2502.06855) ## 📊 Experiment @@ -169,10 +169,12 @@ ## Citation ``` @misc{xiang2025spo, - title = {Self-Supervised Prompt Optimization}, - author = {Xiang, Jinyu and Zhang, Jiayi and Yu, Zhaoyang and Teng, Fengwei and Tu, Jinhao and Liang, Xinbing and Hong, Sirui and Wu, Chenglin and Luo, Yuyu}, - year = {2025}, - url = {https://github.com/geekan/MetaGPT/blob/main/examples/spo/Self-Supervised Prompt Optimization.pdf}, - note = {Code available at: https://github.com/geekan/MetaGPT/blob/main/examples/spo} + title={Self-Supervised Prompt Optimization}, + author={Jinyu Xiang and Jiayi Zhang and Zhaoyang Yu and Fengwei Teng and Jinhao Tu and Xinbing Liang and Sirui Hong and Chenglin Wu and Yuyu Luo}, + year={2025}, + eprint={2502.06855}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2502.06855}, } ``` \ No newline at end of file diff --git a/examples/spo/Self-Supervised Prompt Optimization.pdf b/examples/spo/Self-Supervised Prompt Optimization.pdf deleted file mode 100644 index 79d24e329..000000000 Binary files a/examples/spo/Self-Supervised Prompt Optimization.pdf and /dev/null differ diff --git a/metagpt/ext/spo/app.py b/metagpt/ext/spo/app.py index ddab67777..563eb92ff 100644 --- a/metagpt/ext/spo/app.py +++ b/metagpt/ext/spo/app.py @@ -184,6 +184,46 @@ def main(): st.success("Optimization completed!") + st.header("Optimization Results") + + prompt_path = f"{optimizer.root_path}/prompts" + result_data = optimizer.data_utils.load_results(prompt_path) + + for result in result_data: + round_num = 
result["round"] + success = result["succeed"] + prompt = result["prompt"] + + with st.expander(f"Round {round_num} {':white_check_mark:' if success else ':x:'}"): + st.markdown("**Prompt:**") + st.code(prompt, language="text") + st.markdown("<br>", unsafe_allow_html=True) + + col1, col2 = st.columns(2) + with col1: + st.markdown(f"**Status:** {'Success ✅ ' if success else 'Failed ❌ '}") + with col2: + st.markdown(f"**Tokens:** {result['tokens']}") + + st.markdown("**Answers:**") + for idx, answer in enumerate(result["answers"]): + st.markdown(f"**Question {idx + 1}:**") + st.text(answer["question"]) + st.markdown("**Answer:**") + st.text(answer["answer"]) + st.markdown("---") + + # Summary + success_count = sum(1 for r in result_data if r["succeed"]) + total_rounds = len(result_data) + + st.markdown("### Summary") + col1, col2 = st.columns(2) + with col1: + st.metric("Total Rounds", total_rounds) + with col2: + st.metric("Successful Rounds", success_count) + except Exception as e: st.error(f"An error occurred: {str(e)}") _logger.error(f"Error during optimization: {str(e)}")