fix notebook format

This commit is contained in:
Ray 2025-08-22 01:33:37 +08:00
parent cf0a599cff
commit 25c8d9079b

View file

@ -107,7 +107,8 @@
"from pageindex import PageIndexClient\n", "from pageindex import PageIndexClient\n",
"from pprint import pprint\n", "from pprint import pprint\n",
"\n", "\n",
"PAGEINDEX_API_KEY = \"YOUR_PAGEINDEX_API_KEY\" # Get your PageIndex API key from https://dash.pageindex.ai/api-keys\n", "# Get your PageIndex API key from https://dash.pageindex.ai/api-keys\n",
"PAGEINDEX_API_KEY = \"YOUR_PAGEINDEX_API_KEY\"\n",
"OPENAI_API_KEY = \"YOUR_OPENAI_API_KEY\"\n", "OPENAI_API_KEY = \"YOUR_OPENAI_API_KEY\"\n",
"\n", "\n",
"pi_client = PageIndexClient(api_key=PAGEINDEX_API_KEY)" "pi_client = PageIndexClient(api_key=PAGEINDEX_API_KEY)"
@ -124,7 +125,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 63,
"metadata": { "metadata": {
"id": "hmj3POkDcG1N" "id": "hmj3POkDcG1N"
}, },
@ -139,7 +140,7 @@
" )\n", " )\n",
" return response.choices[0].message.content.strip()\n", " return response.choices[0].message.content.strip()\n",
"\n", "\n",
"def remove_fields(data, fields=['text'], max_len=50):\n", "def remove_fields(data, fields=['text'], max_len=40):\n",
" if isinstance(data, dict):\n", " if isinstance(data, dict):\n",
" return {k: remove_fields(v, fields)\n", " return {k: remove_fields(v, fields)\n",
" for k, v in data.items() if k not in fields}\n", " for k, v in data.items() if k not in fields}\n",
@ -234,7 +235,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 61, "execution_count": null,
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/", "base_uri": "https://localhost:8080/",
@ -249,78 +250,73 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Simplified Tree Structure of the Document:\n", "Simplified Tree Structure of the Document:\n",
"[{'title': 'DeepSeek-R1: Incentivizing Reasoning Capability in...',\n", "[{'title': 'DeepSeek-R1: Incentivizing Reasoning Cap...',\n",
" 'node_id': '0000',\n", " 'node_id': '0000',\n",
" 'prefix_summary': '# DeepSeek-R1: Incentivizing Reasoning Capability ...',\n", " 'prefix_summary': '# DeepSeek-R1: Incentivizing Reasoning C...',\n",
" 'nodes': [{'title': 'Abstract',\n", " 'nodes': [{'title': 'Abstract',\n",
" 'node_id': '0001',\n", " 'node_id': '0001',\n",
" 'summary': 'The partial document introduces two reasoning mode...'},\n", " 'summary': 'The partial document introduces two reas...'},\n",
" {'title': 'Contents',\n", " {'title': 'Contents',\n",
" 'node_id': '0002',\n", " 'node_id': '0002',\n",
" 'summary': 'This partial document provides a detailed table of...'},\n", " 'summary': 'This partial document provides a detaile...'},\n",
" {'title': '1. Introduction',\n", " {'title': '1. Introduction',\n",
" 'node_id': '0003',\n", " 'node_id': '0003',\n",
" 'prefix_summary': 'The partial document introduces recent advancement...',\n", " 'prefix_summary': 'The partial document introduces recent a...',\n",
" 'nodes': [{'title': '1.1. Contributions',\n", " 'nodes': [{'title': '1.1. Contributions',\n",
" 'node_id': '0004',\n", " 'node_id': '0004',\n",
" 'summary': 'This partial document outlines the main contributi...'},\n", " 'summary': 'This partial document outlines the main ...'},\n",
" {'title': '1.2. Summary of Evaluation Results',\n", " {'title': '1.2. Summary of Evaluation Results',\n",
" 'node_id': '0005',\n", " 'node_id': '0005',\n",
" 'summary': 'The partial document provides a summary of evaluat...'}]},\n", " 'summary': 'The partial document provides a summary ...'}]},\n",
" {'title': '2. Approach',\n", " {'title': '2. Approach',\n",
" 'node_id': '0006',\n", " 'node_id': '0006',\n",
" 'prefix_summary': '## 2. Approach\\n',\n", " 'prefix_summary': '## 2. Approach\\n',\n",
" 'nodes': [{'title': '2.1. Overview',\n", " 'nodes': [{'title': '2.1. Overview',\n",
" 'node_id': '0007',\n", " 'node_id': '0007',\n",
" 'summary': '### 2.1. Overview\\n\\nPrevious work has heavily relie...'},\n", " 'summary': '### 2.1. Overview\\n\\nPrevious work has hea...'},\n",
" {'title': '2.2. DeepSeek-R1-Zero: Reinforcement Learning on t...',\n", " {'title': '2.2. DeepSeek-R1-Zero: Reinforcement Lea...',\n",
" 'node_id': '0008',\n", " 'node_id': '0008',\n",
" 'prefix_summary': '### 2.2. DeepSeek-R1-Zero: Reinforcement Learning ...',\n", " 'prefix_summary': '### 2.2. DeepSeek-R1-Zero: Reinforcement...',\n",
" 'nodes': [{'title': '2.2.1. Reinforcement Learning Algorithm',\n", " 'nodes': [{'title': '2.2.1. Reinforcement Learning Algorithm',\n",
" 'node_id': '0009',\n", " 'node_id': '0009',\n",
" 'summary': 'This partial document describes the Group '\n", " 'summary': 'This partial document describes the Grou...'},\n",
" 'Relative...'},\n",
" {'title': '2.2.2. Reward Modeling',\n", " {'title': '2.2.2. Reward Modeling',\n",
" 'node_id': '0010',\n", " 'node_id': '0010',\n",
" 'summary': 'This partial document discusses the reward '\n", " 'summary': 'This partial document discusses the rewa...'},\n",
" 'modelin...'},\n",
" {'title': '2.2.3. Training Template',\n", " {'title': '2.2.3. Training Template',\n",
" 'node_id': '0011',\n", " 'node_id': '0011',\n",
" 'summary': '#### 2.2.3. Training Template\\n'\n", " 'summary': '#### 2.2.3. Training Template\\n\\nTo train ...'},\n",
" '\\n'\n", " {'title': '2.2.4. Performance, Self-evolution Proce...',\n",
" 'To train DeepSeek-R...'},\n",
" {'title': '2.2.4. Performance, Self-evolution Process and Aha...',\n",
" 'node_id': '0012',\n", " 'node_id': '0012',\n",
" 'summary': 'This partial document discusses the performance, '\n", " 'summary': 'This partial document discusses the perf...'}]},\n",
" 's...'}]},\n", " {'title': '2.3. DeepSeek-R1: Reinforcement Learning...',\n",
" {'title': '2.3. DeepSeek-R1: Reinforcement Learning with Cold...',\n",
" 'node_id': '0013',\n", " 'node_id': '0013',\n",
" 'summary': 'This partial document describes the training pipel...'},\n", " 'summary': 'This partial document describes the trai...'},\n",
" {'title': '2.4. Distillation: Empower Small Models with Reaso...',\n", " {'title': '2.4. Distillation: Empower Small Models ...',\n",
" 'node_id': '0014',\n", " 'node_id': '0014',\n",
" 'summary': 'This partial document discusses the process of dis...'}]},\n", " 'summary': 'This partial document discusses the proc...'}]},\n",
" {'title': '3. Experiment',\n", " {'title': '3. Experiment',\n",
" 'node_id': '0015',\n", " 'node_id': '0015',\n",
" 'prefix_summary': 'The partial document describes the experimental se...',\n", " 'prefix_summary': 'The partial document describes the exper...',\n",
" 'nodes': [{'title': '3.1. DeepSeek-R1 Evaluation',\n", " 'nodes': [{'title': '3.1. DeepSeek-R1 Evaluation',\n",
" 'node_id': '0016',\n", " 'node_id': '0016',\n",
" 'summary': 'This partial document presents a comprehensive eva...'},\n", " 'summary': 'This partial document presents a compreh...'},\n",
" {'title': '3.2. Distilled Model Evaluation',\n", " {'title': '3.2. Distilled Model Evaluation',\n",
" 'node_id': '0017',\n", " 'node_id': '0017',\n",
" 'summary': 'This partial document presents an evaluation of va...'}]},\n", " 'summary': 'This partial document presents an evalua...'}]},\n",
" {'title': '4. Discussion',\n", " {'title': '4. Discussion',\n",
" 'node_id': '0018',\n", " 'node_id': '0018',\n",
" 'summary': 'This partial document discusses the comparative ef...'},\n", " 'summary': 'This partial document discusses the comp...'},\n",
" {'title': '5. Conclusion, Limitations, and Future Work',\n", " {'title': '5. Conclusion, Limitations, and Future W...',\n",
" 'node_id': '0019',\n", " 'node_id': '0019',\n",
" 'summary': 'This partial document presents the conclusion, lim...'},\n", " 'summary': 'This partial document presents the concl...'},\n",
" {'title': 'References',\n", " {'title': 'References',\n",
" 'node_id': '0020',\n", " 'node_id': '0020',\n",
" 'summary': 'The partial document consists of a comprehensive r...'},\n", " 'summary': 'The partial document consists of a compr...'},\n",
" {'title': 'Appendix', 'node_id': '0021', 'summary': '## Appendix\\n'},\n", " {'title': 'Appendix', 'node_id': '0021', 'summary': '## Appendix\\n'},\n",
" {'title': 'A. Contributions and Acknowledgments',\n", " {'title': 'A. Contributions and Acknowledgments',\n",
" 'node_id': '0022',\n", " 'node_id': '0022',\n",
" 'summary': 'This partial document section details the contribu...'}]}]\n" " 'summary': 'This partial document section details th...'}]}]\n"
] ]
} }
], ],