diff --git a/cookbook/pageindex_RAG_simple.ipynb b/cookbook/pageindex_RAG_simple.ipynb index 3fa21a2..6bc6ee8 100644 --- a/cookbook/pageindex_RAG_simple.ipynb +++ b/cookbook/pageindex_RAG_simple.ipynb @@ -124,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 51, "metadata": { "id": "hmj3POkDcG1N" }, @@ -154,7 +154,8 @@ " pprint(cleaned_tree, sort_dicts=False, width=100)\n", "\n", "def show(text, width=100):\n", - " print(textwrap.fill(text, width=width))\n", + " for line in text.splitlines():\n", + " print(textwrap.fill(line, width=width))\n", "\n", "def create_node_mapping(tree):\n", " \"\"\"Create a mapping of node_id to node for quick lookup\"\"\"\n", @@ -233,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 61, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -390,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 57, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -449,7 +450,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 58, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -464,17 +465,23 @@ "output_type": "stream", "text": [ "Retrieved Context:\n", - "## 5. Conclusion, Limitations, and Future Work In this work, we share our journey in enhancing\n", - "model reasoning abilities through reinforcement learning. DeepSeek-R1-Zero represents a pure RL\n", - "approach without relying on cold-start data, achieving strong performance across various tasks.\n", - "DeepSeek-R1 is more powerful, leveraging cold-start data alongside iterative RL fine-tuning.\n", - "Ultimately, DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on a range of tasks. We\n", - "further explore distillation the reasoning capability to small dense models. We use DeepSeek-R1 as\n", - "the teacher model to generate 800K training samples, and fine-tune several small dense models. The\n", - "results are promising: DeepSeek-R1-Distill-Qwen-1.5B outperforms GPT-4o and Claude-3.5-Sonnet on\n", + "\n", + "## 5. Conclusion, Limitations, and Future Work\n", + "\n", + "In this work, we share our journey in enhancing model reasoning abilities through reinforcement\n", + "learning. DeepSeek-R1-Zero represents a pure RL approach without relying on cold-start data,\n", + "achieving strong performance across various tasks. DeepSeek-R1 is more powerful, leveraging cold-\n", + "start data alongside iterative RL fine-tuning. Ultimately, DeepSeek-R1 achieves performance\n", + "comparable to OpenAI-o1-1217 on a range of tasks.\n", + "\n", + "We further explore distillation the reasoning capability to small dense models. We use DeepSeek-R1\n", + "as the teacher model to generate 800K training samples, and fine-tune several small dense models.\n", + "The results are promising: DeepSeek-R1-Distill-Qwen-1.5B outperforms GPT-4o and Claude-3.5-Sonnet on\n", "math benchmarks with $28.9 \\%$ on AIME and $83.9 \\%$ on MATH. Other dense models also achieve\n", "impressive results, significantly outperforming other instructiontuned models based on the same\n", - "underlying checkpoints. In the fut...\n" + "underlying checkpoints.\n", + "\n", + "In the fut...\n" ] } ], @@ -482,7 +489,7 @@ "node_list = json.loads(tree_search_result)[\"node_list\"]\n", "relevant_content = \"\\n\\n\".join(node_map[node_id][\"text\"] for node_id in node_list)\n", "\n", - "print('Retrieved Context:')\n", + "print('Retrieved Context:\\n')\n", "show(relevant_content[:1000] + '...')" ] }, @@ -495,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 59, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -510,14 +517,19 @@ "output_type": "stream", "text": [ "Generated Answer:\n", - "**Conclusions in this document:** - DeepSeek-R1-Zero, a pure reinforcement learning (RL) model\n", - "without cold-start data, achieves strong performance across various tasks. - DeepSeek-R1, which\n", - "combines cold-start data with iterative RL fine-tuning, is even more powerful and achieves\n", - "performance comparable to OpenAI-o1-1217 on a range of tasks. - Distilling DeepSeek-R1’s reasoning\n", - "capabilities into smaller dense models is effective: DeepSeek-R1-Distill-Qwen-1.5B outperforms\n", - "GPT-4o and Claude-3.5-Sonnet on math benchmarks, and other dense models also show significant\n", - "improvements over similar instruction-tuned models. - Overall, the approaches described demonstrate\n", - "promising results in enhancing model reasoning abilities through RL and distillation.\n" + "\n", + "The conclusions in this document are:\n", + "\n", + "- DeepSeek-R1-Zero, a pure reinforcement learning (RL) approach without cold-start data, achieves\n", + "strong performance across various tasks.\n", + "- DeepSeek-R1, which combines cold-start data with iterative RL fine-tuning, is more powerful and\n", + "achieves performance comparable to OpenAI-o1-1217 on a range of tasks.\n", + "- Distilling DeepSeek-R1’s reasoning capabilities into smaller dense models is promising; for\n", + "example, DeepSeek-R1-Distill-Qwen-1.5B outperforms GPT-4o and Claude-3.5-Sonnet on math benchmarks,\n", + "and other dense models also show significant improvements over similar instruction-tuned models.\n", + "\n", + "These results demonstrate the effectiveness of the RL-based approach and the potential for\n", + "distilling reasoning abilities into smaller models.\n" ] } ], @@ -531,7 +543,7 @@ "Provide a clear, concise answer based only on the context provided.\n", "\"\"\"\n", "\n", - "print('Generated Answer:')\n", + "print('Generated Answer:\\n')\n", "answer = await call_llm(answer_prompt)\n", "show(answer)" ]