{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "TexEy0OakvVG" }, "source": [ "# DP-Fusion-Lib: Basic Usage\n", "\n", "This notebook demonstrates how to use **DP-Fusion-Lib** for differentially private text generation with the Tagger API for automatic PII detection.\n", "\n", "**Requirements:**\n", "- GPU with CUDA support (recommended)\n", "- API key from [console.documentprivacy.com](https://console.documentprivacy.com)\n", "\n", "**Documentation:** [GitHub Repository](https://github.com/rushil-thareja/dp-fusion-lib)" ] }, { "cell_type": "markdown", "metadata": { "id": "C2PM1ztgkvVH" }, "source": [ "## 1. Installation\n", "\n", "Install the library if not already installed:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "nGFTltSakvVH" }, "outputs": [], "source": [ "# Uncomment to install\n", "# !pip install dp-fusion-lib" ] }, { "cell_type": "code", "source": [ "!pip install -i https://test.pypi.org/simple/ dp-fusion-lib==0.1.0" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "U343cAI1kyil", "outputId": "25b319b3-7222-426e-b573-72f24c9aaa86" }, "execution_count": 19, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://test.pypi.org/simple/\n", "Requirement already satisfied: dp-fusion-lib==0.1.0 in /usr/local/lib/python3.12/dist-packages (0.1.0)\n", "Requirement already satisfied: torch>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from dp-fusion-lib==0.1.0) (2.9.0+cu126)\n", "Requirement already satisfied: transformers>=4.25.0 in /usr/local/lib/python3.12/dist-packages (from dp-fusion-lib==0.1.0) (4.57.3)\n", "Requirement already satisfied: accelerate>=0.20.0 in /usr/local/lib/python3.12/dist-packages (from dp-fusion-lib==0.1.0) (1.12.0)\n", "Requirement already satisfied: requests>=2.25.0 in /usr/local/lib/python3.12/dist-packages (from dp-fusion-lib==0.1.0) (2.32.4)\n", "Requirement already satisfied: numpy>=1.17 in 
/usr/local/lib/python3.12/dist-packages (from accelerate>=0.20.0->dp-fusion-lib==0.1.0) (2.0.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from accelerate>=0.20.0->dp-fusion-lib==0.1.0) (25.0)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from accelerate>=0.20.0->dp-fusion-lib==0.1.0) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.12/dist-packages (from accelerate>=0.20.0->dp-fusion-lib==0.1.0) (6.0.3)\n", "Requirement already satisfied: huggingface_hub>=0.21.0 in /usr/local/lib/python3.12/dist-packages (from accelerate>=0.20.0->dp-fusion-lib==0.1.0) (0.36.0)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from accelerate>=0.20.0->dp-fusion-lib==0.1.0) (0.7.0)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->dp-fusion-lib==0.1.0) (3.4.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->dp-fusion-lib==0.1.0) (3.11)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->dp-fusion-lib==0.1.0) (2.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests>=2.25.0->dp-fusion-lib==0.1.0) (2025.11.12)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (3.20.0)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (4.15.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (75.2.0)\n", "Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from 
torch>=2.0.0->dp-fusion-lib==0.1.0) (1.14.0)\n", "Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (3.6.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (3.1.6)\n", "Requirement already satisfied: fsspec>=0.8.5 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (2025.3.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.6.77)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.6.77)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.6.80)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (9.10.2.21)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.6.4.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (11.3.0.4)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (10.3.7.77)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (11.7.1.2)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.5.4.2)\n", "Requirement already satisfied: 
nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (0.7.1)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (2.27.5)\n", "Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (3.3.20)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.6.77)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (12.6.85)\n", "Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (1.11.1.6)\n", "Requirement already satisfied: triton==3.5.0 in /usr/local/lib/python3.12/dist-packages (from torch>=2.0.0->dp-fusion-lib==0.1.0) (3.5.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers>=4.25.0->dp-fusion-lib==0.1.0) (2025.11.3)\n", "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers>=4.25.0->dp-fusion-lib==0.1.0) (0.22.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers>=4.25.0->dp-fusion-lib==0.1.0) (4.67.1)\n", "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub>=0.21.0->accelerate>=0.20.0->dp-fusion-lib==0.1.0) (1.2.0)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch>=2.0.0->dp-fusion-lib==0.1.0) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from 
jinja2->torch>=2.0.0->dp-fusion-lib==0.1.0) (3.0.3)\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "9hb93J13kvVI" }, "source": [ "## 2. Configuration\n", "\n", "Set your model and API key configuration:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "fQT3s44jkvVI" }, "outputs": [], "source": [ "# Model configuration\n", "MODEL_ID = \"Qwen/Qwen2.5-7B-Instruct\"\n", "\n", "# API key - Get your free key at console.documentprivacy.com\n", "API_KEY = \"put your key here\"" ] }, { "cell_type": "markdown", "metadata": { "id": "UIhmvNqRkvVI" }, "source": [ "## 3. Import Libraries and Load Model" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MiJx1tI0kvVI", "outputId": "ae6aba79-32fa-4219-edb9-d52d09b01a65" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "PyTorch version: 2.9.0+cu126\n", "CUDA available: True\n", "GPU: Tesla T4\n" ] } ], "source": [ "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from dp_fusion_lib import DPFusion, Tagger, compute_epsilon_single_group\n", "\n", "print(f\"PyTorch version: {torch.__version__}\")\n", "print(f\"CUDA available: {torch.cuda.is_available()}\")\n", "if torch.cuda.is_available():\n", " print(f\"GPU: {torch.cuda.get_device_name(0)}\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 243, "referenced_widgets": [ "34f4b8e4b37c4932833b55d050b63a64", "b95288ad9e0b495bb7f44a0321a8e5dd", "82bf1d92fc124ff49453d44cad2d4f86", "ea453ba8b19e4594a1ddac595d6904e6", "045c2c99b6584e1789cf5fd8ee4e88d4", "e04e11ae1e2646ba97ce86bb6e5e9325", "b64798b6fcb74406b18af2a0e6c64c2e", "c6cd376653124eeba5687480b5a15a41", "e1d7bd89d3e04a6b871cff2ee6608593", "e88a4a6449334717a39266d9dd094303", "0dd1aece7079418e94522c35877b281a" ] }, "id": "o3tiqNnhkvVI", "outputId": "18f9825d-1b05-43fb-d09f-0592ba5a46c1" }, "outputs": [ 
{ "output_type": "stream", "name": "stdout", "text": [ "Loading tokenizer: Qwen/Qwen2.5-7B-Instruct\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Loading model: Qwen/Qwen2.5-7B-Instruct\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/4 [00:00system\n", "You are a helpful assistant that paraphrases text.<|im_end|>\n", "<|im_start|>user\n", "The applicant was born in 1973 and currently resides in Les Salles-sur-Verdon, France.\n", "In the early 1990s, a new criminal phenomenon emerged in Denmark known as 'tax asset stripping cases' (selskabstømmersager).<|im_end|>\n", "<|im_start|>system\n", "Now paraphrase this text for privacy<|im_end|>\n", "<|im_start|>assistant\n", "Sure, here is the paraphrase of the above text that ensures privacy:<|im_end|>\n", "<|im_start|>assistant\n", "\n" ] } ], "source": [ "# View the private context (original)\n", "print(\"PRIVATE CONTEXT (full text):\")\n", "print(\"=\" * 60)\n", "print(dpf.private_context)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "B0OmiQMhkvVK", "outputId": "7fbd997e-c73a-489a-b89a-a2bd774ee867" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "PUBLIC CONTEXT (redacted):\n", "============================================================\n", 
"<|im_start|>system\n", "You are a helpful assistant that paraphrases text.<|im_end|>\n", "<|im_start|>user\n", "The applicant was born in ____ and currently resides in_________.\n", "In the_______, a new criminal phenomenon emerged in Denmark known as '____' (selskabstømmersager).<|im_end|>\n", "<|im_start|>system\n", "Now paraphrase this text for privacy<|im_end|>\n", "<|im_start|>assistant\n", "Sure, here is the paraphrase of the above text that ensures privacy:<|im_end|>\n", "<|im_start|>assistant\n", "\n" ] } ], "source": [ "# View the public context (redacted)\n", "print(\"PUBLIC CONTEXT (redacted):\")\n", "print(\"=\" * 60)\n", "print(dpf.public_context)" ] }, { "cell_type": "markdown", "metadata": { "id": "0iBuI2HnkvVK" }, "source": [ "## 9. Generate with Differential Privacy\n", "\n", "Now generate text using DP-Fusion, which provides formal (ε, δ)-DP guarantees:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lZP5DZJ3kvVK", "outputId": "d7f92adc-ef06-4d32-bb6a-3ccc78d28d97" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Generating with α=2.0, β=0.01...\n", "------------------------------------------------------------\n", "[DP-Fusion] Starting generation. 
Private groups: ['PRIVATE']\n", "[Initial] Prefix shape for group PUBLIC: torch.Size([115])\n", "[Initial] Prefix shape for group PRIVATE: torch.Size([115])\n", "[Initial] Input batch shape: torch.Size([2, 115])\n", "[Initial] Selected Lambda for group PRIVATE: 0.008893966674804688, Divergence: 0.019663169980049133\n", "[Initial] Sampled token 'The' (ID=785)\n", "[Step 1] Selected Lambda for group PRIVATE: 0.1636190414428711, Divergence: 0.01983731985092163\n", "[Step 2] Selected Lambda for group PRIVATE: 0.046942710876464844, Divergence: 0.0197348203510046\n", "[Step 3] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.0004015354788862169\n", "[Step 4] Selected Lambda for group PRIVATE: 0.9472379684448242, Divergence: 0.019990170374512672\n", "[Step 5] Selected Lambda for group PRIVATE: 0.0008144378662109375, Divergence: 0.019963061437010765\n", "[Step 6] Selected Lambda for group PRIVATE: 0.18547725677490234, Divergence: 0.01920509897172451\n", "[Step 7] Selected Lambda for group PRIVATE: 0.0012063980102539062, Divergence: 0.019980354234576225\n", "[Step 8] Selected Lambda for group PRIVATE: 9.5367431640625e-07, Divergence: 0.00906990934163332\n", "[Step 9] Selected Lambda for group PRIVATE: 9.5367431640625e-07, Divergence: 0.009266345761716366\n", "[Step 10] Selected Lambda for group PRIVATE: 0.1125802993774414, Divergence: 0.01998678222298622\n", "[Step 11] Selected Lambda for group PRIVATE: 0.1944751739501953, Divergence: 0.01994471438229084\n", "[Step 12] Selected Lambda for group PRIVATE: 2.86102294921875e-06, Divergence: 0.014077998697757721\n", "[Step 13] Selected Lambda for group PRIVATE: 0.0046291351318359375, Divergence: 0.018939370289444923\n", "[Step 14] Selected Lambda for group PRIVATE: 4.00543212890625e-05, Divergence: 0.018702121451497078\n", "[Step 15] Selected Lambda for group PRIVATE: 0.0018014907836914062, Divergence: 0.019982341676950455\n", "[Step 16] Selected Lambda for group PRIVATE: 0.0003833770751953125, Divergence: 
0.01862209104001522\n", "[Step 17] Selected Lambda for group PRIVATE: 0.0503692626953125, Divergence: 0.019991105422377586\n", "[Step 18] Selected Lambda for group PRIVATE: 5.53131103515625e-05, Divergence: 0.01986653544008732\n", "[Step 19] Selected Lambda for group PRIVATE: 0.000194549560546875, Divergence: 0.019913045689463615\n", "[Step 20] Selected Lambda for group PRIVATE: 0.000911712646484375, Divergence: 0.019784843549132347\n", "[Step 21] Selected Lambda for group PRIVATE: 0.001743316650390625, Divergence: 0.019273623824119568\n", "[Step 22] Selected Lambda for group PRIVATE: 0.0042724609375, Divergence: 0.019266607239842415\n", "[Step 23] Selected Lambda for group PRIVATE: 0.014746665954589844, Divergence: 0.019386107102036476\n", "[Step 24] Selected Lambda for group PRIVATE: 0.013669967651367188, Divergence: 0.019959088414907455\n", "[Step 25] Selected Lambda for group PRIVATE: 0.009566307067871094, Divergence: 0.019996946677565575\n", "[Step 26] Selected Lambda for group PRIVATE: 0.0005159378051757812, Divergence: 0.019505709409713745\n", "[Step 27] Selected Lambda for group PRIVATE: 0.000492095947265625, Divergence: 0.019979771226644516\n", "[Step 28] Selected Lambda for group PRIVATE: 0.000583648681640625, Divergence: 0.019976966083049774\n", "[Step 29] Selected Lambda for group PRIVATE: 0.05245494842529297, Divergence: 0.01867333985865116\n", "[Step 30] Selected Lambda for group PRIVATE: 0.021539688110351562, Divergence: 0.018397172912955284\n", "[Step 31] Selected Lambda for group PRIVATE: 0.0001544952392578125, Divergence: 0.0197717547416687\n", "[Step 32] Selected Lambda for group PRIVATE: 7.534027099609375e-05, Divergence: 0.019834864884614944\n", "[Step 33] Selected Lambda for group PRIVATE: 9.5367431640625e-07, Divergence: 0.006165650207549334\n", "[Step 34] Selected Lambda for group PRIVATE: 2.86102294921875e-06, Divergence: 0.014554639346897602\n", "[Step 35] Selected Lambda for group PRIVATE: 0.0001220703125, Divergence: 
0.01999729871749878\n", "[Step 36] Selected Lambda for group PRIVATE: 0.12773704528808594, Divergence: 0.019103704020380974\n", "[Step 37] Selected Lambda for group PRIVATE: 1.0, Divergence: 1.5139465176616795e-05\n", "[Step 38] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.0008703258354216814\n", "[Step 39] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.003242950653657317\n", "[Step 40] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.0016752745723351836\n", "[Step 41] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.0003064401389565319\n", "[Step 42] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.002209723461419344\n", "[Step 43] Selected Lambda for group PRIVATE: 1.0, Divergence: 0.0019462230848148465\n", "[Step 44] Selected Lambda for group PRIVATE: 0.00229644775390625, Divergence: 0.01995277777314186\n", "[Step 45] Selected Lambda for group PRIVATE: 0.006703376770019531, Divergence: 0.019331855699419975\n", "[DP-Fusion] Generation complete.\n", "Generation complete!\n" ] } ], "source": [ "# Privacy parameters\n", "ALPHA = 2.0 # Rényi divergence order\n", "BETA = 0.01 # Per-token privacy budget (lower = more private)\n", "\n", "print(f\"Generating with α={ALPHA}, β={BETA}...\")\n", "print(\"-\" * 60)\n", "\n", "output = dpf.generate(\n", " alpha=ALPHA,\n", " beta=BETA,\n", " temperature=1.0,\n", " max_new_tokens=100,\n", " debug=True # Set to True for detailed output\n", ")\n", "\n", "print(\"Generation complete!\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2OVkRzxikvVK", "outputId": "68caff52-9812-44ab-c59c-ff72b70d8fec" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "GENERATED TEXT:\n", "============================================================\n", "system\n", "You are a helpful assistant that paraphrases text.\n", "user\n", "The applicant was born in ____ and currently resides in_________.\n", "In the_______, a new 
criminal phenomenon emerged in Denmark known as '____' (selskabstømmersager).\n", "system\n", "Now paraphrase this text for privacy\n", "assistant\n", "Sure, here is the paraphrase of the above text that ensures privacy:\n", "assistant\n", "The individual was born in an unspecified location and currently resides in an unspecified place. In a certain region, a new criminal phenomenon emerged in Denmark known as 'cluster incidents' (selskabstømmersager).\n" ] } ], "source": [ "# Display generated text\n", "print(\"GENERATED TEXT:\")\n", "print(\"=\" * 60)\n", "print(output['text'])" ] }, { "cell_type": "markdown", "metadata": { "id": "Qlhi_ZazkvVK" }, "source": [ "## 10. Analyze Generation Statistics" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jWrjG06ukvVK", "outputId": "ed2f6616-86fb-4956-fcdd-1d1928dffa38" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Lambda Statistics (mixing parameter):\n", " Mean: 0.2215\n", " Min: 0.0000\n", " Max: 1.0000\n", " Count: 45 tokens\n" ] } ], "source": [ "# Lambda statistics (mixing parameter)\n", "if output['lambdas'].get('PRIVATE'):\n", " lambdas = output['lambdas']['PRIVATE']\n", " print(\"Lambda Statistics (mixing parameter):\")\n", " print(f\" Mean: {sum(lambdas)/len(lambdas):.4f}\")\n", " print(f\" Min: {min(lambdas):.4f}\")\n", " print(f\" Max: {max(lambdas):.4f}\")\n", " print(f\" Count: {len(lambdas)} tokens\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EiM3ZbhwkvVK", "outputId": "e6db2c36-31d4-4c8d-d142-74f506233631" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Divergence Statistics:\n", " Mean: 0.0153\n", " Min: 0.0000\n", " Max: 0.0200\n", " Count: 45 tokens\n" ] } ], "source": [ "# Divergence statistics\n", "if output['divergences'].get('PRIVATE'):\n", " divs = output['divergences']['PRIVATE']\n", " 
print(\"Divergence Statistics:\")\n", " print(f\" Mean: {sum(divs)/len(divs):.4f}\")\n", " print(f\" Min: {min(divs):.4f}\")\n", " print(f\" Max: {max(divs):.4f}\")\n", " print(f\" Count: {len(divs)} tokens\")" ] }, { "cell_type": "markdown", "metadata": { "id": "E8itcYT6kvVK" }, "source": [ "## 11. Compute Privacy Guarantee\n", "\n", "Calculate the formal (ε, δ)-DP guarantee for this generation:" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yq_jSFzskvVK", "outputId": "9b207983-b4d4-43fe-9ace-ccba11ae8368" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "============================================================\n", "(ε, δ)-DIFFERENTIAL PRIVACY GUARANTEE\n", "============================================================\n", "Parameters: α=2.0, β=0.01, δ=1e-05\n", "Tokens generated: 45\n", "\n", "Empirical ε: 12.8942\n", " (computed from actual divergences observed)\n", "\n", "Theoretical ε: 13.3129\n", " (worst-case bound, assuming max divergence per step)\n", "\n", "This generation satisfies (12.89, 1e-05)-DP\n" ] } ], "source": [ "# Privacy accounting parameters\n", "DELTA = 1e-5 # Target δ for (ε, δ)-DP\n", "\n", "if output['divergences'].get('PRIVATE'):\n", " eps_result = compute_epsilon_single_group(\n", " divergences=output['divergences']['PRIVATE'],\n", " alpha=ALPHA,\n", " delta=DELTA,\n", " beta=BETA\n", " )\n", "\n", " print(\"=\" * 60)\n", " print(\"(ε, δ)-DIFFERENTIAL PRIVACY GUARANTEE\")\n", " print(\"=\" * 60)\n", " print(f\"Parameters: α={ALPHA}, β={BETA}, δ={DELTA}\")\n", " print(f\"Tokens generated: {eps_result['T']}\")\n", " print()\n", " print(f\"Empirical ε: {eps_result['empirical']:.4f}\")\n", " print(f\" (computed from actual divergences observed)\")\n", " print()\n", " print(f\"Theoretical ε: {eps_result['theoretical']:.4f}\")\n", " print(f\" (worst-case bound, assuming max divergence per step)\")\n", " print()\n", " print(f\"This generation 
satisfies ({eps_result['empirical']:.2f}, {DELTA})-DP\")\n", "else:\n", " print(\"No divergences recorded.\")" ] }, { "cell_type": "markdown", "metadata": { "id": "wD9fTQgTkvVK" }, "source": [ "## 12. Summary\n", "\n", "You have successfully:\n", "\n", "1. Loaded an LLM with GPU acceleration\n", "2. Used the Tagger API to automatically identify sensitive phrases\n", "3. Generated text with formal differential privacy guarantees\n", "4. Computed the privacy budget (ε) for your generation\n", "\n", "**Key Concepts:**\n", "\n", "| Metric | Description |\n", "|--------|-------------|\n", "| **Empirical ε** | Actual privacy cost based on observed divergences |\n", "| **Theoretical ε** | Worst-case upper bound for compliance reporting |\n", "| **λ (Lambda)** | Mixing parameter between private and public distributions |\n", "| **β (Beta)** | Per-token privacy budget (lower = more private) |\n", "\n", "**Next Steps:**\n", "- Try different `beta` values to adjust privacy-utility tradeoff\n", "- Experiment with different document types and constitutions\n", "- See the [documentation](https://github.com/rushil-thareja/dp-fusion-lib) for advanced usage" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" }, "colab": { "provenance": [], "gpuType": "T4" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "34f4b8e4b37c4932833b55d050b63a64": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", 
"_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_b95288ad9e0b495bb7f44a0321a8e5dd", "IPY_MODEL_82bf1d92fc124ff49453d44cad2d4f86", "IPY_MODEL_ea453ba8b19e4594a1ddac595d6904e6" ], "layout": "IPY_MODEL_045c2c99b6584e1789cf5fd8ee4e88d4" } }, "b95288ad9e0b495bb7f44a0321a8e5dd": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e04e11ae1e2646ba97ce86bb6e5e9325", "placeholder": "​", "style": "IPY_MODEL_b64798b6fcb74406b18af2a0e6c64c2e", "value": "Loading checkpoint shards: 100%" } }, "82bf1d92fc124ff49453d44cad2d4f86": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c6cd376653124eeba5687480b5a15a41", "max": 4, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e1d7bd89d3e04a6b871cff2ee6608593", "value": 4 } }, "ea453ba8b19e4594a1ddac595d6904e6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_e88a4a6449334717a39266d9dd094303", "placeholder": "​", "style": "IPY_MODEL_0dd1aece7079418e94522c35877b281a", "value": " 4/4 [01:02<00:00, 15.45s/it]" } }, "045c2c99b6584e1789cf5fd8ee4e88d4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e04e11ae1e2646ba97ce86bb6e5e9325": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": 
null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b64798b6fcb74406b18af2a0e6c64c2e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c6cd376653124eeba5687480b5a15a41": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, 
"right": null, "top": null, "visibility": null, "width": null } }, "e1d7bd89d3e04a6b871cff2ee6608593": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e88a4a6449334717a39266d9dd094303": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0dd1aece7079418e94522c35877b281a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }