{
  "doc_name": "PRML.pdf",
  "structure": [
    {
      "title": "Preface",
      "start_index": 1,
      "end_index": 6,
      "node_id": "0000"
    },
    {
      "title": "Preface",
      "start_index": 7,
      "end_index": 10,
      "node_id": "0001"
    },
    {
      "title": "Mathematical notation",
      "start_index": 11,
      "end_index": 13,
      "node_id": "0002"
    },
    {
      "title": "Contents",
      "start_index": 13,
      "end_index": 20,
      "node_id": "0003"
    },
    {
      "title": "Introduction",
      "start_index": 21,
      "end_index": 24,
      "nodes": [
        {
          "title": "Example: Polynomial Curve Fitting",
          "start_index": 24,
          "end_index": 32,
          "node_id": "0005"
        },
        {
          "title": "Probability Theory",
          "start_index": 32,
          "end_index": 37,
          "nodes": [
            {
              "title": "Probability densities",
              "start_index": 37,
              "end_index": 39,
              "node_id": "0007"
            },
            {
              "title": "Expectations and covariances",
              "start_index": 39,
              "end_index": 41,
              "node_id": "0008"
            },
            {
              "title": "Bayesian probabilities",
              "start_index": 41,
              "end_index": 44,
              "node_id": "0009"
            },
            {
              "title": "The Gaussian distribution",
              "start_index": 44,
              "end_index": 48,
              "node_id": "0010"
            },
            {
              "title": "Curve fitting re-visited",
              "start_index": 48,
              "end_index": 50,
              "node_id": "0011"
            },
            {
              "title": "Bayesian curve fitting",
              "start_index": 50,
              "end_index": 52,
              "node_id": "0012"
            }
          ],
          "node_id": "0006"
        },
        {
          "title": "Model Selection",
          "start_index": 52,
          "end_index": 53,
          "node_id": "0013"
        },
        {
          "title": "The Curse of Dimensionality",
          "start_index": 53,
          "end_index": 58,
          "node_id": "0014"
        },
        {
          "title": "Decision Theory",
          "start_index": 58,
          "end_index": 59,
          "nodes": [
            {
              "title": "Minimizing the misclassification rate",
              "start_index": 59,
              "end_index": 61,
              "node_id": "0016"
            },
            {
              "title": "Minimizing the expected loss",
              "start_index": 61,
              "end_index": 62,
              "node_id": "0017"
            },
            {
              "title": "The reject option",
              "start_index": 62,
              "end_index": 62,
              "node_id": "0018"
            },
            {
              "title": "Inference and decision",
              "start_index": 62,
              "end_index": 66,
              "node_id": "0019"
            },
            {
              "title": "Loss functions for regression",
              "start_index": 66,
              "end_index": 68,
              "node_id": "0020"
            }
          ],
          "node_id": "0015"
        },
        {
          "title": "Information Theory",
          "start_index": 68,
          "end_index": 75,
          "nodes": [
            {
              "title": "Relative entropy and mutual information",
              "start_index": 75,
              "end_index": 78,
              "node_id": "0022"
            }
          ],
          "node_id": "0021"
        }
      ],
      "node_id": "0004"
    },
    {
      "title": "Exercises",
      "start_index": 78,
      "end_index": 87,
      "node_id": "0023"
    },
    {
      "title": "Probability Distributions",
      "start_index": 87,
      "end_index": 88,
      "nodes": [
        {
          "title": "Binary Variables",
          "start_index": 88,
          "end_index": 91,
          "nodes": [
            {
              "title": "The beta distribution",
              "start_index": 91,
              "end_index": 94,
              "node_id": "0026"
            }
          ],
          "node_id": "0025"
        },
        {
          "title": "Multinomial Variables",
          "start_index": 94,
          "end_index": 96,
          "nodes": [
            {
              "title": "The Dirichlet distribution",
              "start_index": 96,
              "end_index": 98,
              "node_id": "0028"
            }
          ],
          "node_id": "0027"
        },
        {
          "title": "The Gaussian Distribution",
          "start_index": 98,
          "end_index": 105,
          "nodes": [
            {
              "title": "Conditional Gaussian distributions",
              "start_index": 105,
              "end_index": 108,
              "node_id": "0030"
            },
            {
              "title": "Marginal Gaussian distributions",
              "start_index": 108,
              "end_index": 110,
              "node_id": "0031"
            },
            {
              "title": "Bayes\u2019 theorem for Gaussian variables",
              "start_index": 110,
              "end_index": 113,
              "node_id": "0032"
            },
            {
              "title": "Maximum likelihood for the Gaussian",
              "start_index": 113,
              "end_index": 114,
              "node_id": "0033"
            },
            {
              "title": "Sequential estimation",
              "start_index": 114,
              "end_index": 117,
              "node_id": "0034"
            },
            {
              "title": "Bayesian inference for the Gaussian",
              "start_index": 117,
              "end_index": 122,
              "node_id": "0035"
            },
            {
              "title": "Student\u2019s t-distribution",
              "start_index": 122,
              "end_index": 125,
              "node_id": "0036"
            },
            {
              "title": "Periodic variables",
              "start_index": 125,
              "end_index": 130,
              "node_id": "0037"
            },
            {
              "title": "Mixtures of Gaussians",
              "start_index": 130,
              "end_index": 133,
              "node_id": "0038"
            }
          ],
          "node_id": "0029"
        },
        {
          "title": "The Exponential Family",
          "start_index": 133,
          "end_index": 136,
          "nodes": [
            {
              "title": "Maximum likelihood and sufficient statistics",
              "start_index": 136,
              "end_index": 137,
              "node_id": "0040"
            },
            {
              "title": "Conjugate priors",
              "start_index": 137,
              "end_index": 137,
              "node_id": "0041"
            },
            {
              "title": "Noninformative priors",
              "start_index": 137,
              "end_index": 140,
              "node_id": "0042"
            }
          ],
          "node_id": "0039"
        },
        {
          "title": "Nonparametric Methods",
          "start_index": 140,
          "end_index": 142,
          "nodes": [
            {
              "title": "Kernel density estimators",
              "start_index": 142,
              "end_index": 144,
              "node_id": "0044"
            },
            {
              "title": "Nearest-neighbour methods",
              "start_index": 144,
              "end_index": 147,
              "node_id": "0045"
            }
          ],
          "node_id": "0043"
        }
      ],
      "node_id": "0024"
    },
    {
      "title": "Exercises",
      "start_index": 147,
      "end_index": 156,
      "node_id": "0046"
    },
    {
      "title": "Linear Models for Regression",
      "start_index": 157,
      "end_index": 158,
      "nodes": [
        {
          "title": "Linear Basis Function Models",
          "start_index": 158,
          "end_index": 160,
          "nodes": [
            {
              "title": "Maximum likelihood and least squares",
              "start_index": 160,
              "end_index": 163,
              "node_id": "0049"
            },
            {
              "title": "Geometry of least squares",
              "start_index": 163,
              "end_index": 163,
              "node_id": "0050"
            },
            {
              "title": "Sequential learning",
              "start_index": 163,
              "end_index": 164,
              "node_id": "0051"
            },
            {
              "title": "Regularized least squares",
              "start_index": 164,
              "end_index": 166,
              "node_id": "0052"
            },
            {
              "title": "Multiple outputs",
              "start_index": 166,
              "end_index": 167,
              "node_id": "0053"
            }
          ],
          "node_id": "0048"
        },
        {
          "title": "The Bias-Variance Decomposition",
          "start_index": 167,
          "end_index": 172,
          "node_id": "0054"
        },
        {
          "title": "Bayesian Linear Regression",
          "start_index": 172,
          "end_index": 172,
          "nodes": [
            {
              "title": "Parameter distribution",
              "start_index": 172,
              "end_index": 176,
              "node_id": "0056"
            },
            {
              "title": "Predictive distribution",
              "start_index": 176,
              "end_index": 179,
              "node_id": "0057"
            },
            {
              "title": "Equivalent kernel",
              "start_index": 179,
              "end_index": 181,
              "node_id": "0058"
            }
          ],
          "node_id": "0055"
        },
        {
          "title": "Bayesian Model Comparison",
          "start_index": 181,
          "end_index": 185,
          "node_id": "0059"
        },
        {
          "title": "The Evidence Approximation",
          "start_index": 185,
          "end_index": 186,
          "nodes": [
            {
              "title": "Evaluation of the evidence function",
              "start_index": 186,
              "end_index": 188,
              "node_id": "0061"
            },
            {
              "title": "Maximizing the evidence function",
              "start_index": 188,
              "end_index": 190,
              "node_id": "0062"
            },
            {
              "title": "Effective number of parameters",
              "start_index": 190,
              "end_index": 192,
              "node_id": "0063"
            }
          ],
          "node_id": "0060"
        },
        {
          "title": "Limitations of Fixed Basis Functions",
          "start_index": 192,
          "end_index": 193,
          "node_id": "0064"
        }
      ],
      "node_id": "0047"
    },
    {
      "title": "Exercises",
      "start_index": 193,
      "end_index": 199,
      "node_id": "0065"
    },
    {
      "title": "Linear Models for Classification",
      "start_index": 199,
      "end_index": 201,
      "nodes": [
        {
          "title": "Discriminant Functions",
          "start_index": 201,
          "end_index": 201,
          "nodes": [
            {
              "title": "Two classes",
              "start_index": 201,
              "end_index": 202,
              "node_id": "0068"
            },
            {
              "title": "Multiple classes",
              "start_index": 202,
              "end_index": 204,
              "node_id": "0069"
            },
            {
              "title": "Least squares for classification",
              "start_index": 204,
              "end_index": 206,
              "node_id": "0070"
            },
            {
              "title": "Fisher\u2019s linear discriminant",
              "start_index": 206,
              "end_index": 209,
              "node_id": "0071"
            },
            {
              "title": "Relation to least squares",
              "start_index": 209,
              "end_index": 211,
              "node_id": "0072"
            },
            {
              "title": "Fisher\u2019s discriminant for multiple classes",
              "start_index": 211,
              "end_index": 212,
              "node_id": "0073"
            },
            {
              "title": "The perceptron algorithm",
              "start_index": 212,
              "end_index": 216,
              "node_id": "0074"
            }
          ],
          "node_id": "0067"
        },
        {
          "title": "Probabilistic Generative Models",
          "start_index": 216,
          "end_index": 218,
          "nodes": [
            {
              "title": "Continuous inputs",
              "start_index": 218,
              "end_index": 220,
              "node_id": "0076"
            },
            {
              "title": "Maximum likelihood solution",
              "start_index": 220,
              "end_index": 222,
              "node_id": "0077"
            },
            {
              "title": "Discrete features",
              "start_index": 222,
              "end_index": 222,
              "node_id": "0078"
            },
            {
              "title": "Exponential family",
              "start_index": 222,
              "end_index": 223,
              "node_id": "0079"
            }
          ],
          "node_id": "0075"
        },
        {
          "title": "Probabilistic Discriminative Models",
          "start_index": 223,
          "end_index": 224,
          "nodes": [
            {
              "title": "Fixed basis functions",
              "start_index": 224,
              "end_index": 225,
              "node_id": "0081"
            },
            {
              "title": "Logistic regression",
              "start_index": 225,
              "end_index": 227,
              "node_id": "0082"
            },
            {
              "title": "Iterative reweighted least squares",
              "start_index": 227,
              "end_index": 229,
              "node_id": "0083"
            },
            {
              "title": "Multiclass logistic regression",
              "start_index": 229,
              "end_index": 230,
              "node_id": "0084"
            },
            {
              "title": "Probit regression",
              "start_index": 230,
              "end_index": 232,
              "node_id": "0085"
            },
            {
              "title": "Canonical link functions",
              "start_index": 232,
              "end_index": 232,
              "node_id": "0086"
            }
          ],
          "node_id": "0080"
        },
        {
          "title": "The Laplace Approximation",
          "start_index": 233,
          "end_index": 236,
          "nodes": [
            {
              "title": "Model comparison and BIC",
              "start_index": 236,
              "end_index": 237,
              "node_id": "0088"
            }
          ],
          "node_id": "0087"
        },
        {
          "title": "Bayesian Logistic Regression",
          "start_index": 237,
          "end_index": 237,
          "nodes": [
            {
              "title": "Laplace approximation",
              "start_index": 237,
              "end_index": 238,
              "node_id": "0090"
            },
            {
              "title": "Predictive distribution",
              "start_index": 238,
              "end_index": 240,
              "node_id": "0091"
            }
          ],
          "node_id": "0089"
        }
      ],
      "node_id": "0066"
    },
    {
      "title": "Exercises",
      "start_index": 240,
      "end_index": 245,
      "node_id": "0092"
    },
    {
      "title": "Neural Networks",
      "start_index": 245,
      "end_index": 247,
      "nodes": [
        {
          "title": "Feed-forward Network Functions",
          "start_index": 247,
          "end_index": 251,
          "nodes": [
            {
              "title": "Weight-space symmetries",
              "start_index": 251,
              "end_index": 252,
              "node_id": "0095"
            }
          ],
          "node_id": "0094"
        },
        {
          "title": "Network Training",
          "start_index": 252,
          "end_index": 256,
          "nodes": [
            {
              "title": "Parameter optimization",
              "start_index": 256,
              "end_index": 257,
              "node_id": "0097"
            },
            {
              "title": "Local quadratic approximation",
              "start_index": 257,
              "end_index": 259,
              "node_id": "0098"
            },
            {
              "title": "Use of gradient information",
              "start_index": 259,
              "end_index": 260,
              "node_id": "0099"
            },
            {
              "title": "Gradient descent optimization",
              "start_index": 260,
              "end_index": 261,
              "node_id": "0100"
            }
          ],
          "node_id": "0096"
        },
        {
          "title": "Error Backpropagation",
          "start_index": 261,
          "end_index": 262,
          "nodes": [
            {
              "title": "Evaluation of error-function derivatives",
              "start_index": 262,
              "end_index": 265,
              "node_id": "0102"
            },
            {
              "title": "A simple example",
              "start_index": 265,
              "end_index": 266,
              "node_id": "0103"
            },
            {
              "title": "Efficiency of backpropagation",
              "start_index": 266,
              "end_index": 267,
              "node_id": "0104"
            },
            {
              "title": "The Jacobian matrix",
              "start_index": 267,
              "end_index": 269,
              "node_id": "0105"
            }
          ],
          "node_id": "0101"
        },
        {
          "title": "The Hessian Matrix",
          "start_index": 269,
          "end_index": 270,
          "nodes": [
            {
              "title": "Diagonal approximation",
              "start_index": 270,
              "end_index": 271,
              "node_id": "0107"
            },
            {
              "title": "Outer product approximation",
              "start_index": 271,
              "end_index": 272,
              "node_id": "0108"
            },
            {
              "title": "Inverse Hessian",
              "start_index": 272,
              "end_index": 272,
              "node_id": "0109"
            },
            {
              "title": "Finite differences",
              "start_index": 272,
              "end_index": 273,
              "node_id": "0110"
            },
            {
              "title": "Exact evaluation of the Hessian",
              "start_index": 273,
              "end_index": 274,
              "node_id": "0111"
            },
            {
              "title": "Fast multiplication by the Hessian",
              "start_index": 274,
              "end_index": 276,
              "node_id": "0112"
            }
          ],
          "node_id": "0106"
        },
        {
          "title": "Regularization in Neural Networks",
          "start_index": 276,
          "end_index": 277,
          "nodes": [
            {
              "title": "Consistent Gaussian priors",
              "start_index": 277,
              "end_index": 279,
              "node_id": "0114"
            },
            {
              "title": "Early stopping",
              "start_index": 279,
              "end_index": 281,
              "node_id": "0115"
            },
            {
              "title": "Invariances",
              "start_index": 281,
              "end_index": 283,
              "node_id": "0116"
            },
            {
              "title": "Tangent propagation",
              "start_index": 283,
              "end_index": 285,
              "node_id": "0117"
            },
            {
              "title": "Training with transformed data",
              "start_index": 285,
              "end_index": 287,
              "node_id": "0118"
            },
            {
              "title": "Convolutional networks",
              "start_index": 287,
              "end_index": 289,
              "node_id": "0119"
            },
            {
              "title": "Soft weight sharing",
              "start_index": 289,
              "end_index": 292,
              "node_id": "0120"
            }
          ],
          "node_id": "0113"
        },
        {
          "title": "Mixture Density Networks",
          "start_index": 292,
          "end_index": 297,
          "node_id": "0121"
        },
        {
          "title": "Bayesian Neural Networks",
          "start_index": 297,
          "end_index": 298,
          "nodes": [
            {
              "title": "Posterior parameter distribution",
              "start_index": 298,
              "end_index": 300,
              "node_id": "0123"
            },
            {
              "title": "Hyperparameter optimization",
              "start_index": 300,
              "end_index": 301,
              "node_id": "0124"
            },
            {
              "title": "Bayesian neural networks for classification",
              "start_index": 301,
              "end_index": 304,
              "node_id": "0125"
            }
          ],
          "node_id": "0122"
        }
      ],
      "node_id": "0093"
    },
    {
      "title": "Exercises",
      "start_index": 304,
      "end_index": 311,
      "node_id": "0126"
    },
    {
      "title": "Kernel Methods",
      "start_index": 311,
      "end_index": 313,
      "nodes": [
        {
          "title": "Dual Representations",
          "start_index": 313,
          "end_index": 314,
          "node_id": "0128"
        },
        {
          "title": "Constructing Kernels",
          "start_index": 314,
          "end_index": 319,
          "node_id": "0129"
        },
        {
          "title": "Radial Basis Function Networks",
          "start_index": 319,
          "end_index": 321,
          "nodes": [
            {
              "title": "Nadaraya-Watson model",
              "start_index": 321,
              "end_index": 323,
              "node_id": "0131"
            }
          ],
          "node_id": "0130"
        },
        {
          "title": "Gaussian Processes",
          "start_index": 323,
          "end_index": 324,
          "nodes": [
            {
              "title": "Linear regression revisited",
              "start_index": 324,
              "end_index": 326,
              "node_id": "0133"
            },
            {
              "title": "Gaussian processes for regression",
              "start_index": 326,
              "end_index": 331,
              "node_id": "0134"
            },
            {
              "title": "Learning the hyperparameters",
              "start_index": 331,
              "end_index": 332,
              "node_id": "0135"
            },
            {
              "title": "Automatic relevance determination",
              "start_index": 332,
              "end_index": 333,
              "node_id": "0136"
            },
            {
              "title": "Gaussian processes for classification",
              "start_index": 333,
              "end_index": 335,
              "node_id": "0137"
            },
            {
              "title": "Laplace approximation",
              "start_index": 335,
              "end_index": 339,
              "node_id": "0138"
            },
            {
              "title": "Connection to neural networks",
              "start_index": 339,
              "end_index": 340,
              "node_id": "0139"
            }
          ],
          "node_id": "0132"
        }
      ],
      "node_id": "0127"
    },
    {
      "title": "Exercises",
      "start_index": 340,
      "end_index": 344,
      "node_id": "0140"
    },
    {
      "title": "Sparse Kernel Machines",
      "start_index": 345,
      "end_index": 346,
      "nodes": [
        {
          "title": "Maximum Margin Classifiers",
          "start_index": 346,
          "end_index": 351,
          "nodes": [
            {
              "title": "Overlapping class distributions",
              "start_index": 351,
              "end_index": 356,
              "node_id": "0143"
            },
            {
              "title": "Relation to logistic regression",
              "start_index": 356,
              "end_index": 358,
              "node_id": "0144"
            },
            {
              "title": "Multiclass SVMs",
              "start_index": 358,
              "end_index": 359,
              "node_id": "0145"
            },
            {
              "title": "SVMs for regression",
              "start_index": 359,
              "end_index": 364,
              "node_id": "0146"
            },
            {
              "title": "Computational learning theory",
              "start_index": 364,
              "end_index": 365,
              "node_id": "0147"
            }
          ],
          "node_id": "0142"
        },
        {
          "title": "Relevance Vector Machines",
          "start_index": 365,
          "end_index": 365,
          "nodes": [
            {
              "title": "RVM for regression",
              "start_index": 365,
              "end_index": 369,
              "node_id": "0149"
            },
            {
              "title": "Analysis of sparsity",
              "start_index": 369,
              "end_index": 373,
              "node_id": "0150"
            },
            {
              "title": "RVM for classification",
              "start_index": 373,
              "end_index": 377,
              "node_id": "0151"
            }
          ],
          "node_id": "0148"
        }
      ],
      "node_id": "0141"
    },
    {
      "title": "Exercises",
      "start_index": 377,
      "end_index": 379,
      "node_id": "0152"
    },
    {
      "title": "Graphical Models",
      "start_index": 379,
      "end_index": 380,
      "nodes": [
        {
          "title": "Bayesian Networks",
          "start_index": 380,
          "end_index": 382,
          "nodes": [
            {
              "title": "Example: Polynomial regression",
              "start_index": 382,
              "end_index": 385,
              "node_id": "0155"
            },
            {
              "title": "Generative models",
              "start_index": 385,
              "end_index": 386,
              "node_id": "0156"
            },
            {
              "title": "Discrete variables",
              "start_index": 386,
              "end_index": 390,
              "node_id": "0157"
            },
            {
              "title": "Linear-Gaussian models",
              "start_index": 390,
              "end_index": 392,
              "node_id": "0158"
            }
          ],
          "node_id": "0154"
        },
        {
          "title": "Conditional Independence",
          "start_index": 392,
          "end_index": 393,
          "nodes": [
            {
              "title": "Three example graphs",
              "start_index": 393,
              "end_index": 398,
              "node_id": "0160"
            },
            {
              "title": "D-separation",
              "start_index": 398,
              "end_index": 403,
              "node_id": "0161"
            }
          ],
          "node_id": "0159"
        },
        {
          "title": "Markov Random Fields",
          "start_index": 403,
          "end_index": 403,
          "nodes": [
            {
              "title": "Conditional independence properties",
              "start_index": 403,
              "end_index": 404,
              "node_id": "0163"
            },
            {
              "title": "Factorization properties",
              "start_index": 404,
              "end_index": 407,
              "node_id": "0164"
            },
            {
              "title": "Illustration: Image de-noising",
              "start_index": 407,
              "end_index": 410,
              "node_id": "0165"
            },
            {
              "title": "Relation to directed graphs",
              "start_index": 410,
              "end_index": 413,
              "node_id": "0166"
            }
          ],
          "node_id": "0162"
        },
        {
          "title": "Inference in Graphical Models",
          "start_index": 413,
          "end_index": 414,
          "nodes": [
            {
              "title": "Inference on a chain",
              "start_index": 414,
              "end_index": 418,
              "node_id": "0168"
            },
            {
              "title": "Trees",
              "start_index": 418,
              "end_index": 419,
              "node_id": "0169"
            },
            {
              "title": "Factor graphs",
              "start_index": 419,
              "end_index": 422,
              "node_id": "0170"
            },
            {
              "title": "The sum-product algorithm",
              "start_index": 422,
              "end_index": 431,
              "node_id": "0171"
            },
            {
              "title": "The max-sum algorithm",
              "start_index": 431,
              "end_index": 436,
              "node_id": "0172"
            },
            {
              "title": "Exact inference in general graphs",
              "start_index": 436,
              "end_index": 437,
              "node_id": "0173"
            },
            {
              "title": "Loopy belief propagation",
              "start_index": 437,
              "end_index": 438,
              "node_id": "0174"
            },
            {
              "title": "Learning the graph structure",
              "start_index": 438,
              "end_index": 438,
              "node_id": "0175"
            }
          ],
          "node_id": "0167"
        }
      ],
      "node_id": "0153"
    },
    {
      "title": "Exercises",
      "start_index": 438,
      "end_index": 443,
      "node_id": "0176"
    },
    {
      "title": "Mixture Models and EM",
      "start_index": 443,
      "end_index": 444,
      "nodes": [
        {
          "title": "K-means Clustering",
          "start_index": 444,
          "end_index": 448,
          "nodes": [
            {
              "title": "Image segmentation and compression",
              "start_index": 448,
              "end_index": 450,
              "node_id": "0179"
            }
          ],
          "node_id": "0178"
        },
        {
          "title": "Mixtures of Gaussians",
          "start_index": 450,
          "end_index": 452,
          "nodes": [
            {
              "title": "Maximum likelihood",
              "start_index": 452,
              "end_index": 455,
              "node_id": "0181"
            },
            {
              "title": "EM for Gaussian mixtures",
              "start_index": 455,
              "end_index": 459,
              "node_id": "0182"
            }
          ],
          "node_id": "0180"
        },
        {
          "title": "An Alternative View of EM",
          "start_index": 459,
          "end_index": 461,
          "nodes": [
            {
              "title": "Gaussian mixtures revisited",
              "start_index": 461,
              "end_index": 463,
              "node_id": "0184"
            },
            {
              "title": "Relation to K-means",
              "start_index": 463,
              "end_index": 464,
              "node_id": "0185"
            },
            {
              "title": "Mixtures of Bernoulli distributions",
              "start_index": 464,
              "end_index": 468,
              "node_id": "0186"
            },
            {
              "title": "EM for Bayesian linear regression",
              "start_index": 468,
              "end_index": 470,
              "node_id": "0187"
            }
          ],
          "node_id": "0183"
        },
        {
          "title": "The EM Algorithm in General",
          "start_index": 470,
          "end_index": 475,
          "node_id": "0188"
        }
      ],
      "node_id": "0177"
    },
    {
      "title": "Exercises",
      "start_index": 475,
      "end_index": 480,
      "node_id": "0189"
    },
    {
      "title": "Approximate Inference",
      "start_index": 481,
      "end_index": 482,
      "nodes": [
        {
          "title": "Variational Inference",
          "start_index": 482,
          "end_index": 484,
          "nodes": [
            {
              "title": "Factorized distributions",
              "start_index": 484,
              "end_index": 486,
              "node_id": "0192"
            },
            {
              "title": "Properties of factorized approximations",
              "start_index": 486,
              "end_index": 490,
              "node_id": "0193"
            },
            {
              "title": "Example: The univariate Gaussian",
              "start_index": 490,
              "end_index": 493,
              "node_id": "0194"
            },
            {
              "title": "Model comparison",
              "start_index": 493,
              "end_index": 494,
              "node_id": "0195"
            }
          ],
          "node_id": "0191"
        },
        {
          "title": "Illustration: Variational Mixture of Gaussians",
          "start_index": 494,
          "end_index": 495,
          "nodes": [
            {
              "title": "Variational distribution",
              "start_index": 495,
              "end_index": 501,
              "node_id": "0197"
            },
            {
              "title": "Variational lower bound",
              "start_index": 501,
              "end_index": 502,
              "node_id": "0198"
            },
            {
              "title": "Predictive density",
              "start_index": 502,
              "end_index": 503,
              "node_id": "0199"
            },
            {
              "title": "Determining the number of components",
              "start_index": 503,
              "end_index": 505,
              "node_id": "0200"
            },
            {
              "title": "Induced factorizations",
              "start_index": 505,
              "end_index": 506,
              "node_id": "0201"
            }
          ],
          "node_id": "0196"
        },
        {
          "title": "Variational Linear Regression",
          "start_index": 506,
          "end_index": 506,
          "nodes": [
            {
              "title": "Variational distribution",
              "start_index": 506,
              "end_index": 508,
              "node_id": "0203"
            },
            {
              "title": "Predictive distribution",
              "start_index": 508,
              "end_index": 509,
              "node_id": "0204"
            },
            {
              "title": "Lower bound",
              "start_index": 509,
              "end_index": 510,
              "node_id": "0205"
            }
          ],
          "node_id": "0202"
        },
        {
          "title": "Exponential Family Distributions",
          "start_index": 510,
          "end_index": 511,
          "nodes": [
            {
              "title": "Variational message passing",
              "start_index": 511,
              "end_index": 512,
              "node_id": "0207"
            }
          ],
          "node_id": "0206"
        },
        {
          "title": "Local Variational Methods",
          "start_index": 513,
          "end_index": 518,
          "node_id": "0208"
        },
        {
          "title": "Variational Logistic Regression",
          "start_index": 518,
          "end_index": 518,
          "nodes": [
            {
              "title": "Variational posterior distribution",
              "start_index": 518,
              "end_index": 520,
              "node_id": "0210"
            },
            {
              "title": "Optimizing the variational parameters",
              "start_index": 520,
              "end_index": 522,
              "node_id": "0211"
            },
            {
              "title": "Inference of hyperparameters",
              "start_index": 522,
              "end_index": 525,
              "node_id": "0212"
            }
          ],
          "node_id": "0209"
        },
        {
          "title": "Expectation Propagation",
          "start_index": 525,
          "end_index": 531,
          "nodes": [
            {
              "title": "Example: The clutter problem",
              "start_index": 531,
              "end_index": 533,
              "node_id": "0214"
            },
            {
              "title": "Expectation propagation on graphs",
              "start_index": 533,
              "end_index": 537,
              "node_id": "0215"
            }
          ],
          "node_id": "0213"
        }
      ],
      "node_id": "0190"
    },
    {
      "title": "Exercises",
      "start_index": 537,
      "end_index": 542,
      "node_id": "0216"
    },
    {
      "title": "Sampling Methods",
      "start_index": 543,
      "end_index": 546,
      "nodes": [
        {
          "title": "Basic Sampling Algorithms",
          "start_index": 546,
          "end_index": 546,
          "nodes": [
            {
              "title": "Standard distributions",
              "start_index": 546,
              "end_index": 548,
              "node_id": "0219"
            },
            {
              "title": "Rejection sampling",
              "start_index": 548,
              "end_index": 550,
              "node_id": "0220"
            },
            {
              "title": "Adaptive rejection sampling",
              "start_index": 550,
              "end_index": 552,
              "node_id": "0221"
            },
            {
              "title": "Importance sampling",
              "start_index": 552,
              "end_index": 554,
              "node_id": "0222"
            },
            {
              "title": "Sampling-importance-resampling",
              "start_index": 554,
              "end_index": 556,
              "node_id": "0223"
            },
            {
              "title": "Sampling and the EM algorithm",
              "start_index": 556,
              "end_index": 556,
              "node_id": "0224"
            }
          ],
          "node_id": "0218"
        },
        {
          "title": "Markov Chain Monte Carlo",
          "start_index": 557,
          "end_index": 559,
          "nodes": [
            {
              "title": "Markov chains",
              "start_index": 559,
              "end_index": 561,
              "node_id": "0226"
            },
            {
              "title": "The Metropolis-Hastings algorithm",
              "start_index": 561,
              "end_index": 562,
              "node_id": "0227"
            }
          ],
          "node_id": "0225"
        },
        {
          "title": "Gibbs Sampling",
          "start_index": 562,
          "end_index": 566,
          "node_id": "0228"
        },
        {
          "title": "Slice Sampling",
          "start_index": 566,
          "end_index": 568,
          "node_id": "0229"
        },
        {
          "title": "The Hybrid Monte Carlo Algorithm",
          "start_index": 568,
          "end_index": 568,
          "nodes": [
            {
              "title": "Dynamical systems",
              "start_index": 568,
              "end_index": 572,
              "node_id": "0231"
            },
            {
              "title": "Hybrid Monte Carlo",
              "start_index": 572,
              "end_index": 574,
              "node_id": "0232"
            }
          ],
          "node_id": "0230"
        },
        {
          "title": "Estimating the Partition Function",
          "start_index": 574,
          "end_index": 576,
          "node_id": "0233"
        }
      ],
      "node_id": "0217"
    },
    {
      "title": "Exercises",
      "start_index": 576,
      "end_index": 579,
      "node_id": "0234"
    },
    {
      "title": "Continuous Latent Variables",
      "start_index": 579,
      "end_index": 581,
      "nodes": [
        {
          "title": "Principal Component Analysis",
          "start_index": 581,
          "end_index": 581,
          "nodes": [
            {
              "title": "Maximum variance formulation",
              "start_index": 581,
              "end_index": 583,
              "node_id": "0237"
            },
            {
              "title": "Minimum-error formulation",
              "start_index": 583,
              "end_index": 585,
              "node_id": "0238"
            },
            {
              "title": "Applications of PCA",
              "start_index": 585,
              "end_index": 589,
              "node_id": "0239"
            },
            {
              "title": "PCA for high-dimensional data",
              "start_index": 589,
              "end_index": 590,
              "node_id": "0240"
            }
          ],
          "node_id": "0236"
        },
        {
          "title": "Probabilistic PCA",
          "start_index": 590,
          "end_index": 594,
          "nodes": [
            {
              "title": "Maximum likelihood PCA",
              "start_index": 594,
              "end_index": 597,
              "node_id": "0242"
            },
            {
              "title": "EM algorithm for PCA",
              "start_index": 597,
              "end_index": 600,
              "node_id": "0243"
            },
            {
              "title": "Bayesian PCA",
              "start_index": 600,
              "end_index": 603,
              "node_id": "0244"
            },
            {
              "title": "Factor analysis",
              "start_index": 603,
              "end_index": 606,
              "node_id": "0245"
            }
          ],
          "node_id": "0241"
        },
        {
          "title": "Kernel PCA",
          "start_index": 606,
          "end_index": 610,
          "node_id": "0246"
        },
        {
          "title": "Nonlinear Latent Variable Models",
          "start_index": 611,
          "end_index": 611,
          "nodes": [
            {
              "title": "Independent component analysis",
              "start_index": 611,
              "end_index": 612,
              "node_id": "0248"
            },
            {
              "title": "Autoassociative neural networks",
              "start_index": 612,
              "end_index": 615,
              "node_id": "0249"
            },
            {
              "title": "Modelling nonlinear manifolds",
              "start_index": 615,
              "end_index": 619,
              "node_id": "0250"
            }
          ],
          "node_id": "0247"
        }
      ],
      "node_id": "0235"
    },
    {
      "title": "Exercises",
      "start_index": 619,
      "end_index": 624,
      "node_id": "0251"
    },
    {
      "title": "Sequential Data",
      "start_index": 625,
      "end_index": 627,
      "nodes": [
        {
          "title": "Markov Models",
          "start_index": 627,
          "end_index": 630,
          "node_id": "0253"
        },
        {
          "title": "Hidden Markov Models",
          "start_index": 630,
          "end_index": 635,
          "nodes": [
            {
              "title": "Maximum likelihood for the HMM",
              "start_index": 635,
              "end_index": 638,
              "node_id": "0255"
            },
            {
              "title": "The forward-backward algorithm",
              "start_index": 638,
              "end_index": 645,
              "node_id": "0256"
            },
            {
              "title": "The sum-product algorithm for the HMM",
              "start_index": 645,
              "end_index": 647,
              "node_id": "0257"
            },
            {
              "title": "Scaling factors",
              "start_index": 647,
              "end_index": 649,
              "node_id": "0258"
            },
            {
              "title": "The Viterbi algorithm",
              "start_index": 649,
              "end_index": 651,
              "node_id": "0259"
            },
            {
              "title": "Extensions of the hidden Markov model",
              "start_index": 651,
              "end_index": 655,
              "node_id": "0260"
            }
          ],
          "node_id": "0254"
        },
        {
          "title": "Linear Dynamical Systems",
          "start_index": 655,
          "end_index": 658,
          "nodes": [
            {
              "title": "Inference in LDS",
              "start_index": 658,
              "end_index": 662,
              "node_id": "0262"
            },
            {
              "title": "Learning in LDS",
              "start_index": 662,
              "end_index": 664,
              "node_id": "0263"
            },
            {
              "title": "Extensions of LDS",
              "start_index": 664,
              "end_index": 665,
              "node_id": "0264"
            },
            {
              "title": "Particle filters",
              "start_index": 665,
              "end_index": 666,
              "node_id": "0265"
            }
          ],
          "node_id": "0261"
        }
      ],
      "node_id": "0252"
    },
    {
      "title": "Exercises",
      "start_index": 666,
      "end_index": 672,
      "node_id": "0266"
    },
    {
      "title": "Combining Models",
      "start_index": 673,
      "end_index": 674,
      "nodes": [
        {
          "title": "Bayesian Model Averaging",
          "start_index": 674,
          "end_index": 675,
          "node_id": "0268"
        },
        {
          "title": "Committees",
          "start_index": 675,
          "end_index": 677,
          "node_id": "0269"
        },
        {
          "title": "Boosting",
          "start_index": 677,
          "end_index": 679,
          "nodes": [
            {
              "title": "Minimizing exponential error",
              "start_index": 679,
              "end_index": 681,
              "node_id": "0271"
            },
            {
              "title": "Error functions for boosting",
              "start_index": 681,
              "end_index": 683,
              "node_id": "0272"
            }
          ],
          "node_id": "0270"
        },
        {
          "title": "Tree-based Models",
          "start_index": 683,
          "end_index": 686,
          "node_id": "0273"
        },
        {
          "title": "Conditional Mixture Models",
          "start_index": 686,
          "end_index": 687,
          "nodes": [
            {
              "title": "Mixtures of linear regression models",
              "start_index": 687,
              "end_index": 690,
              "node_id": "0275"
            },
            {
              "title": "Mixtures of logistic models",
              "start_index": 690,
              "end_index": 692,
              "node_id": "0276"
            },
            {
              "title": "Mixtures of experts",
              "start_index": 692,
              "end_index": 694,
              "node_id": "0277"
            }
          ],
          "node_id": "0274"
        }
      ],
      "node_id": "0267"
    },
    {
      "title": "Exercises",
      "start_index": 694,
      "end_index": 696,
      "node_id": "0278"
    },
    {
      "title": "Appendix A Data Sets",
      "start_index": 697,
      "end_index": 704,
      "node_id": "0279"
    },
    {
      "title": "Appendix B Probability Distributions",
      "start_index": 705,
      "end_index": 714,
      "node_id": "0280"
    },
    {
      "title": "Appendix C Properties of Matrices",
      "start_index": 715,
      "end_index": 722,
      "node_id": "0281"
    },
    {
      "title": "Appendix D Calculus of Variations",
      "start_index": 723,
      "end_index": 726,
      "node_id": "0282"
    },
    {
      "title": "Appendix E Lagrange Multipliers",
      "start_index": 727,
      "end_index": 730,
      "node_id": "0283"
    },
    {
      "title": "References",
      "start_index": 731,
      "end_index": 749,
      "node_id": "0284"
    },
    {
      "title": "Index",
      "start_index": 749,
      "end_index": 758,
      "node_id": "0285"
    }
  ]
}