[
  {
    "title": "Preface",
    "start_index": 1,
    "end_index": 6
  },
  {
    "title": "Preface",
    "start_index": 7,
    "end_index": 10
  },
  {
    "title": "Mathematical notation",
    "start_index": 11,
    "end_index": 13
  },
  {
    "title": "Contents",
    "start_index": 13,
    "end_index": 20
  },
  {
    "title": "Introduction",
    "start_index": 21,
    "end_index": 24,
    "child_nodes": [
      {
        "title": "Example: Polynomial Curve Fitting",
        "start_index": 24,
        "end_index": 32
      },
      {
        "title": "Probability Theory",
        "start_index": 32,
        "end_index": 37,
        "child_nodes": [
          {
            "title": "Probability densities",
            "start_index": 37,
            "end_index": 39
          },
          {
            "title": "Expectations and covariances",
            "start_index": 39,
            "end_index": 41
          },
          {
            "title": "Bayesian probabilities",
            "start_index": 41,
            "end_index": 44
          },
          {
            "title": "The Gaussian distribution",
            "start_index": 44,
            "end_index": 48
          },
          {
            "title": "Curve fitting re-visited",
            "start_index": 48,
            "end_index": 50
          },
          {
            "title": "Bayesian curve fitting",
            "start_index": 50,
            "end_index": 52
          }
        ]
      },
      {
        "title": "Model Selection",
        "start_index": 52,
        "end_index": 53
      },
      {
        "title": "The Curse of Dimensionality",
        "start_index": 53,
        "end_index": 58
      },
      {
        "title": "Decision Theory",
        "start_index": 58,
        "end_index": 59,
        "child_nodes": [
          {
            "title": "Minimizing the misclassification rate",
            "start_index": 59,
            "end_index": 61
          },
          {
            "title": "Minimizing the expected loss",
            "start_index": 61,
            "end_index": 62
          },
          {
            "title": "The reject option",
            "start_index": 62,
            "end_index": 62
          },
          {
            "title": "Inference and decision",
            "start_index": 62,
            "end_index": 66
          },
          {
            "title": "Loss functions for regression",
            "start_index": 66,
            "end_index": 68
          }
        ]
      },
      {
        "title": "Information Theory",
        "start_index": 68,
        "end_index": 75,
        "child_nodes": [
          {
            "title": "Relative entropy and mutual information",
            "start_index": 75,
            "end_index": 78
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 78,
    "end_index": 87
  },
  {
    "title": "Probability Distributions",
    "start_index": 87,
    "end_index": 88,
    "child_nodes": [
      {
        "title": "Binary Variables",
        "start_index": 88,
        "end_index": 91,
        "child_nodes": [
          {
            "title": "The beta distribution",
            "start_index": 91,
            "end_index": 94
          }
        ]
      },
      {
        "title": "Multinomial Variables",
        "start_index": 94,
        "end_index": 96,
        "child_nodes": [
          {
            "title": "The Dirichlet distribution",
            "start_index": 96,
            "end_index": 98
          }
        ]
      },
      {
        "title": "The Gaussian Distribution",
        "start_index": 98,
        "end_index": 105,
        "child_nodes": [
          {
            "title": "Conditional Gaussian distributions",
            "start_index": 105,
            "end_index": 108
          },
          {
            "title": "Marginal Gaussian distributions",
            "start_index": 108,
            "end_index": 110
          },
          {
            "title": "Bayes\u2019 theorem for Gaussian variables",
            "start_index": 110,
            "end_index": 113
          },
          {
            "title": "Maximum likelihood for the Gaussian",
            "start_index": 113,
            "end_index": 114
          },
          {
            "title": "Sequential estimation",
            "start_index": 114,
            "end_index": 117
          },
          {
            "title": "Bayesian inference for the Gaussian",
            "start_index": 117,
            "end_index": 122
          },
          {
            "title": "Student\u2019s t-distribution",
            "start_index": 122,
            "end_index": 125
          },
          {
            "title": "Periodic variables",
            "start_index": 125,
            "end_index": 130
          },
          {
            "title": "Mixtures of Gaussians",
            "start_index": 130,
            "end_index": 133
          }
        ]
      },
      {
        "title": "The Exponential Family",
        "start_index": 133,
        "end_index": 136,
        "child_nodes": [
          {
            "title": "Maximum likelihood and sufficient statistics",
            "start_index": 136,
            "end_index": 137
          },
          {
            "title": "Conjugate priors",
            "start_index": 137,
            "end_index": 137
          },
          {
            "title": "Noninformative priors",
            "start_index": 137,
            "end_index": 140
          }
        ]
      },
      {
        "title": "Nonparametric Methods",
        "start_index": 140,
        "end_index": 142,
        "child_nodes": [
          {
            "title": "Kernel density estimators",
            "start_index": 142,
            "end_index": 144
          },
          {
            "title": "Nearest-neighbour methods",
            "start_index": 144,
            "end_index": 147
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 147,
    "end_index": 156
  },
  {
    "title": "Linear Models for Regression",
    "start_index": 157,
    "end_index": 158,
    "child_nodes": [
      {
        "title": "Linear Basis Function Models",
        "start_index": 158,
        "end_index": 160,
        "child_nodes": [
          {
            "title": "Maximum likelihood and least squares",
            "start_index": 160,
            "end_index": 163
          },
          {
            "title": "Geometry of least squares",
            "start_index": 163,
            "end_index": 163
          },
          {
            "title": "Sequential learning",
            "start_index": 163,
            "end_index": 164
          },
          {
            "title": "Regularized least squares",
            "start_index": 164,
            "end_index": 166
          },
          {
            "title": "Multiple outputs",
            "start_index": 166,
            "end_index": 167
          }
        ]
      },
      {
        "title": "The Bias-Variance Decomposition",
        "start_index": 167,
        "end_index": 172
      },
      {
        "title": "Bayesian Linear Regression",
        "start_index": 172,
        "end_index": 172,
        "child_nodes": [
          {
            "title": "Parameter distribution",
            "start_index": 172,
            "end_index": 176
          },
          {
            "title": "Predictive distribution",
            "start_index": 176,
            "end_index": 179
          },
          {
            "title": "Equivalent kernel",
            "start_index": 179,
            "end_index": 181
          }
        ]
      },
      {
        "title": "Bayesian Model Comparison",
        "start_index": 181,
        "end_index": 185
      },
      {
        "title": "The Evidence Approximation",
        "start_index": 185,
        "end_index": 186,
        "child_nodes": [
          {
            "title": "Evaluation of the evidence function",
            "start_index": 186,
            "end_index": 188
          },
          {
            "title": "Maximizing the evidence function",
            "start_index": 188,
            "end_index": 190
          },
          {
            "title": "Effective number of parameters",
            "start_index": 190,
            "end_index": 192
          }
        ]
      },
      {
        "title": "Limitations of Fixed Basis Functions",
        "start_index": 192,
        "end_index": 193
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 193,
    "end_index": 198
  },
  {
    "title": "Linear Models for Classification",
    "start_index": 199,
    "end_index": 201,
    "child_nodes": [
      {
        "title": "Discriminant Functions",
        "start_index": 201,
        "end_index": 201,
        "child_nodes": [
          {
            "title": "Two classes",
            "start_index": 201,
            "end_index": 202
          },
          {
            "title": "Multiple classes",
            "start_index": 202,
            "end_index": 204
          },
          {
            "title": "Least squares for classification",
            "start_index": 204,
            "end_index": 206
          },
          {
            "title": "Fisher\u2019s linear discriminant",
            "start_index": 206,
            "end_index": 209
          },
          {
            "title": "Relation to least squares",
            "start_index": 209,
            "end_index": 211
          },
          {
            "title": "Fisher\u2019s discriminant for multiple classes",
            "start_index": 211,
            "end_index": 212
          },
          {
            "title": "The perceptron algorithm",
            "start_index": 212,
            "end_index": 216
          }
        ]
      },
      {
        "title": "Probabilistic Generative Models",
        "start_index": 216,
        "end_index": 218,
        "child_nodes": [
          {
            "title": "Continuous inputs",
            "start_index": 218,
            "end_index": 220
          },
          {
            "title": "Maximum likelihood solution",
            "start_index": 220,
            "end_index": 222
          },
          {
            "title": "Discrete features",
            "start_index": 222,
            "end_index": 222
          },
          {
            "title": "Exponential family",
            "start_index": 222,
            "end_index": 223
          }
        ]
      },
      {
        "title": "Probabilistic Discriminative Models",
        "start_index": 223,
        "end_index": 224,
        "child_nodes": [
          {
            "title": "Fixed basis functions",
            "start_index": 224,
            "end_index": 225
          },
          {
            "title": "Logistic regression",
            "start_index": 225,
            "end_index": 227
          },
          {
            "title": "Iterative reweighted least squares",
            "start_index": 227,
            "end_index": 229
          },
          {
            "title": "Multiclass logistic regression",
            "start_index": 229,
            "end_index": 230
          },
          {
            "title": "Probit regression",
            "start_index": 230,
            "end_index": 232
          },
          {
            "title": "Canonical link functions",
            "start_index": 232,
            "end_index": 232
          }
        ]
      },
      {
        "title": "The Laplace Approximation",
        "start_index": 233,
        "end_index": 236,
        "child_nodes": [
          {
            "title": "Model comparison and BIC",
            "start_index": 236,
            "end_index": 237
          }
        ]
      },
      {
        "title": "Bayesian Logistic Regression",
        "start_index": 237,
        "end_index": 237,
        "child_nodes": [
          {
            "title": "Laplace approximation",
            "start_index": 237,
            "end_index": 238
          },
          {
            "title": "Predictive distribution",
            "start_index": 238,
            "end_index": 240
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 240,
    "end_index": 245
  },
  {
    "title": "Neural Networks",
    "start_index": 245,
    "end_index": 247,
    "child_nodes": [
      {
        "title": "Feed-forward Network Functions",
        "start_index": 247,
        "end_index": 251,
        "child_nodes": [
          {
            "title": "Weight-space symmetries",
            "start_index": 251,
            "end_index": 252
          }
        ]
      },
      {
        "title": "Network Training",
        "start_index": 252,
        "end_index": 256,
        "child_nodes": [
          {
            "title": "Parameter optimization",
            "start_index": 256,
            "end_index": 257
          },
          {
            "title": "Local quadratic approximation",
            "start_index": 257,
            "end_index": 259
          },
          {
            "title": "Use of gradient information",
            "start_index": 259,
            "end_index": 260
          },
          {
            "title": "Gradient descent optimization",
            "start_index": 260,
            "end_index": 261
          }
        ]
      },
      {
        "title": "Error Backpropagation",
        "start_index": 261,
        "end_index": 262,
        "child_nodes": [
          {
            "title": "Evaluation of error-function derivatives",
            "start_index": 262,
            "end_index": 265
          },
          {
            "title": "A simple example",
            "start_index": 265,
            "end_index": 266
          },
          {
            "title": "Efficiency of backpropagation",
            "start_index": 266,
            "end_index": 267
          },
          {
            "title": "The Jacobian matrix",
            "start_index": 267,
            "end_index": 269
          }
        ]
      },
      {
        "title": "The Hessian Matrix",
        "start_index": 269,
        "end_index": 270,
        "child_nodes": [
          {
            "title": "Diagonal approximation",
            "start_index": 270,
            "end_index": 271
          },
          {
            "title": "Outer product approximation",
            "start_index": 271,
            "end_index": 272
          },
          {
            "title": "Inverse Hessian",
            "start_index": 272,
            "end_index": 272
          },
          {
            "title": "Finite differences",
            "start_index": 272,
            "end_index": 273
          },
          {
            "title": "Exact evaluation of the Hessian",
            "start_index": 273,
            "end_index": 274
          },
          {
            "title": "Fast multiplication by the Hessian",
            "start_index": 274,
            "end_index": 276
          }
        ]
      },
      {
        "title": "Regularization in Neural Networks",
        "start_index": 276,
        "end_index": 277,
        "child_nodes": [
          {
            "title": "Consistent Gaussian priors",
            "start_index": 277,
            "end_index": 279
          },
          {
            "title": "Early stopping",
            "start_index": 279,
            "end_index": 281
          },
          {
            "title": "Invariances",
            "start_index": 281,
            "end_index": 283
          },
          {
            "title": "Tangent propagation",
            "start_index": 283,
            "end_index": 285
          },
          {
            "title": "Training with transformed data",
            "start_index": 285,
            "end_index": 287
          },
          {
            "title": "Convolutional networks",
            "start_index": 287,
            "end_index": 289
          },
          {
            "title": "Soft weight sharing",
            "start_index": 289,
            "end_index": 292
          }
        ]
      },
      {
        "title": "Mixture Density Networks",
        "start_index": 292,
        "end_index": 297
      },
      {
        "title": "Bayesian Neural Networks",
        "start_index": 297,
        "end_index": 298,
        "child_nodes": [
          {
            "title": "Posterior parameter distribution",
            "start_index": 298,
            "end_index": 300
          },
          {
            "title": "Hyperparameter optimization",
            "start_index": 300,
            "end_index": 301
          },
          {
            "title": "Bayesian neural networks for classification",
            "start_index": 301,
            "end_index": 304
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 304,
    "end_index": 311
  },
  {
    "title": "Kernel Methods",
    "start_index": 311,
    "end_index": 313,
    "child_nodes": [
      {
        "title": "Dual Representations",
        "start_index": 313,
        "end_index": 314
      },
      {
        "title": "Constructing Kernels",
        "start_index": 314,
        "end_index": 319
      },
      {
        "title": "Radial Basis Function Networks",
        "start_index": 319,
        "end_index": 321,
        "child_nodes": [
          {
            "title": "Nadaraya-Watson model",
            "start_index": 321,
            "end_index": 323
          }
        ]
      },
      {
        "title": "Gaussian Processes",
        "start_index": 323,
        "end_index": 324,
        "child_nodes": [
          {
            "title": "Linear regression revisited",
            "start_index": 324,
            "end_index": 326
          },
          {
            "title": "Gaussian processes for regression",
            "start_index": 326,
            "end_index": 331
          },
          {
            "title": "Learning the hyperparameters",
            "start_index": 331,
            "end_index": 332
          },
          {
            "title": "Automatic relevance determination",
            "start_index": 332,
            "end_index": 333
          },
          {
            "title": "Gaussian processes for classification",
            "start_index": 333,
            "end_index": 335
          },
          {
            "title": "Laplace approximation",
            "start_index": 335,
            "end_index": 339
          },
          {
            "title": "Connection to neural networks",
            "start_index": 339,
            "end_index": 340
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 340,
    "end_index": 344
  },
  {
    "title": "Sparse Kernel Machines",
    "start_index": 345,
    "end_index": 346,
    "child_nodes": [
      {
        "title": "Maximum Margin Classifiers",
        "start_index": 346,
        "end_index": 351,
        "child_nodes": [
          {
            "title": "Overlapping class distributions",
            "start_index": 351,
            "end_index": 356
          },
          {
            "title": "Relation to logistic regression",
            "start_index": 356,
            "end_index": 358
          },
          {
            "title": "Multiclass SVMs",
            "start_index": 358,
            "end_index": 359
          },
          {
            "title": "SVMs for regression",
            "start_index": 359,
            "end_index": 364
          },
          {
            "title": "Computational learning theory",
            "start_index": 364,
            "end_index": 365
          }
        ]
      },
      {
        "title": "Relevance Vector Machines",
        "start_index": 365,
        "end_index": 365,
        "child_nodes": [
          {
            "title": "RVM for regression",
            "start_index": 365,
            "end_index": 369
          },
          {
            "title": "Analysis of sparsity",
            "start_index": 369,
            "end_index": 373
          },
          {
            "title": "RVM for classification",
            "start_index": 373,
            "end_index": 377
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 377,
    "end_index": 379
  },
  {
    "title": "Graphical Models",
    "start_index": 379,
    "end_index": 380,
    "child_nodes": [
      {
        "title": "Bayesian Networks",
        "start_index": 380,
        "end_index": 382,
        "child_nodes": [
          {
            "title": "Example: Polynomial regression",
            "start_index": 382,
            "end_index": 385
          },
          {
            "title": "Generative models",
            "start_index": 385,
            "end_index": 386
          },
          {
            "title": "Discrete variables",
            "start_index": 386,
            "end_index": 390
          },
          {
            "title": "Linear-Gaussian models",
            "start_index": 390,
            "end_index": 392
          }
        ]
      },
      {
        "title": "Conditional Independence",
        "start_index": 392,
        "end_index": 393,
        "child_nodes": [
          {
            "title": "Three example graphs",
            "start_index": 393,
            "end_index": 398
          },
          {
            "title": "D-separation",
            "start_index": 398,
            "end_index": 403
          }
        ]
      },
      {
        "title": "Markov Random Fields",
        "start_index": 403,
        "end_index": 403,
        "child_nodes": [
          {
            "title": "Conditional independence properties",
            "start_index": 403,
            "end_index": 404
          },
          {
            "title": "Factorization properties",
            "start_index": 404,
            "end_index": 407
          },
          {
            "title": "Illustration: Image de-noising",
            "start_index": 407,
            "end_index": 410
          },
          {
            "title": "Relation to directed graphs",
            "start_index": 410,
            "end_index": 413
          }
        ]
      },
      {
        "title": "Inference in Graphical Models",
        "start_index": 413,
        "end_index": 414,
        "child_nodes": [
          {
            "title": "Inference on a chain",
            "start_index": 414,
            "end_index": 418
          },
          {
            "title": "Trees",
            "start_index": 418,
            "end_index": 419
          },
          {
            "title": "Factor graphs",
            "start_index": 419,
            "end_index": 422
          },
          {
            "title": "The sum-product algorithm",
            "start_index": 422,
            "end_index": 431
          },
          {
            "title": "The max-sum algorithm",
            "start_index": 431,
            "end_index": 436
          },
          {
            "title": "Exact inference in general graphs",
            "start_index": 436,
            "end_index": 437
          },
          {
            "title": "Loopy belief propagation",
            "start_index": 437,
            "end_index": 438
          },
          {
            "title": "Learning the graph structure",
            "start_index": 438,
            "end_index": 438
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 438,
    "end_index": 442
  },
  {
    "title": "Mixture Models and EM",
    "start_index": 443,
    "end_index": 444,
    "child_nodes": [
      {
        "title": "K-means Clustering",
        "start_index": 444,
        "end_index": 448,
        "child_nodes": [
          {
            "title": "Image segmentation and compression",
            "start_index": 448,
            "end_index": 450
          }
        ]
      },
      {
        "title": "Mixtures of Gaussians",
        "start_index": 450,
        "end_index": 452,
        "child_nodes": [
          {
            "title": "Maximum likelihood",
            "start_index": 452,
            "end_index": 455
          },
          {
            "title": "EM for Gaussian mixtures",
            "start_index": 455,
            "end_index": 459
          }
        ]
      },
      {
        "title": "An Alternative View of EM",
        "start_index": 459,
        "end_index": 461,
        "child_nodes": [
          {
            "title": "Gaussian mixtures revisited",
            "start_index": 461,
            "end_index": 463
          },
          {
            "title": "Relation to K-means",
            "start_index": 463,
            "end_index": 464
          },
          {
            "title": "Mixtures of Bernoulli distributions",
            "start_index": 464,
            "end_index": 468
          },
          {
            "title": "EM for Bayesian linear regression",
            "start_index": 468,
            "end_index": 470
          }
        ]
      },
      {
        "title": "The EM Algorithm in General",
        "start_index": 470,
        "end_index": 475
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 475,
    "end_index": 480
  },
  {
    "title": "Approximate Inference",
    "start_index": 481,
    "end_index": 482,
    "child_nodes": [
      {
        "title": "Variational Inference",
        "start_index": 482,
        "end_index": 484,
        "child_nodes": [
          {
            "title": "Factorized distributions",
            "start_index": 484,
            "end_index": 486
          },
          {
            "title": "Properties of factorized approximations",
            "start_index": 486,
            "end_index": 490
          },
          {
            "title": "Example: The univariate Gaussian",
            "start_index": 490,
            "end_index": 493
          },
          {
            "title": "Model comparison",
            "start_index": 493,
            "end_index": 494
          }
        ]
      },
      {
        "title": "Illustration: Variational Mixture of Gaussians",
        "start_index": 494,
        "end_index": 495,
        "child_nodes": [
          {
            "title": "Variational distribution",
            "start_index": 495,
            "end_index": 501
          },
          {
            "title": "Variational lower bound",
            "start_index": 501,
            "end_index": 502
          },
          {
            "title": "Predictive density",
            "start_index": 502,
            "end_index": 503
          },
          {
            "title": "Determining the number of components",
            "start_index": 503,
            "end_index": 505
          },
          {
            "title": "Induced factorizations",
            "start_index": 505,
            "end_index": 506
          }
        ]
      },
      {
        "title": "Variational Linear Regression",
        "start_index": 506,
        "end_index": 506,
        "child_nodes": [
          {
            "title": "Variational distribution",
            "start_index": 506,
            "end_index": 508
          },
          {
            "title": "Predictive distribution",
            "start_index": 508,
            "end_index": 509
          },
          {
            "title": "Lower bound",
            "start_index": 509,
            "end_index": 510
          }
        ]
      },
      {
        "title": "Exponential Family Distributions",
        "start_index": 510,
        "end_index": 511,
        "child_nodes": [
          {
            "title": "Variational message passing",
            "start_index": 511,
            "end_index": 512
          }
        ]
      },
      {
        "title": "Local Variational Methods",
        "start_index": 513,
        "end_index": 518
      },
      {
        "title": "Variational Logistic Regression",
        "start_index": 518,
        "end_index": 518,
        "child_nodes": [
          {
            "title": "Variational posterior distribution",
            "start_index": 518,
            "end_index": 520
          },
          {
            "title": "Optimizing the variational parameters",
            "start_index": 520,
            "end_index": 522
          },
          {
            "title": "Inference of hyperparameters",
            "start_index": 522,
            "end_index": 525
          }
        ]
      },
      {
        "title": "Expectation Propagation",
        "start_index": 525,
        "end_index": 531,
        "child_nodes": [
          {
            "title": "Example: The clutter problem",
            "start_index": 531,
            "end_index": 533
          },
          {
            "title": "Expectation propagation on graphs",
            "start_index": 533,
            "end_index": 537
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 537,
    "end_index": 542
  },
  {
    "title": "Sampling Methods",
    "start_index": 543,
    "end_index": 546,
    "child_nodes": [
      {
        "title": "Basic Sampling Algorithms",
        "start_index": 546,
        "end_index": 546,
        "child_nodes": [
          {
            "title": "Standard distributions",
            "start_index": 546,
            "end_index": 548
          },
          {
            "title": "Rejection sampling",
            "start_index": 548,
            "end_index": 550
          },
          {
            "title": "Adaptive rejection sampling",
            "start_index": 550,
            "end_index": 552
          },
          {
            "title": "Importance sampling",
            "start_index": 552,
            "end_index": 554
          },
          {
            "title": "Sampling-importance-resampling",
            "start_index": 554,
            "end_index": 556
          },
          {
            "title": "Sampling and the EM algorithm",
            "start_index": 556,
            "end_index": 556
          }
        ]
      },
      {
        "title": "Markov Chain Monte Carlo",
        "start_index": 557,
        "end_index": 559,
        "child_nodes": [
          {
            "title": "Markov chains",
            "start_index": 559,
            "end_index": 561
          },
          {
            "title": "The Metropolis-Hastings algorithm",
            "start_index": 561,
            "end_index": 562
          }
        ]
      },
      {
        "title": "Gibbs Sampling",
        "start_index": 562,
        "end_index": 566
      },
      {
        "title": "Slice Sampling",
        "start_index": 566,
        "end_index": 568
      },
      {
        "title": "The Hybrid Monte Carlo Algorithm",
        "start_index": 568,
        "end_index": 568,
        "child_nodes": [
          {
            "title": "Dynamical systems",
            "start_index": 568,
            "end_index": 572
          },
          {
            "title": "Hybrid Monte Carlo",
            "start_index": 572,
            "end_index": 574
          }
        ]
      },
      {
        "title": "Estimating the Partition Function",
        "start_index": 574,
        "end_index": 576
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 576,
    "end_index": 579
  },
  {
    "title": "Continuous Latent Variables",
    "start_index": 579,
    "end_index": 581,
    "child_nodes": [
      {
        "title": "Principal Component Analysis",
        "start_index": 581,
        "end_index": 581,
        "child_nodes": [
          {
            "title": "Maximum variance formulation",
            "start_index": 581,
            "end_index": 583
          },
          {
            "title": "Minimum-error formulation",
            "start_index": 583,
            "end_index": 585
          },
          {
            "title": "Applications of PCA",
            "start_index": 585,
            "end_index": 589
          },
          {
            "title": "PCA for high-dimensional data",
            "start_index": 589,
            "end_index": 590
          }
        ]
      },
      {
        "title": "Probabilistic PCA",
        "start_index": 590,
        "end_index": 594,
        "child_nodes": [
          {
            "title": "Maximum likelihood PCA",
            "start_index": 594,
            "end_index": 597
          },
          {
            "title": "EM algorithm for PCA",
            "start_index": 597,
            "end_index": 600
          },
          {
            "title": "Bayesian PCA",
            "start_index": 600,
            "end_index": 603
          },
          {
            "title": "Factor analysis",
            "start_index": 603,
            "end_index": 606
          }
        ]
      },
      {
        "title": "Kernel PCA",
        "start_index": 606,
        "end_index": 610
      },
      {
        "title": "Nonlinear Latent Variable Models",
        "start_index": 611,
        "end_index": 611,
        "child_nodes": [
          {
            "title": "Independent component analysis",
            "start_index": 611,
            "end_index": 612
          },
          {
            "title": "Autoassociative neural networks",
            "start_index": 612,
            "end_index": 615
          },
          {
            "title": "Modelling nonlinear manifolds",
            "start_index": 615,
            "end_index": 619
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 619,
    "end_index": 624
  },
  {
    "title": "Sequential Data",
    "start_index": 625,
    "end_index": 627,
    "child_nodes": [
      {
        "title": "Markov Models",
        "start_index": 627,
        "end_index": 630
      },
      {
        "title": "Hidden Markov Models",
        "start_index": 630,
        "end_index": 635,
        "child_nodes": [
          {
            "title": "Maximum likelihood for the HMM",
            "start_index": 635,
            "end_index": 638
          },
          {
            "title": "The forward-backward algorithm",
            "start_index": 638,
            "end_index": 645
          },
          {
            "title": "The sum-product algorithm for the HMM",
            "start_index": 645,
            "end_index": 647
          },
          {
            "title": "Scaling factors",
            "start_index": 647,
            "end_index": 649
          },
          {
            "title": "The Viterbi algorithm",
            "start_index": 649,
            "end_index": 651
          },
          {
            "title": "Extensions of the hidden Markov model",
            "start_index": 651,
            "end_index": 655
          }
        ]
      },
      {
        "title": "Linear Dynamical Systems",
        "start_index": 655,
        "end_index": 658,
        "child_nodes": [
          {
            "title": "Inference in LDS",
            "start_index": 658,
            "end_index": 662
          },
          {
            "title": "Learning in LDS",
            "start_index": 662,
            "end_index": 664
          },
          {
            "title": "Extensions of LDS",
            "start_index": 664,
            "end_index": 665
          },
          {
            "title": "Particle filters",
            "start_index": 665,
            "end_index": 666
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 666,
    "end_index": 672
  },
  {
    "title": "Combining Models",
    "start_index": 673,
    "end_index": 674,
    "child_nodes": [
      {
        "title": "Bayesian Model Averaging",
        "start_index": 674,
        "end_index": 675
      },
      {
        "title": "Committees",
        "start_index": 675,
        "end_index": 677
      },
      {
        "title": "Boosting",
        "start_index": 677,
        "end_index": 679,
        "child_nodes": [
          {
            "title": "Minimizing exponential error",
            "start_index": 679,
            "end_index": 681
          },
          {
            "title": "Error functions for boosting",
            "start_index": 681,
            "end_index": 683
          }
        ]
      },
      {
        "title": "Tree-based Models",
        "start_index": 683,
        "end_index": 686
      },
      {
        "title": "Conditional Mixture Models",
        "start_index": 686,
        "end_index": 687,
        "child_nodes": [
          {
            "title": "Mixtures of linear regression models",
            "start_index": 687,
            "end_index": 690
          },
          {
            "title": "Mixtures of logistic models",
            "start_index": 690,
            "end_index": 692
          },
          {
            "title": "Mixtures of experts",
            "start_index": 692,
            "end_index": 694
          }
        ]
      }
    ]
  },
  {
    "title": "Exercises",
    "start_index": 694,
    "end_index": 696
  },
  {
    "title": "Appendix A Data Sets",
    "start_index": 697,
    "end_index": 704
  },
  {
    "title": "Appendix B Probability Distributions",
    "start_index": 705,
    "end_index": 714
  },
  {
    "title": "Appendix C Properties of Matrices",
    "start_index": 715,
    "end_index": 722
  },
  {
    "title": "Appendix D Calculus of Variations",
    "start_index": 723,
    "end_index": 726
  },
  {
    "title": "Appendix E Lagrange Multipliers",
    "start_index": 727,
    "end_index": 730
  },
  {
    "title": "References",
    "start_index": 731,
    "end_index": 749
  },
  {
    "title": "Index",
    "start_index": 749,
    "end_index": 758
  }
]