[
{
"title": "Preface",
"start_index": 1,
"end_index": 6
},
{
"title": "Preface",
"start_index": 7,
"end_index": 10
},
{
"title": "Mathematical notation",
"start_index": 11,
"end_index": 13
},
{
"title": "Contents",
"start_index": 13,
"end_index": 20
},
{
"title": "Introduction",
"start_index": 21,
"end_index": 24,
"child_nodes": [
{
"title": "Example: Polynomial Curve Fitting",
"start_index": 24,
"end_index": 32
},
{
"title": "Probability Theory",
"start_index": 32,
"end_index": 37,
"child_nodes": [
{
"title": "Probability densities",
"start_index": 37,
"end_index": 39
},
{
"title": "Expectations and covariances",
"start_index": 39,
"end_index": 41
},
{
"title": "Bayesian probabilities",
"start_index": 41,
"end_index": 44
},
{
"title": "The Gaussian distribution",
"start_index": 44,
"end_index": 48
},
{
"title": "Curve fitting re-visited",
"start_index": 48,
"end_index": 50
},
{
"title": "Bayesian curve fitting",
"start_index": 50,
"end_index": 52
}
]
},
{
"title": "Model Selection",
"start_index": 52,
"end_index": 53
},
{
"title": "The Curse of Dimensionality",
"start_index": 53,
"end_index": 58
},
{
"title": "Decision Theory",
"start_index": 58,
"end_index": 59,
"child_nodes": [
{
"title": "Minimizing the misclassification rate",
"start_index": 59,
"end_index": 61
},
{
"title": "Minimizing the expected loss",
"start_index": 61,
"end_index": 62
},
{
"title": "The reject option",
"start_index": 62,
"end_index": 62
},
{
"title": "Inference and decision",
"start_index": 62,
"end_index": 66
},
{
"title": "Loss functions for regression",
"start_index": 66,
"end_index": 68
}
]
},
{
"title": "Information Theory",
"start_index": 68,
"end_index": 75,
"child_nodes": [
{
"title": "Relative entropy and mutual information",
"start_index": 75,
"end_index": 78
}
]
}
]
},
{
"title": "Exercises",
"start_index": 78,
"end_index": 87
},
{
"title": "Probability Distributions",
"start_index": 87,
"end_index": 88,
"child_nodes": [
{
"title": "Binary Variables",
"start_index": 88,
"end_index": 91,
"child_nodes": [
{
"title": "The beta distribution",
"start_index": 91,
"end_index": 94
}
]
},
{
"title": "Multinomial Variables",
"start_index": 94,
"end_index": 96,
"child_nodes": [
{
"title": "The Dirichlet distribution",
"start_index": 96,
"end_index": 98
}
]
},
{
"title": "The Gaussian Distribution",
"start_index": 98,
"end_index": 105,
"child_nodes": [
{
"title": "Conditional Gaussian distributions",
"start_index": 105,
"end_index": 108
},
{
"title": "Marginal Gaussian distributions",
"start_index": 108,
"end_index": 110
},
{
"title": "Bayes\u2019 theorem for Gaussian variables",
"start_index": 110,
"end_index": 113
},
{
"title": "Maximum likelihood for the Gaussian",
"start_index": 113,
"end_index": 114
},
{
"title": "Sequential estimation",
"start_index": 114,
"end_index": 117
},
{
"title": "Bayesian inference for the Gaussian",
"start_index": 117,
"end_index": 122
},
{
"title": "Student\u2019s t-distribution",
"start_index": 122,
"end_index": 125
},
{
"title": "Periodic variables",
"start_index": 125,
"end_index": 130
},
{
"title": "Mixtures of Gaussians",
"start_index": 130,
"end_index": 133
}
]
},
{
"title": "The Exponential Family",
"start_index": 133,
"end_index": 136,
"child_nodes": [
{
"title": "Maximum likelihood and sufficient statistics",
"start_index": 136,
"end_index": 137
},
{
"title": "Conjugate priors",
"start_index": 137,
"end_index": 137
},
{
"title": "Noninformative priors",
"start_index": 137,
"end_index": 140
}
]
},
{
"title": "Nonparametric Methods",
"start_index": 140,
"end_index": 142,
"child_nodes": [
{
"title": "Kernel density estimators",
"start_index": 142,
"end_index": 144
},
{
"title": "Nearest-neighbour methods",
"start_index": 144,
"end_index": 147
}
]
}
]
},
{
"title": "Exercises",
"start_index": 147,
"end_index": 156
},
{
"title": "Linear Models for Regression",
"start_index": 157,
"end_index": 158,
"child_nodes": [
{
"title": "Linear Basis Function Models",
"start_index": 158,
"end_index": 160,
"child_nodes": [
{
"title": "Maximum likelihood and least squares",
"start_index": 160,
"end_index": 163
},
{
"title": "Geometry of least squares",
"start_index": 163,
"end_index": 163
},
{
"title": "Sequential learning",
"start_index": 163,
"end_index": 164
},
{
"title": "Regularized least squares",
"start_index": 164,
"end_index": 166
},
{
"title": "Multiple outputs",
"start_index": 166,
"end_index": 167
}
]
},
{
"title": "The Bias-Variance Decomposition",
"start_index": 167,
"end_index": 172
},
{
"title": "Bayesian Linear Regression",
"start_index": 172,
"end_index": 172,
"child_nodes": [
{
"title": "Parameter distribution",
"start_index": 172,
"end_index": 176
},
{
"title": "Predictive distribution",
"start_index": 176,
"end_index": 179
},
{
"title": "Equivalent kernel",
"start_index": 179,
"end_index": 181
}
]
},
{
"title": "Bayesian Model Comparison",
"start_index": 181,
"end_index": 185
},
{
"title": "The Evidence Approximation",
"start_index": 185,
"end_index": 186,
"child_nodes": [
{
"title": "Evaluation of the evidence function",
"start_index": 186,
"end_index": 188
},
{
"title": "Maximizing the evidence function",
"start_index": 188,
"end_index": 190
},
{
"title": "Effective number of parameters",
"start_index": 190,
"end_index": 192
}
]
},
{
"title": "Limitations of Fixed Basis Functions",
"start_index": 192,
"end_index": 193
}
]
},
{
"title": "Exercises",
"start_index": 193,
"end_index": 198
},
{
"title": "Linear Models for Classification",
"start_index": 199,
"end_index": 201,
"child_nodes": [
{
"title": "Discriminant Functions",
"start_index": 201,
"end_index": 201,
"child_nodes": [
{
"title": "Two classes",
"start_index": 201,
"end_index": 202
},
{
"title": "Multiple classes",
"start_index": 202,
"end_index": 204
},
{
"title": "Least squares for classification",
"start_index": 204,
"end_index": 206
},
{
"title": "Fisher\u2019s linear discriminant",
"start_index": 206,
"end_index": 209
},
{
"title": "Relation to least squares",
"start_index": 209,
"end_index": 211
},
{
"title": "Fisher\u2019s discriminant for multiple classes",
"start_index": 211,
"end_index": 212
},
{
"title": "The perceptron algorithm",
"start_index": 212,
"end_index": 216
}
]
},
{
"title": "Probabilistic Generative Models",
"start_index": 216,
"end_index": 218,
"child_nodes": [
{
"title": "Continuous inputs",
"start_index": 218,
"end_index": 220
},
{
"title": "Maximum likelihood solution",
"start_index": 220,
"end_index": 222
},
{
"title": "Discrete features",
"start_index": 222,
"end_index": 222
},
{
"title": "Exponential family",
"start_index": 222,
"end_index": 223
}
]
},
{
"title": "Probabilistic Discriminative Models",
"start_index": 223,
"end_index": 224,
"child_nodes": [
{
"title": "Fixed basis functions",
"start_index": 224,
"end_index": 225
},
{
"title": "Logistic regression",
"start_index": 225,
"end_index": 227
},
{
"title": "Iterative reweighted least squares",
"start_index": 227,
"end_index": 229
},
{
"title": "Multiclass logistic regression",
"start_index": 229,
"end_index": 230
},
{
"title": "Probit regression",
"start_index": 230,
"end_index": 232
},
{
"title": "Canonical link functions",
"start_index": 232,
"end_index": 232
}
]
},
{
"title": "The Laplace Approximation",
"start_index": 233,
"end_index": 236,
"child_nodes": [
{
"title": "Model comparison and BIC",
"start_index": 236,
"end_index": 237
}
]
},
{
"title": "Bayesian Logistic Regression",
"start_index": 237,
"end_index": 237,
"child_nodes": [
{
"title": "Laplace approximation",
"start_index": 237,
"end_index": 238
},
{
"title": "Predictive distribution",
"start_index": 238,
"end_index": 240
}
]
}
]
},
{
"title": "Exercises",
"start_index": 240,
"end_index": 245
},
{
"title": "Neural Networks",
"start_index": 245,
"end_index": 247,
"child_nodes": [
{
"title": "Feed-forward Network Functions",
"start_index": 247,
"end_index": 251,
"child_nodes": [
{
"title": "Weight-space symmetries",
"start_index": 251,
"end_index": 252
}
]
},
{
"title": "Network Training",
"start_index": 252,
"end_index": 256,
"child_nodes": [
{
"title": "Parameter optimization",
"start_index": 256,
"end_index": 257
},
{
"title": "Local quadratic approximation",
"start_index": 257,
"end_index": 259
},
{
"title": "Use of gradient information",
"start_index": 259,
"end_index": 260
},
{
"title": "Gradient descent optimization",
"start_index": 260,
"end_index": 261
}
]
},
{
"title": "Error Backpropagation",
"start_index": 261,
"end_index": 262,
"child_nodes": [
{
"title": "Evaluation of error-function derivatives",
"start_index": 262,
"end_index": 265
},
{
"title": "A simple example",
"start_index": 265,
"end_index": 266
},
{
"title": "Efficiency of backpropagation",
"start_index": 266,
"end_index": 267
},
{
"title": "The Jacobian matrix",
"start_index": 267,
"end_index": 269
}
]
},
{
"title": "The Hessian Matrix",
"start_index": 269,
"end_index": 270,
"child_nodes": [
{
"title": "Diagonal approximation",
"start_index": 270,
"end_index": 271
},
{
"title": "Outer product approximation",
"start_index": 271,
"end_index": 272
},
{
"title": "Inverse Hessian",
"start_index": 272,
"end_index": 272
},
{
"title": "Finite differences",
"start_index": 272,
"end_index": 273
},
{
"title": "Exact evaluation of the Hessian",
"start_index": 273,
"end_index": 274
},
{
"title": "Fast multiplication by the Hessian",
"start_index": 274,
"end_index": 276
}
]
},
{
"title": "Regularization in Neural Networks",
"start_index": 276,
"end_index": 277,
"child_nodes": [
{
"title": "Consistent Gaussian priors",
"start_index": 277,
"end_index": 279
},
{
"title": "Early stopping",
"start_index": 279,
"end_index": 281
},
{
"title": "Invariances",
"start_index": 281,
"end_index": 283
},
{
"title": "Tangent propagation",
"start_index": 283,
"end_index": 285
},
{
"title": "Training with transformed data",
"start_index": 285,
"end_index": 287
},
{
"title": "Convolutional networks",
"start_index": 287,
"end_index": 289
},
{
"title": "Soft weight sharing",
"start_index": 289,
"end_index": 292
}
]
},
{
"title": "Mixture Density Networks",
"start_index": 292,
"end_index": 297
},
{
"title": "Bayesian Neural Networks",
"start_index": 297,
"end_index": 298,
"child_nodes": [
{
"title": "Posterior parameter distribution",
"start_index": 298,
"end_index": 300
},
{
"title": "Hyperparameter optimization",
"start_index": 300,
"end_index": 301
},
{
"title": "Bayesian neural networks for classification",
"start_index": 301,
"end_index": 304
}
]
}
]
},
{
"title": "Exercises",
"start_index": 304,
"end_index": 311
},
{
"title": "Kernel Methods",
"start_index": 311,
"end_index": 313,
"child_nodes": [
{
"title": "Dual Representations",
"start_index": 313,
"end_index": 314
},
{
"title": "Constructing Kernels",
"start_index": 314,
"end_index": 319
},
{
"title": "Radial Basis Function Networks",
"start_index": 319,
"end_index": 321,
"child_nodes": [
{
"title": "Nadaraya-Watson model",
"start_index": 321,
"end_index": 323
}
]
},
{
"title": "Gaussian Processes",
"start_index": 323,
"end_index": 324,
"child_nodes": [
{
"title": "Linear regression revisited",
"start_index": 324,
"end_index": 326
},
{
"title": "Gaussian processes for regression",
"start_index": 326,
"end_index": 331
},
{
"title": "Learning the hyperparameters",
"start_index": 331,
"end_index": 332
},
{
"title": "Automatic relevance determination",
"start_index": 332,
"end_index": 333
},
{
"title": "Gaussian processes for classification",
"start_index": 333,
"end_index": 335
},
{
"title": "Laplace approximation",
"start_index": 335,
"end_index": 339
},
{
"title": "Connection to neural networks",
"start_index": 339,
"end_index": 340
}
]
}
]
},
{
"title": "Exercises",
"start_index": 340,
"end_index": 344
},
{
"title": "Sparse Kernel Machines",
"start_index": 345,
"end_index": 346,
"child_nodes": [
{
"title": "Maximum Margin Classifiers",
"start_index": 346,
"end_index": 351,
"child_nodes": [
{
"title": "Overlapping class distributions",
"start_index": 351,
"end_index": 356
},
{
"title": "Relation to logistic regression",
"start_index": 356,
"end_index": 358
},
{
"title": "Multiclass SVMs",
"start_index": 358,
"end_index": 359
},
{
"title": "SVMs for regression",
"start_index": 359,
"end_index": 364
},
{
"title": "Computational learning theory",
"start_index": 364,
"end_index": 365
}
]
},
{
"title": "Relevance Vector Machines",
"start_index": 365,
"end_index": 365,
"child_nodes": [
{
"title": "RVM for regression",
"start_index": 365,
"end_index": 369
},
{
"title": "Analysis of sparsity",
"start_index": 369,
"end_index": 373
},
{
"title": "RVM for classification",
"start_index": 373,
"end_index": 377
}
]
}
]
},
{
"title": "Exercises",
"start_index": 377,
"end_index": 379
},
{
"title": "Graphical Models",
"start_index": 379,
"end_index": 380,
"child_nodes": [
{
"title": "Bayesian Networks",
"start_index": 380,
"end_index": 382,
"child_nodes": [
{
"title": "Example: Polynomial regression",
"start_index": 382,
"end_index": 385
},
{
"title": "Generative models",
"start_index": 385,
"end_index": 386
},
{
"title": "Discrete variables",
"start_index": 386,
"end_index": 390
},
{
"title": "Linear-Gaussian models",
"start_index": 390,
"end_index": 392
}
]
},
{
"title": "Conditional Independence",
"start_index": 392,
"end_index": 393,
"child_nodes": [
{
"title": "Three example graphs",
"start_index": 393,
"end_index": 398
},
{
"title": "D-separation",
"start_index": 398,
"end_index": 403
}
]
},
{
"title": "Markov Random Fields",
"start_index": 403,
"end_index": 403,
"child_nodes": [
{
"title": "Conditional independence properties",
"start_index": 403,
"end_index": 404
},
{
"title": "Factorization properties",
"start_index": 404,
"end_index": 407
},
{
"title": "Illustration: Image de-noising",
"start_index": 407,
"end_index": 410
},
{
"title": "Relation to directed graphs",
"start_index": 410,
"end_index": 413
}
]
},
{
"title": "Inference in Graphical Models",
"start_index": 413,
"end_index": 414,
"child_nodes": [
{
"title": "Inference on a chain",
"start_index": 414,
"end_index": 418
},
{
"title": "Trees",
"start_index": 418,
"end_index": 419
},
{
"title": "Factor graphs",
"start_index": 419,
"end_index": 422
},
{
"title": "The sum-product algorithm",
"start_index": 422,
"end_index": 431
},
{
"title": "The max-sum algorithm",
"start_index": 431,
"end_index": 436
},
{
"title": "Exact inference in general graphs",
"start_index": 436,
"end_index": 437
},
{
"title": "Loopy belief propagation",
"start_index": 437,
"end_index": 438
},
{
"title": "Learning the graph structure",
"start_index": 438,
"end_index": 438
}
]
}
]
},
{
"title": "Exercises",
"start_index": 438,
"end_index": 442
},
{
"title": "Mixture Models and EM",
"start_index": 443,
"end_index": 444,
"child_nodes": [
{
"title": "K-means Clustering",
"start_index": 444,
"end_index": 448,
"child_nodes": [
{
"title": "Image segmentation and compression",
"start_index": 448,
"end_index": 450
}
]
},
{
"title": "Mixtures of Gaussians",
"start_index": 450,
"end_index": 452,
"child_nodes": [
{
"title": "Maximum likelihood",
"start_index": 452,
"end_index": 455
},
{
"title": "EM for Gaussian mixtures",
"start_index": 455,
"end_index": 459
}
]
},
{
"title": "An Alternative View of EM",
"start_index": 459,
"end_index": 461,
"child_nodes": [
{
"title": "Gaussian mixtures revisited",
"start_index": 461,
"end_index": 463
},
{
"title": "Relation to K-means",
"start_index": 463,
"end_index": 464
},
{
"title": "Mixtures of Bernoulli distributions",
"start_index": 464,
"end_index": 468
},
{
"title": "EM for Bayesian linear regression",
"start_index": 468,
"end_index": 470
}
]
},
{
"title": "The EM Algorithm in General",
"start_index": 470,
"end_index": 475
}
]
},
{
"title": "Exercises",
"start_index": 475,
"end_index": 480
},
{
"title": "Approximate Inference",
"start_index": 481,
"end_index": 482,
"child_nodes": [
{
"title": "Variational Inference",
"start_index": 482,
"end_index": 484,
"child_nodes": [
{
"title": "Factorized distributions",
"start_index": 484,
"end_index": 486
},
{
"title": "Properties of factorized approximations",
"start_index": 486,
"end_index": 490
},
{
"title": "Example: The univariate Gaussian",
"start_index": 490,
"end_index": 493
},
{
"title": "Model comparison",
"start_index": 493,
"end_index": 494
}
]
},
{
"title": "Illustration: Variational Mixture of Gaussians",
"start_index": 494,
"end_index": 495,
"child_nodes": [
{
"title": "Variational distribution",
"start_index": 495,
"end_index": 501
},
{
"title": "Variational lower bound",
"start_index": 501,
"end_index": 502
},
{
"title": "Predictive density",
"start_index": 502,
"end_index": 503
},
{
"title": "Determining the number of components",
"start_index": 503,
"end_index": 505
},
{
"title": "Induced factorizations",
"start_index": 505,
"end_index": 506
}
]
},
{
"title": "Variational Linear Regression",
"start_index": 506,
"end_index": 506,
"child_nodes": [
{
"title": "Variational distribution",
"start_index": 506,
"end_index": 508
},
{
"title": "Predictive distribution",
"start_index": 508,
"end_index": 509
},
{
"title": "Lower bound",
"start_index": 509,
"end_index": 510
}
]
},
{
"title": "Exponential Family Distributions",
"start_index": 510,
"end_index": 511,
"child_nodes": [
{
"title": "Variational message passing",
"start_index": 511,
"end_index": 512
}
]
},
{
"title": "Local Variational Methods",
"start_index": 513,
"end_index": 518
},
{
"title": "Variational Logistic Regression",
"start_index": 518,
"end_index": 518,
"child_nodes": [
{
"title": "Variational posterior distribution",
"start_index": 518,
"end_index": 520
},
{
"title": "Optimizing the variational parameters",
"start_index": 520,
"end_index": 522
},
{
"title": "Inference of hyperparameters",
"start_index": 522,
"end_index": 525
}
]
},
{
"title": "Expectation Propagation",
"start_index": 525,
"end_index": 531,
"child_nodes": [
{
"title": "Example: The clutter problem",
"start_index": 531,
"end_index": 533
},
{
"title": "Expectation propagation on graphs",
"start_index": 533,
"end_index": 537
}
]
}
]
},
{
"title": "Exercises",
"start_index": 537,
"end_index": 542
},
{
"title": "Sampling Methods",
"start_index": 543,
"end_index": 546,
"child_nodes": [
{
"title": "Basic Sampling Algorithms",
"start_index": 546,
"end_index": 546,
"child_nodes": [
{
"title": "Standard distributions",
"start_index": 546,
"end_index": 548
},
{
"title": "Rejection sampling",
"start_index": 548,
"end_index": 550
},
{
"title": "Adaptive rejection sampling",
"start_index": 550,
"end_index": 552
},
{
"title": "Importance sampling",
"start_index": 552,
"end_index": 554
},
{
"title": "Sampling-importance-resampling",
"start_index": 554,
"end_index": 556
},
{
"title": "Sampling and the EM algorithm",
"start_index": 556,
"end_index": 556
}
]
},
{
"title": "Markov Chain Monte Carlo",
"start_index": 557,
"end_index": 559,
"child_nodes": [
{
"title": "Markov chains",
"start_index": 559,
"end_index": 561
},
{
"title": "The Metropolis-Hastings algorithm",
"start_index": 561,
"end_index": 562
}
]
},
{
"title": "Gibbs Sampling",
"start_index": 562,
"end_index": 566
},
{
"title": "Slice Sampling",
"start_index": 566,
"end_index": 568
},
{
"title": "The Hybrid Monte Carlo Algorithm",
"start_index": 568,
"end_index": 568,
"child_nodes": [
{
"title": "Dynamical systems",
"start_index": 568,
"end_index": 572
},
{
"title": "Hybrid Monte Carlo",
"start_index": 572,
"end_index": 574
}
]
},
{
"title": "Estimating the Partition Function",
"start_index": 574,
"end_index": 576
}
]
},
{
"title": "Exercises",
"start_index": 576,
"end_index": 579
},
{
"title": "Continuous Latent Variables",
"start_index": 579,
"end_index": 581,
"child_nodes": [
{
"title": "Principal Component Analysis",
"start_index": 581,
"end_index": 581,
"child_nodes": [
{
"title": "Maximum variance formulation",
"start_index": 581,
"end_index": 583
},
{
"title": "Minimum-error formulation",
"start_index": 583,
"end_index": 585
},
{
"title": "Applications of PCA",
"start_index": 585,
"end_index": 589
},
{
"title": "PCA for high-dimensional data",
"start_index": 589,
"end_index": 590
}
]
},
{
"title": "Probabilistic PCA",
"start_index": 590,
"end_index": 594,
"child_nodes": [
{
"title": "Maximum likelihood PCA",
"start_index": 594,
"end_index": 597
},
{
"title": "EM algorithm for PCA",
"start_index": 597,
"end_index": 600
},
{
"title": "Bayesian PCA",
"start_index": 600,
"end_index": 603
},
{
"title": "Factor analysis",
"start_index": 603,
"end_index": 606
}
]
},
{
"title": "Kernel PCA",
"start_index": 606,
"end_index": 610
},
{
"title": "Nonlinear Latent Variable Models",
"start_index": 611,
"end_index": 611,
"child_nodes": [
{
"title": "Independent component analysis",
"start_index": 611,
"end_index": 612
},
{
"title": "Autoassociative neural networks",
"start_index": 612,
"end_index": 615
},
{
"title": "Modelling nonlinear manifolds",
"start_index": 615,
"end_index": 619
}
]
}
]
},
{
"title": "Exercises",
"start_index": 619,
"end_index": 624
},
{
"title": "Sequential Data",
"start_index": 625,
"end_index": 627,
"child_nodes": [
{
"title": "Markov Models",
"start_index": 627,
"end_index": 630
},
{
"title": "Hidden Markov Models",
"start_index": 630,
"end_index": 635,
"child_nodes": [
{
"title": "Maximum likelihood for the HMM",
"start_index": 635,
"end_index": 638
},
{
"title": "The forward-backward algorithm",
"start_index": 638,
"end_index": 645
},
{
"title": "The sum-product algorithm for the HMM",
"start_index": 645,
"end_index": 647
},
{
"title": "Scaling factors",
"start_index": 647,
"end_index": 649
},
{
"title": "The Viterbi algorithm",
"start_index": 649,
"end_index": 651
},
{
"title": "Extensions of the hidden Markov model",
"start_index": 651,
"end_index": 655
}
]
},
{
"title": "Linear Dynamical Systems",
"start_index": 655,
"end_index": 658,
"child_nodes": [
{
"title": "Inference in LDS",
"start_index": 658,
"end_index": 662
},
{
"title": "Learning in LDS",
"start_index": 662,
"end_index": 664
},
{
"title": "Extensions of LDS",
"start_index": 664,
"end_index": 665
},
{
"title": "Particle filters",
"start_index": 665,
"end_index": 666
}
]
}
]
},
{
"title": "Exercises",
"start_index": 666,
"end_index": 672
},
{
"title": "Combining Models",
"start_index": 673,
"end_index": 674,
"child_nodes": [
{
"title": "Bayesian Model Averaging",
"start_index": 674,
"end_index": 675
},
{
"title": "Committees",
"start_index": 675,
"end_index": 677
},
{
"title": "Boosting",
"start_index": 677,
"end_index": 679,
"child_nodes": [
{
"title": "Minimizing exponential error",
"start_index": 679,
"end_index": 681
},
{
"title": "Error functions for boosting",
"start_index": 681,
"end_index": 683
}
]
},
{
"title": "Tree-based Models",
"start_index": 683,
"end_index": 686
},
{
"title": "Conditional Mixture Models",
"start_index": 686,
"end_index": 687,
"child_nodes": [
{
"title": "Mixtures of linear regression models",
"start_index": 687,
"end_index": 690
},
{
"title": "Mixtures of logistic models",
"start_index": 690,
"end_index": 692
},
{
"title": "Mixtures of experts",
"start_index": 692,
"end_index": 694
}
]
}
]
},
{
"title": "Exercises",
"start_index": 694,
"end_index": 696
},
{
"title": "Appendix A Data Sets",
"start_index": 697,
"end_index": 704
},
{
"title": "Appendix B Probability Distributions",
"start_index": 705,
"end_index": 714
},
{
"title": "Appendix C Properties of Matrices",
"start_index": 715,
"end_index": 722
},
{
"title": "Appendix D Calculus of Variations",
"start_index": 723,
"end_index": 726
},
{
"title": "Appendix E Lagrange Multipliers",
"start_index": 727,
"end_index": 730
},
{
"title": "References",
"start_index": 731,
"end_index": 749
},
{
"title": "Index",
"start_index": 749,
"end_index": 758
}
]