[
  {"title": "Preface", "start_index": 1, "end_index": 6},
  {"title": "Preface", "start_index": 7, "end_index": 10},
  {"title": "Mathematical notation", "start_index": 11, "end_index": 13},
  {"title": "Contents", "start_index": 13, "end_index": 20},
  {"title": "Introduction", "start_index": 21, "end_index": 24, "child_nodes": [
    {"title": "Example: Polynomial Curve Fitting", "start_index": 24, "end_index": 32},
    {"title": "Probability Theory", "start_index": 32, "end_index": 37, "child_nodes": [
      {"title": "Probability densities", "start_index": 37, "end_index": 39},
      {"title": "Expectations and covariances", "start_index": 39, "end_index": 41},
      {"title": "Bayesian probabilities", "start_index": 41, "end_index": 44},
      {"title": "The Gaussian distribution", "start_index": 44, "end_index": 48},
      {"title": "Curve fitting re-visited", "start_index": 48, "end_index": 50},
      {"title": "Bayesian curve fitting", "start_index": 50, "end_index": 52}
    ]},
    {"title": "Model Selection", "start_index": 52, "end_index": 53},
    {"title": "The Curse of Dimensionality", "start_index": 53, "end_index": 58},
    {"title": "Decision Theory", "start_index": 58, "end_index": 59, "child_nodes": [
      {"title": "Minimizing the misclassification rate", "start_index": 59, "end_index": 61},
      {"title": "Minimizing the expected loss", "start_index": 61, "end_index": 62},
      {"title": "The reject option", "start_index": 62, "end_index": 62},
      {"title": "Inference and decision", "start_index": 62, "end_index": 66},
      {"title": "Loss functions for regression", "start_index": 66, "end_index": 68}
    ]},
    {"title": "Information Theory", "start_index": 68, "end_index": 75, "child_nodes": [
      {"title": "Relative entropy and mutual information", "start_index": 75, "end_index": 78}
    ]}
  ]},
  {"title": "Exercises", "start_index": 78, "end_index": 87},
  {"title": "Probability Distributions", "start_index": 87, "end_index": 88, "child_nodes": [
    {"title": "Binary Variables", "start_index": 88, "end_index": 91, "child_nodes": [
      {"title": "The beta distribution", "start_index": 91, "end_index": 94}
    ]},
    {"title": "Multinomial Variables", "start_index": 94, "end_index": 96, "child_nodes": [
      {"title": "The Dirichlet distribution", "start_index": 96, "end_index": 98}
    ]},
    {"title": "The Gaussian Distribution", "start_index": 98, "end_index": 105, "child_nodes": [
      {"title": "Conditional Gaussian distributions", "start_index": 105, "end_index": 108},
      {"title": "Marginal Gaussian distributions", "start_index": 108, "end_index": 110},
      {"title": "Bayes\u2019 theorem for Gaussian variables", "start_index": 110, "end_index": 113},
      {"title": "Maximum likelihood for the Gaussian", "start_index": 113, "end_index": 114},
      {"title": "Sequential estimation", "start_index": 114, "end_index": 117},
      {"title": "Bayesian inference for the Gaussian", "start_index": 117, "end_index": 122},
      {"title": "Student\u2019s t-distribution", "start_index": 122, "end_index": 125},
      {"title": "Periodic variables", "start_index": 125, "end_index": 130},
      {"title": "Mixtures of Gaussians", "start_index": 130, "end_index": 133}
    ]},
    {"title": "The Exponential Family", "start_index": 133, "end_index": 136, "child_nodes": [
      {"title": "Maximum likelihood and sufficient statistics", "start_index": 136, "end_index": 137},
      {"title": "Conjugate priors", "start_index": 137, "end_index": 137},
      {"title": "Noninformative priors", "start_index": 137, "end_index": 140}
    ]},
    {"title": "Nonparametric Methods", "start_index": 140, "end_index": 142, "child_nodes": [
      {"title": "Kernel density estimators", "start_index": 142, "end_index": 144},
      {"title": "Nearest-neighbour methods", "start_index": 144, "end_index": 147}
    ]}
  ]},
  {"title": "Exercises", "start_index": 147, "end_index": 156},
  {"title": "Linear Models for Regression", "start_index": 157, "end_index": 158, "child_nodes": [
    {"title": "Linear Basis Function Models", "start_index": 158, "end_index": 160, "child_nodes": [
      {"title": "Maximum likelihood and least squares", "start_index": 160, "end_index": 163},
      {"title": "Geometry of least squares", "start_index": 163, "end_index": 163},
      {"title": "Sequential learning", "start_index": 163, "end_index": 164},
      {"title": "Regularized least squares", "start_index": 164, "end_index": 166},
      {"title": "Multiple outputs", "start_index": 166, "end_index": 167}
    ]},
    {"title": "The Bias-Variance Decomposition", "start_index": 167, "end_index": 172},
    {"title": "Bayesian Linear Regression", "start_index": 172, "end_index": 172, "child_nodes": [
      {"title": "Parameter distribution", "start_index": 172, "end_index": 176},
      {"title": "Predictive distribution", "start_index": 176, "end_index": 179},
      {"title": "Equivalent kernel", "start_index": 179, "end_index": 181}
    ]},
    {"title": "Bayesian Model Comparison", "start_index": 181, "end_index": 185},
    {"title": "The Evidence Approximation", "start_index": 185, "end_index": 186, "child_nodes": [
      {"title": "Evaluation of the evidence function", "start_index": 186, "end_index": 188},
      {"title": "Maximizing the evidence function", "start_index": 188, "end_index": 190},
      {"title": "Effective number of parameters", "start_index": 190, "end_index": 192}
    ]},
    {"title": "Limitations of Fixed Basis Functions", "start_index": 192, "end_index": 193}
  ]},
  {"title": "Exercises", "start_index": 193, "end_index": 198},
  {"title": "Linear Models for Classification", "start_index": 199, "end_index": 201, "child_nodes": [
    {"title": "Discriminant Functions", "start_index": 201, "end_index": 201, "child_nodes": [
      {"title": "Two classes", "start_index": 201, "end_index": 202},
      {"title": "Multiple classes", "start_index": 202, "end_index": 204},
      {"title": "Least squares for classification", "start_index": 204, "end_index": 206},
      {"title": "Fisher\u2019s linear discriminant", "start_index": 206, "end_index": 209},
      {"title": "Relation to least squares", "start_index": 209, "end_index": 211},
      {"title": "Fisher\u2019s discriminant for multiple classes", "start_index": 211, "end_index": 212},
      {"title": "The perceptron algorithm", "start_index": 212, "end_index": 216}
    ]},
    {"title": "Probabilistic Generative Models", "start_index": 216, "end_index": 218, "child_nodes": [
      {"title": "Continuous inputs", "start_index": 218, "end_index": 220},
      {"title": "Maximum likelihood solution", "start_index": 220, "end_index": 222},
      {"title": "Discrete features", "start_index": 222, "end_index": 222},
      {"title": "Exponential family", "start_index": 222, "end_index": 223}
    ]},
    {"title": "Probabilistic Discriminative Models", "start_index": 223, "end_index": 224, "child_nodes": [
      {"title": "Fixed basis functions", "start_index": 224, "end_index": 225},
      {"title": "Logistic regression", "start_index": 225, "end_index": 227},
      {"title": "Iterative reweighted least squares", "start_index": 227, "end_index": 229},
      {"title": "Multiclass logistic regression", "start_index": 229, "end_index": 230},
      {"title": "Probit regression", "start_index": 230, "end_index": 232},
      {"title": "Canonical link functions", "start_index": 232, "end_index": 232}
    ]},
    {"title": "The Laplace Approximation", "start_index": 233, "end_index": 236, "child_nodes": [
      {"title": "Model comparison and BIC", "start_index": 236, "end_index": 237}
    ]},
    {"title": "Bayesian Logistic Regression", "start_index": 237, "end_index": 237, "child_nodes": [
      {"title": "Laplace approximation", "start_index": 237, "end_index": 238},
      {"title": "Predictive distribution", "start_index": 238, "end_index": 240}
    ]}
  ]},
  {"title": "Exercises", "start_index": 240, "end_index": 245},
  {"title": "Neural Networks", "start_index": 245, "end_index": 247, "child_nodes": [
    {"title": "Feed-forward Network Functions", "start_index": 247, "end_index": 251, "child_nodes": [
      {"title": "Weight-space symmetries", "start_index": 251, "end_index": 252}
    ]},
    {"title": "Network Training", "start_index": 252, "end_index": 256, "child_nodes": [
      {"title": "Parameter optimization", "start_index": 256, "end_index": 257},
      {"title": "Local quadratic approximation", "start_index": 257, "end_index": 259},
      {"title": "Use of gradient information", "start_index": 259, "end_index": 260},
      {"title": "Gradient descent optimization", "start_index": 260, "end_index": 261}
    ]},
    {"title": "Error Backpropagation", "start_index": 261, "end_index": 262, "child_nodes": [
      {"title": "Evaluation of error-function derivatives", "start_index": 262, "end_index": 265},
      {"title": "A simple example", "start_index": 265, "end_index": 266},
      {"title": "Efficiency of backpropagation", "start_index": 266, "end_index": 267},
      {"title": "The Jacobian matrix", "start_index": 267, "end_index": 269}
    ]},
    {"title": "The Hessian Matrix", "start_index": 269, "end_index": 270, "child_nodes": [
      {"title": "Diagonal approximation", "start_index": 270, "end_index": 271},
      {"title": "Outer product approximation", "start_index": 271, "end_index": 272},
      {"title": "Inverse Hessian", "start_index": 272, "end_index": 272},
      {"title": "Finite differences", "start_index": 272, "end_index": 273},
      {"title": "Exact evaluation of the Hessian", "start_index": 273, "end_index": 274},
      {"title": "Fast multiplication by the Hessian", "start_index": 274, "end_index": 276}
    ]},
    {"title": "Regularization in Neural Networks", "start_index": 276, "end_index": 277, "child_nodes": [
      {"title": "Consistent Gaussian priors", "start_index": 277, "end_index": 279},
      {"title": "Early stopping", "start_index": 279, "end_index": 281},
      {"title": "Invariances", "start_index": 281, "end_index": 283},
      {"title": "Tangent propagation", "start_index": 283, "end_index": 285},
      {"title": "Training with transformed data", "start_index": 285, "end_index": 287},
      {"title": "Convolutional networks", "start_index": 287, "end_index": 289},
      {"title": "Soft weight sharing", "start_index": 289, "end_index": 292}
    ]},
    {"title": "Mixture Density Networks", "start_index": 292, "end_index": 297},
    {"title": "Bayesian Neural Networks", "start_index": 297, "end_index": 298, "child_nodes": [
      {"title": "Posterior parameter distribution", "start_index": 298, "end_index": 300},
      {"title": "Hyperparameter optimization", "start_index": 300, "end_index": 301},
      {"title": "Bayesian neural networks for classification", "start_index": 301, "end_index": 304}
    ]}
  ]},
  {"title": "Exercises", "start_index": 304, "end_index": 311},
  {"title": "Kernel Methods", "start_index": 311, "end_index": 313, "child_nodes": [
    {"title": "Dual Representations", "start_index": 313, "end_index": 314},
    {"title": "Constructing Kernels", "start_index": 314, "end_index": 319},
    {"title": "Radial Basis Function Networks", "start_index": 319, "end_index": 321, "child_nodes": [
      {"title": "Nadaraya-Watson model", "start_index": 321, "end_index": 323}
    ]},
    {"title": "Gaussian Processes", "start_index": 323, "end_index": 324, "child_nodes": [
      {"title": "Linear regression revisited", "start_index": 324, "end_index": 326},
      {"title": "Gaussian processes for regression", "start_index": 326, "end_index": 331},
      {"title": "Learning the hyperparameters", "start_index": 331, "end_index": 332},
      {"title": "Automatic relevance determination", "start_index": 332, "end_index": 333},
      {"title": "Gaussian processes for classification", "start_index": 333, "end_index": 335},
      {"title": "Laplace approximation", "start_index": 335, "end_index": 339},
      {"title": "Connection to neural networks", "start_index": 339, "end_index": 340}
    ]}
  ]},
  {"title": "Exercises", "start_index": 340, "end_index": 344},
  {"title": "Sparse Kernel Machines", "start_index": 345, "end_index": 346, "child_nodes": [
    {"title": "Maximum Margin Classifiers", "start_index": 346, "end_index": 351, "child_nodes": [
      {"title": "Overlapping class distributions", "start_index": 351, "end_index": 356},
      {"title": "Relation to logistic regression", "start_index": 356, "end_index": 358},
      {"title": "Multiclass SVMs", "start_index": 358, "end_index": 359},
      {"title": "SVMs for regression", "start_index": 359, "end_index": 364},
      {"title": "Computational learning theory", "start_index": 364, "end_index": 365}
    ]},
    {"title": "Relevance Vector Machines", "start_index": 365, "end_index": 365, "child_nodes": [
      {"title": "RVM for regression", "start_index": 365, "end_index": 369},
      {"title": "Analysis of sparsity", "start_index": 369, "end_index": 373},
      {"title": "RVM for classification", "start_index": 373, "end_index": 377}
    ]}
  ]},
  {"title": "Exercises", "start_index": 377, "end_index": 379},
  {"title": "Graphical Models", "start_index": 379, "end_index": 380, "child_nodes": [
    {"title": "Bayesian Networks", "start_index": 380, "end_index": 382, "child_nodes": [
      {"title": "Example: Polynomial regression", "start_index": 382, "end_index": 385},
      {"title": "Generative models", "start_index": 385, "end_index": 386},
      {"title": "Discrete variables", "start_index": 386, "end_index": 390},
      {"title": "Linear-Gaussian models", "start_index": 390, "end_index": 392}
    ]},
    {"title": "Conditional Independence", "start_index": 392, "end_index": 393, "child_nodes": [
      {"title": "Three example graphs", "start_index": 393, "end_index": 398},
      {"title": "D-separation", "start_index": 398, "end_index": 403}
    ]},
    {"title": "Markov Random Fields", "start_index": 403, "end_index": 403, "child_nodes": [
      {"title": "Conditional independence properties", "start_index": 403, "end_index": 404},
      {"title": "Factorization properties", "start_index": 404, "end_index": 407},
      {"title": "Illustration: Image de-noising", "start_index": 407, "end_index": 410},
      {"title": "Relation to directed graphs", "start_index": 410, "end_index": 413}
    ]},
    {"title": "Inference in Graphical Models", "start_index": 413, "end_index": 414, "child_nodes": [
      {"title": "Inference on a chain", "start_index": 414, "end_index": 418},
      {"title": "Trees", "start_index": 418, "end_index": 419},
      {"title": "Factor graphs", "start_index": 419, "end_index": 422},
      {"title": "The sum-product algorithm", "start_index": 422, "end_index": 431},
      {"title": "The max-sum algorithm", "start_index": 431, "end_index": 436},
      {"title": "Exact inference in general graphs", "start_index": 436, "end_index": 437},
      {"title": "Loopy belief propagation", "start_index": 437, "end_index": 438},
      {"title": "Learning the graph structure", "start_index": 438, "end_index": 438}
    ]}
  ]},
  {"title": "Exercises", "start_index": 438, "end_index": 442},
  {"title": "Mixture Models and EM", "start_index": 443, "end_index": 444, "child_nodes": [
    {"title": "K-means Clustering", "start_index": 444, "end_index": 448, "child_nodes": [
      {"title": "Image segmentation and compression", "start_index": 448, "end_index": 450}
    ]},
    {"title": "Mixtures of Gaussians", "start_index": 450, "end_index": 452, "child_nodes": [
      {"title": "Maximum likelihood", "start_index": 452, "end_index": 455},
      {"title": "EM for Gaussian mixtures", "start_index": 455, "end_index": 459}
    ]},
    {"title": "An Alternative View of EM", "start_index": 459, "end_index": 461, "child_nodes": [
      {"title": "Gaussian mixtures revisited", "start_index": 461, "end_index": 463},
      {"title": "Relation to K-means", "start_index": 463, "end_index": 464},
      {"title": "Mixtures of Bernoulli distributions", "start_index": 464, "end_index": 468},
      {"title": "EM for Bayesian linear regression", "start_index": 468, "end_index": 470}
    ]},
    {"title": "The EM Algorithm in General", "start_index": 470, "end_index": 475}
  ]},
  {"title": "Exercises", "start_index": 475, "end_index": 480},
  {"title": "Approximate Inference", "start_index": 481, "end_index": 482, "child_nodes": [
    {"title": "Variational Inference", "start_index": 482, "end_index": 484, "child_nodes": [
      {"title": "Factorized distributions", "start_index": 484, "end_index": 486},
      {"title": "Properties of factorized approximations", "start_index": 486, "end_index": 490},
      {"title": "Example: The univariate Gaussian", "start_index": 490, "end_index": 493},
      {"title": "Model comparison", "start_index": 493, "end_index": 494}
    ]},
    {"title": "Illustration: Variational Mixture of Gaussians", "start_index": 494, "end_index": 495, "child_nodes": [
      {"title": "Variational distribution", "start_index": 495, "end_index": 501},
      {"title": "Variational lower bound", "start_index": 501, "end_index": 502},
      {"title": "Predictive density", "start_index": 502, "end_index": 503},
      {"title": "Determining the number of components", "start_index": 503, "end_index": 505},
      {"title": "Induced factorizations", "start_index": 505, "end_index": 506}
    ]},
    {"title": "Variational Linear Regression", "start_index": 506, "end_index": 506, "child_nodes": [
      {"title": "Variational distribution", "start_index": 506, "end_index": 508},
      {"title": "Predictive distribution", "start_index": 508, "end_index": 509},
      {"title": "Lower bound", "start_index": 509, "end_index": 510}
    ]},
    {"title": "Exponential Family Distributions", "start_index": 510, "end_index": 511, "child_nodes": [
      {"title": "Variational message passing", "start_index": 511, "end_index": 512}
    ]},
    {"title": "Local Variational Methods", "start_index": 513, "end_index": 518},
    {"title": "Variational Logistic Regression", "start_index": 518, "end_index": 518, "child_nodes": [
      {"title": "Variational posterior distribution", "start_index": 518, "end_index": 520},
      {"title": "Optimizing the variational parameters", "start_index": 520, "end_index": 522},
      {"title": "Inference of hyperparameters", "start_index": 522, "end_index": 525}
    ]},
    {"title": "Expectation Propagation", "start_index": 525, "end_index": 531, "child_nodes": [
      {"title": "Example: The clutter problem", "start_index": 531, "end_index": 533},
      {"title": "Expectation propagation on graphs", "start_index": 533, "end_index": 537}
    ]}
  ]},
  {"title": "Exercises", "start_index": 537, "end_index": 542},
  {"title": "Sampling Methods", "start_index": 543, "end_index": 546, "child_nodes": [
    {"title": "Basic Sampling Algorithms", "start_index": 546, "end_index": 546, "child_nodes": [
      {"title": "Standard distributions", "start_index": 546, "end_index": 548},
      {"title": "Rejection sampling", "start_index": 548, "end_index": 550},
      {"title": "Adaptive rejection sampling", "start_index": 550, "end_index": 552},
      {"title": "Importance sampling", "start_index": 552, "end_index": 554},
      {"title": "Sampling-importance-resampling", "start_index": 554, "end_index": 556},
      {"title": "Sampling and the EM algorithm", "start_index": 556, "end_index": 556}
    ]},
    {"title": "Markov Chain Monte Carlo", "start_index": 557, "end_index": 559, "child_nodes": [
      {"title": "Markov chains", "start_index": 559, "end_index": 561},
      {"title": "The Metropolis-Hastings algorithm", "start_index": 561, "end_index": 562}
    ]},
    {"title": "Gibbs Sampling", "start_index": 562, "end_index": 566},
    {"title": "Slice Sampling", "start_index": 566, "end_index": 568},
    {"title": "The Hybrid Monte Carlo Algorithm", "start_index": 568, "end_index": 568, "child_nodes": [
      {"title": "Dynamical systems", "start_index": 568, "end_index": 572},
      {"title": "Hybrid Monte Carlo", "start_index": 572, "end_index": 574}
    ]},
    {"title": "Estimating the Partition Function", "start_index": 574, "end_index": 576}
  ]},
  {"title": "Exercises", "start_index": 576, "end_index": 579},
  {"title": "Continuous Latent Variables", "start_index": 579, "end_index": 581, "child_nodes": [
    {"title": "Principal Component Analysis", "start_index": 581, "end_index": 581, "child_nodes": [
      {"title": "Maximum variance formulation", "start_index": 581, "end_index": 583},
      {"title": "Minimum-error formulation", "start_index": 583, "end_index": 585},
      {"title": "Applications of PCA", "start_index": 585, "end_index": 589},
      {"title": "PCA for high-dimensional data", "start_index": 589, "end_index": 590}
    ]},
    {"title": "Probabilistic PCA", "start_index": 590, "end_index": 594, "child_nodes": [
      {"title": "Maximum likelihood PCA", "start_index": 594, "end_index": 597},
      {"title": "EM algorithm for PCA", "start_index": 597, "end_index": 600},
      {"title": "Bayesian PCA", "start_index": 600, "end_index": 603},
      {"title": "Factor analysis", "start_index": 603, "end_index": 606}
    ]},
    {"title": "Kernel PCA", "start_index": 606, "end_index": 610},
    {"title": "Nonlinear Latent Variable Models", "start_index": 611, "end_index": 611, "child_nodes": [
      {"title": "Independent component analysis", "start_index": 611, "end_index": 612},
      {"title": "Autoassociative neural networks", "start_index": 612, "end_index": 615},
      {"title": "Modelling nonlinear manifolds", "start_index": 615, "end_index": 619}
    ]}
  ]},
  {"title": "Exercises", "start_index": 619, "end_index": 624},
  {"title": "Sequential Data", "start_index": 625, "end_index": 627, "child_nodes": [
    {"title": "Markov Models", "start_index": 627, "end_index": 630},
    {"title": "Hidden Markov Models", "start_index": 630, "end_index": 635, "child_nodes": [
      {"title": "Maximum likelihood for the HMM", "start_index": 635, "end_index": 638},
      {"title": "The forward-backward algorithm", "start_index": 638, "end_index": 645},
      {"title": "The sum-product algorithm for the HMM", "start_index": 645, "end_index": 647},
      {"title": "Scaling factors", "start_index": 647, "end_index": 649},
      {"title": "The Viterbi algorithm", "start_index": 649, "end_index": 651},
      {"title": "Extensions of the hidden Markov model", "start_index": 651, "end_index": 655}
    ]},
    {"title": "Linear Dynamical Systems", "start_index": 655, "end_index": 658, "child_nodes": [
      {"title": "Inference in LDS", "start_index": 658, "end_index": 662},
      {"title": "Learning in LDS", "start_index": 662, "end_index": 664},
      {"title": "Extensions of LDS", "start_index": 664, "end_index": 665},
      {"title": "Particle filters", "start_index": 665, "end_index": 666}
    ]}
  ]},
  {"title": "Exercises", "start_index": 666, "end_index": 672},
  {"title": "Combining Models", "start_index": 673, "end_index": 674, "child_nodes": [
    {"title": "Bayesian Model Averaging", "start_index": 674, "end_index": 675},
    {"title": "Committees", "start_index": 675, "end_index": 677},
    {"title": "Boosting", "start_index": 677, "end_index": 679, "child_nodes": [
      {"title": "Minimizing exponential error", "start_index": 679, "end_index": 681},
      {"title": "Error functions for boosting", "start_index": 681, "end_index": 683}
    ]},
    {"title": "Tree-based Models", "start_index": 683, "end_index": 686},
    {"title": "Conditional Mixture Models", "start_index": 686, "end_index": 687, "child_nodes": [
      {"title": "Mixtures of linear regression models", "start_index": 687, "end_index": 690},
      {"title": "Mixtures of logistic models", "start_index": 690, "end_index": 692},
      {"title": "Mixtures of experts", "start_index": 692, "end_index": 694}
    ]}
  ]},
  {"title": "Exercises", "start_index": 694, "end_index": 696},
  {"title": "Appendix A Data Sets", "start_index": 697, "end_index": 704},
  {"title": "Appendix B Probability Distributions", "start_index": 705, "end_index": 714},
  {"title": "Appendix C Properties of Matrices", "start_index": 715, "end_index": 722},
  {"title": "Appendix D Calculus of Variations", "start_index": 723, "end_index": 726},
  {"title": "Appendix E Lagrange Multipliers", "start_index": 727, "end_index": 730},
  {"title": "References", "start_index": 731, "end_index": 749},
  {"title": "Index", "start_index": 749, "end_index": 758}
]