diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 3c3d59e6..ae587202 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -12,7 +12,7 @@ class rbf(kernpart): .. math:: - k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}} + k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2} where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index 430c2718..a18ec9bb 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -85,5 +85,5 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self))) def plot_latent(self, *args, **kwargs): - input_1, input_2 = GPLVM.plot_latent(*args, **kwargs) - pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w') + input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs) + pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index b44801fc..5be54049 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -81,13 +81,16 @@ class GPLVM(GP): raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" k = k[0] if k.name=='rbf': - input_1, input_2 = np.argsort(k.lengthscales)[:2] + input_1, input_2 = np.argsort(k.lengthscale)[:2] elif k.name=='linear': input_1, input_2 = np.argsort(k.variances)[::-1][:2] #first, plot the output variance as a function of the latent space Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution) - mu, var, low, up = self.predict(Xtest) + Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) + Xtest_full[:, :2] = Xtest + mu, var, low, up = self.predict(Xtest_full) + var = var[:, :2] pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst index 672bc1e7..8ebf8b8f 100644 --- a/doc/tuto_creating_new_kernels.rst +++ b/doc/tuto_creating_new_kernels.rst @@ -22,7 +22,7 @@ We advise the reader to start with copy-pasting an existing kernel and to modify **Header** -The header is similar to all kernels:: +The header is similar to all kernels: :: from kernpart import kernpart import numpy as np @@ -35,7 +35,7 @@ The implementation of this function in mandatory. For all kernparts the first parameter ``D`` corresponds to the dimension of the input space, and the following parameters stand for the parameterization of the kernel. -The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer).:: +The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer). :: def __init__(self,D,variance=1.,lengthscale=1.,power=1.): assert D == 1, "For this kernel we assume D=1" @@ -50,7 +50,7 @@ The following attributes are compulsory: ``self.D`` (the dimension, integer), `` The implementation of this function in mandatory. -This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters.:: +This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters. :: def _get_params(self): return np.hstack((self.variance,self.lengthscale,self.power)) @@ -59,7 +59,7 @@ This function returns a one dimensional array of length ``self.Nparam`` containi The implementation of this function in mandatory. -The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...).:: +The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...). :: def _set_params(self,x): self.variance = x[0] @@ -70,7 +70,7 @@ The input is a one dimensional array of length ``self.Nparam`` containing the va The implementation of this function in mandatory. -It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names.:: +It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names. :: def _get_param_names(self): return ['variance','lengthscale','power'] @@ -79,7 +79,7 @@ It returns a list of strings of length ``self.Nparam`` corresponding to the para The implementation of this function in mandatory. -This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use.:: +This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use. :: def K(self,X,X2,target): if X2 is None: X2 = X @@ -90,7 +90,7 @@ This function is used to compute the covariance matrix associated with the input The implementation of this function in mandatory. -This function is similar to ``K`` but it computes only the values of the kernel on the diagonal. Thus, ``target`` is a 1-dimensional np.array of length :math:`n_1`.:: +This function is similar to ``K`` but it computes only the values of the kernel on the diagonal. Thus, ``target`` is a 1-dimensional np.array of length :math:`n_1`. :: def Kdiag(self,X,target): target += self.variance @@ -100,7 +100,7 @@ This function is similar to ``K`` but it computes only the values of the kernel This function is required for the optimization of the parameters. -Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second.:: +Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second. :: def dK_dtheta(self,dL_dK,X,X2,target): if X2 is None: X2 = X @@ -119,7 +119,7 @@ Computes the derivative of the likelihood. As previously, the values are added t This function is required for BGPLVM, sparse models and uncertain inputs. -As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element.:: +As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element. :: def dKdiag_dtheta(self,dL_dKdiag,X,target): target[0] += np.sum(dL_dKdiag) @@ -129,7 +129,7 @@ As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} This function is required for GPLVM, BGPLVM, sparse models and uncertain inputs. -Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array.:: +Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array. :: def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" @@ -141,7 +141,7 @@ Computes the derivative of the likelihood with respect to the inputs ``X`` (a :m **dKdiag_dX(self,dL_dKdiag,X,target)** -This function is required for BGPLVM, sparse models and uncertain inputs. As for ``dKdiag_dtheta``, :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dX}` is added to each element of target.:: +This function is required for BGPLVM, sparse models and uncertain inputs. As for ``dKdiag_dtheta``, :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dX}` is added to each element of target. :: def dKdiag_dX(self,dL_dKdiag,X,target): pass @@ -167,7 +167,7 @@ The following line should be added in the preamble of the file:: from rational_quadratic import rational_quadratic as rational_quadratic_part -as well as the following block:: +as well as the following block :: def rational_quadratic(D,variance=1., lengthscale=1., power=1.): part = rational_quadraticpart(D,variance, lengthscale, power) diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst index 370ffd95..3031a5e1 100644 --- a/doc/tuto_interacting_with_models.rst +++ b/doc/tuto_interacting_with_models.rst @@ -2,52 +2,212 @@ Interacting with models ************************************* -The GPy model class has a set of features which are designed to make it simple to explore the parameter space of the model. By default, the scipy optimisers are used to fit GPy models (via model.optimize()), for which we provide mechanisms for 'free' optimisation: GPy can ensure that naturally positive parameters (such as variances) remain positive. But these mechanisms are much more powerful than simple reparameterisation, as we shall see. +The GPy model class has a set of features which are +designed to make it simple to explore the parameter +space of the model. By default, the scipy optimisers +are used to fit GPy models (via model.optimize()), +for which we provide mechanisms for 'free' optimisation: +GPy can ensure that naturally positive parameters +(such as variances) remain positive. But these mechanisms +are much more powerful than simple reparameterisation, +as we shall see. -All of the examples included in GPy return an instance of a model class. We'll use GPy.examples.?? as an example:: +Along this tutorial we'll use a sparse GP regression model +as example. This example can be in ``GPy.examples.regression``. +All of the examples included in GPy return an instance +of a model class, and therefore they can be called in +the following way: :: import pylab as pb pb.ion() import GPy - m = GPy.examples.?? + m = GPy.examples.regression.sparse_GP_regression_1D() Examining the model using print =============================== -To see the current state of the model parameters, and the model's (marginal) likelihood just print the model:: +To see the current state of the model parameters, +and the model's (marginal) likelihood just print the model :: + print m -?? output +The first thing displayed on the screen is the log-likelihood +value of the model with its current parameters. Below the +log-likelihood, a table with all the model's parameters +is shown. For each parameter, the table contains the name +of the parameter, the current value, and in case there are +defined: constraints, ties and prior distrbutions associated. :: -Getting the model's likelihood and gradients -=========================================== -foobar + Log-likelihood: 6.309e+02 + + Name | Value | Constraints | Ties | Prior + ------------------------------------------------------------------ + iip_0_0 | -1.4671 | | | + iip_1_0 | 2.6378 | | | + iip_2_0 | -0.0396 | | | + iip_3_0 | -2.6372 | | | + iip_4_0 | 1.4704 | | | + rbf_variance | 1.5672 | (+ve) | | + rbf_lengthscale | 2.5625 | (+ve) | | + white_variance | 0.0000 | (+ve) | | + noise_variance | 0.0022 | (+ve) | | + +In this case the kernel parameters (``rbf_variance``, +``rbf_lengthscale`` and ``white_variance``) as well as +the noise parameter (``noise_variance``), are constrained +to be positive, while the inducing inputs have not +constraints associated. Also there are no ties or prior defined. Setting and fetching parameters by name ======================================= -foobar +Another way to interact with the model's parameters is through +the functions ``_get_param_names()``, ``_get_params()`` and +``_set_params()``. + +``_get_param_names()`` returns a list of the parameters names :: + + ['iip_0_0', + 'iip_1_0', + 'iip_2_0', + 'iip_3_0', + 'iip_4_0', + 'rbf_variance', + 'rbf_lengthscale', + 'white_variance', + 'noise_variance'] + +``_get_params()`` returns an array of the parameters values :: + + array([ -1.46705227e+00, 2.63782176e+00, -3.96422982e-02, + -2.63715255e+00, 1.47038653e+00, 1.56724596e+00, + 2.56248679e+00, 2.20963633e-10, 2.18379922e-03]) + +``_set_params()`` takes an array as input and substitutes +the current values of the parameters for those of the array. For example, +we can define a new array of values and change the parameters as follows: :: + + new_params = np.array([1.,2.,3.,4.,1.,1.,1.,1.,1.]) + m._set_params(new_params) + +If we call the function ``_get_params()`` again, we will obtain the new +parameters we have just set. + +Parameters can be also set by name using the function ``_set()``. For example, +lets change the lengthscale to .5: :: + + m.set('rbf_lengthscale',.5) + +``_set()`` function accepts regular expression as it first +input, and therefore all parameters matching that regular +expression are set to the given value. In this case rather +than passing as second output a single value, we can also +use a list of arrays. For example, lets change the inducing +inputs: :: + + m.set('iip',np.arange(-4,0)) + +Getting the model's likelihood and gradients +=========================================== +Appart form the printing the model, the marginal +log-likelihood can be obtained by using the function +``log_likelihood()``. Also, the log-likelihood gradients +wrt. each parameter can be obtained with the funcion +``_log_likelihood_gradients()``. :: + + m.log_likelihood() + -791.15371409346153 + + m._log_likelihood_gradients() + array([ 7.08278455e-03, 1.37118783e+01, 2.66948031e+00, + 3.50184014e+00, 7.08278455e-03, -1.43501702e+02, + 6.10662266e+01, -2.18472649e+02, 2.14663691e+02]) + +Removing the model's constraints +================================ +When we initially call the example, it was optimized and hence the +log-likelihood gradients were close to zero. However, since +we have been changing the parameters, the gradients are far from zero now. +Next we are going to show how to optimize the model setting different +restrictions on the parameters. + +Once a constrain has been set on a parameter, it is not possible to +define a new constraint for it unless we explicitly remove the previous +one. The command to remove the constraints is ``unconstrain()``, and +just as the ``set()`` command, it also accepts regular expression. +In this case we will remove all the constraints: :: + + m.unconstrain('') Constraining and optimising the model ===================================== -A simple task in GPy is to ensure that the models' variances remain positive during optimisation. the models class has a function called constrain_positive(), which accepts a regex string as above. To constrain the models' variance to be positive:: - m.constrain_positive('variance') - print m +A requisite needed for some parameters, such as variances, +is to be positive. This is constraint is easily set +with the function ``constrain_positive()``. Regular expressions +are also accepted. :: -Now we see that the variance of the model is constrained to be postive. GPy handles the effective change of gradients: see how m.objective_gradients has changed approriately + m.constrain_positive('var') +For convenience, GPy also provides a catch all function +which ensures that anything which appears to require +positivity is constrianed appropriately:: -For convenience, we also provide a catch all function which ensures that anything which appears to require positivity is constrianed appropriately:: m.ensure_default_constraints() - Fixing parameters ================= +Parameters values can be fixed using ``constrain_fixed()``. +For example we can define the first inducing input to be +fixed on zero: :: + m.constrain_fixed('iip_0',0) + +Bounding parameters +=================== +Defining bounding constraints is an easily task in GPy too, +it only requires to use the function ``constrain_bounded()``. +For example, lets bound inducing inputs 2 and 3 to have +values between -4 and -1: :: + + m.constrain_bounded('iip_(1|2)',-4,-1) Tying Parameters ================ +The values of two or more parameters can be tied together, +so that they share the same value during optimization. +The function to do so is ``tie_params()``. For the example +we are using, it doesn't make sense to tie parameters together, +however for the sake of the example we will tie the white noise +and the variance together. See `A kernel overview `_. +for a proper use of the tying capabilities.:: -Bounding parameters -=================== + m.tie_params('e_var') + +Optimizing the model +==================== +Once we have finished defining the constraints, +we can now optimize the model with the function +``optimize``.:: + + m.optimize() + +We can print again the model and check the new results. +The table now shows that ``iip_0_0`` is fixed, ``iip_1_0`` +and ``iip_2_0`` are bounded and the kernel parameters are constrained to +be positive. In addition the table now indicates that +white_variance and noise_variance are tied together.:: + + Log-likelihood: 9.967e+01 + + Name | Value | Constraints | Ties | Prior + ------------------------------------------------------------------ + iip_0_0 | 0.0000 | Fixed | | + iip_1_0 | -2.8834 | (-4, -1) | | + iip_2_0 | -1.9152 | (-4, -1) | | + iip_3_0 | 1.5034 | | | + iip_4_0 | -1.0162 | | | + rbf_variance | 0.0158 | (+ve) | | + rbf_lengthscale | 0.9760 | (+ve) | | + white_variance | 0.0049 | (+ve) | (0) | + noise_variance | 0.0049 | (+ve) | (0) | Further Reading @@ -55,6 +215,3 @@ Further Reading All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read ??link?? creating_new_models. By deafult, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??. - - - diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst index dfb7fb3f..da19803b 100644 --- a/doc/tuto_kernel_overview.rst +++ b/doc/tuto_kernel_overview.rst @@ -133,7 +133,7 @@ Various constrains can be applied to the parameters of a kernel * ``constrain_fixed`` to fix the value of a parameter (the value will not be modified during optimisation) * ``constrain_positive`` to make sure the parameter is greater than 0. * ``constrain_bounded`` to impose the parameter to be in a given range. - * ``tie_param`` to impose the value of two (or more) parameters to be equal. + * ``tie_params`` to impose the value of two (or more) parameters to be equal. When calling one of these functions, the parameters to constrain can either by specified by a regular expression that matches its name or by a number that corresponds to the rank of the parameter. Here is an example :: @@ -146,7 +146,7 @@ When calling one of these functions, the parameters to constrain can either by s k.constrain_positive('var') k.constrain_fixed(np.array([1]),1.75) - k.tie_param('len') + k.tie_params('len') k.unconstrain('white') k.constrain_bounded('white',lower=1e-5,upper=.5) print k