diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 7677fea2..91d1f205 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -70,7 +70,7 @@ class SparseGP(GP):
             #gradients wrt Z
             self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
             self.Z.gradient += self.kern.gradients_Z_expectations(
-                               self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpis2'], Z=self.Z, variational_posterior=self.X)
+                               self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
         else:
             #gradients wrt kernel
             target = np.zeros(self.kern.size)
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index a2686d73..eab9a965 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -274,6 +274,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
     _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
     Y = Ylist[0]
     k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
+    #k = kern.RBF(Q, ARD=True, lengthscale=10.)
     m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)

     if optimize:
@@ -281,7 +282,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
         m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)

     if plot:
-        m.q.plot("BGPLVM Latent Space 1D")
+        m.X.plot("BGPLVM Latent Space 1D")
         m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
     return m

@@ -302,7 +303,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
     m.inference_method = VarDTCMissingData()
     m.Y[inan] = _np.nan
-    m.q.variance *= .1
+    m.X.variance *= .1
     m.parameters_changed()
     m.Yreal = Y

@@ -311,7 +312,7 @@
         m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)

     if plot:
-        m.q.plot("BGPLVM Latent Space 1D")
+        m.X.plot("BGPLVM Latent Space 1D")
         m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
     return m

diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index e503180a..44f71d85 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -22,22 +22,25 @@ class Linear(Kern):
     :param input_dim: the number of input dimensions
     :type input_dim: int
     :param variances: the vector of variances :math:`\sigma^2_i`
-    :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
+    :type variances: array or list of the appropriate size (or float if there
+                     is only one variance parameter)
+    :param ARD: Auto Relevance Determination. If False, the kernel has only one
+                variance parameter \sigma^2, otherwise there is one variance
+                parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
+
     """

     def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
         super(Linear, self).__init__(input_dim, name)
         self.ARD = ARD
-        if ARD == False:
+        if not ARD:
             if variances is not None:
                 variances = np.asarray(variances)
                 assert variances.size == 1, "Only one variance needed for non-ARD kernel"
             else:
                 variances = np.ones(1)
-            self._Xcache, self._X2cache = np.empty(shape=(2,))
         else:
             if variances is not None:
                 variances = np.asarray(variances)
@@ -103,7 +106,6 @@ class Linear(Kern):

     #---------------------------------------#
     #             PSI statistics            #
-    #             variational               #
     #---------------------------------------#

     def psi0(self, Z, variational_posterior):
@@ -118,18 +120,16 @@ class Linear(Kern):
         return np.dot(ZAinner, ZA.T)

     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        mu, S = variational_posterior.mean, variational_posterior.variance
+        #psi1
+        self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
         # psi0:
         tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
-        if self.ARD: grad = tmp.sum(0)
-        else: grad = np.atleast_1d(tmp.sum())
-        #psi1
-        self.update_gradients_full(dL_dpsi1, mu, Z)
-        grad += self.variances.gradient
+        if self.ARD: self.variances.gradient += tmp.sum(0)
+        else: self.variances.gradient += tmp.sum()
         #psi2
         tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
-        if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
-        else: grad += tmp.sum()
+        if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
+        else: self.variances.gradient += tmp.sum()

     def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         #psi1
@@ -155,7 +155,7 @@ class Linear(Kern):

     #--------------------------------------------------#

-    def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S):
+    def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, vp, target_mu, target_S):
         # Think N,num_inducing,num_inducing,input_dim
         ZA = Z * self.variances
         AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
@@ -198,16 +198,15 @@ class Linear(Kern):
         weave_options = {'headers'           : ['<omp.h>'],
                          'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                          'extra_link_args'   : ['-lgomp']}
-
-        mu = pv.mean
+        mu = vp.mean
         N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)

         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
                      type_converters=weave.converters.blitz,**weave_options)

-    def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target):
-        AZA = self.variances*self._ZAinner(pv, Z)
+    def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
+        AZA = self.variances*self._ZAinner(vp, Z)
         code="""
         int n,m,mm,q;
         #pragma omp parallel for private(n,mm,q)
@@ -229,23 +228,23 @@ class Linear(Kern):
                          'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                          'extra_link_args'   : ['-lgomp']}

-        N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1]
-        mu = param_to_array(pv.mean)
+        N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
+        mu = param_to_array(vp.mean)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
                      type_converters=weave.converters.blitz,**weave_options)

-    def _mu2S(self, pv):
-        return np.square(pv.mean) + pv.variance
+    def _mu2S(self, vp):
+        return np.square(vp.mean) + vp.variance

-    def _ZAinner(self, pv, Z):
+    def _ZAinner(self, vp, Z):
         ZA = Z*self.variances
-        inner = (pv.mean[:, None, :] * pv.mean[:, :, None])
-        diag_indices = np.diag_indices(pv.mean.shape[1], 2)
-        inner[:, diag_indices[0], diag_indices[1]] += pv.variance
+        inner = (vp.mean[:, None, :] * vp.mean[:, :, None])
+        diag_indices = np.diag_indices(vp.mean.shape[1], 2)
+        inner[:, diag_indices[0], diag_indices[1]] += vp.variance

-        return np.dot(ZA, inner).swapaxes(0, 1)  # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
+        return np.dot(ZA, inner).swapaxes(0, 1)  # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!

     def input_sensitivity(self):
         if self.ARD: return self.variances
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 366995dc..5fed767b 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -66,7 +66,7 @@ class BayesianGPLVM(SparseGP):
         super(BayesianGPLVM, self).parameters_changed()
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

-        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
+        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)

         # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)
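
Note on usage, since the patch touches two outward-facing names: the variational posterior over the latent space is now reached as m.X rather than m.q, and kernels expose psi-statistic gradients w.r.t. q(X) through gradients_qX_expectations (formerly gradients_q_variational). A minimal sketch, assuming a GPy development checkout that this patch applies to and that bgplvm_simulation exposes the optimize/plot keywords used in its body:

    from GPy.examples.dimensionality_reduction import bgplvm_simulation

    # Build the simulated BGPLVM model without optimizing or plotting.
    m = bgplvm_simulation(optimize=False, plot=False)

    # The variational posterior q(X) now lives on m.X (formerly m.q).
    print(m.X.mean.shape, m.X.variance.shape)

    # Gradients of the psi-statistic expectations w.r.t. q(X), through the
    # renamed kernel hook; m.grad_dict is filled in by the sparse-GP
    # inference step. This mirrors the call in bayesian_gplvm.py above.
    dmu, dS = m.kern.gradients_qX_expectations(
        variational_posterior=m.X, Z=m.Z, **m.grad_dict)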