diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index edb8d8f6..128dfca3 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -68,22 +68,21 @@ class SparseGP(GP):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
         self._update_gradients_Z(add=False)
 
-    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
         Make a prediction for the latent function values
         """
         if X_variance_new is None:
-            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
+            Kx = self.kern.K(self.Z, Xnew)
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
             if full_cov:
-                Kxx = self.kern.K(Xnew, which_parts=which_parts)
-                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
+                Kxx = self.kern.K(Xnew)
+                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
             else:
-                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
+                Kxx = self.kern.Kdiag(Xnew)
                 var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
         else:
-            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
-            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
+            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
             mu = np.dot(Kx, self.Cpsi1V)
             if full_cov:
                 raise NotImplementedError, "TODO"
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 8d81674b..edb82ef0 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -46,7 +46,7 @@ class Add(Kern):
         [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
 
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)]
+        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
 
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index 5083c8de..b3765774 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -43,16 +43,16 @@ class Linear(Kern):
             assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
         else:
             variances = np.ones(self.input_dim)
-        
+
         self.variances = Param('variances', variances, Logexp())
-        self.variances.gradient = np.zeros(self.variances.shape)
+        #TODO: remove? self.variances.gradient = np.zeros(self.variances.shape)
         self.add_parameter(self.variances)
         self.variances.add_observer(self, self.update_variance)
 
         # initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3, 1))
         self._X, self._X2 = np.empty(shape=(2, 1))
-        
+
     def update_variance(self, v):
         self.variances2 = np.square(self.variances)
 
@@ -62,7 +62,7 @@ class Linear(Kern):
     def update_gradients_full(self, dL_dK, X):
         self.variances.gradient[:] = 0
         self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
-        
+
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         tmp = dL_dKdiag[:, None] * X ** 2
         if self.ARD:
@@ -71,7 +71,7 @@ class Linear(Kern):
             self.variances.gradient = tmp.sum()
         self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
         self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
-        
+
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         self._psi_computations(Z, mu, S)
         # psi0:
@@ -87,7 +87,7 @@ class Linear(Kern):
         #from Kmm
         self._K_computations(Z, None)
         self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
-        
+
     def K(self, X, X2, target):
         if self.ARD:
             XX = X * np.sqrt(self.variances)
@@ -224,7 +224,7 @@ class Linear(Kern):
         weave_options = {'headers'           : ['<omp.h>'],
                          'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                          'extra_link_args'   : ['-lgomp']}
-        
+
         N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
@@ -281,7 +281,7 @@ class Linear(Kern):
             self._X2 = None
         else:
             self._X2 = X2.copy()
-        self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) 
+        self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
 
     def _psi_computations(self, Z, mu, S):
         # here are the "statistics" for psi1 and psi2
diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index e0d069b2..67637770 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -36,38 +36,33 @@ class Prod(Kern):
         self._params = None
 
     def K(self, X, X2=None):
-        self._K_computations(X,X2)
+        self._K_computations(X, X2)
         return self._K1 * self._K2
 
+    def Kdiag(self, X):
+        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
+
     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, None)
         self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
         self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
 
-    def Kdiag(self, X):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2.K(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1])
+        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1.K(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2])
 
-    def update_gradients_sparse(self):
-        pass
-        #wtf goes here??
-
-    #def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        #K1 = np.zeros(X.shape[0])
-        #K2 = np.zeros(X.shape[0])
-        #self.k1.Kdiag(X[:,self.slice1],K1)
-        #self.k2.Kdiag(X[:,self.slice2],K2)
-        #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
-        #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
-
-    def gradients_X(self,dL_dK,X,X2):
+    def gradients_X(self, dL_dK, X, X2=None):
         """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X,X2)
+        self._K_computations(X, X2)
+        target = np.zeros(X.shape)
         if X2 is None:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
         else:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])
+
+        return target
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
@@ -78,7 +73,7 @@ class Prod(Kern):
         self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
         self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
 
-    def _K_computations(self,X,X2):
+    def _K_computations(self, X, X2):
         if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
             self._X = X.copy()
             self._params == self._get_params().copy()
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index eb713433..02640fdc 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -154,7 +154,7 @@ class RBF(Kern):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
 
-    def gradients_X(self, dL_dK, X, X2):
+    def gradients_X(self, dL_dK, X, X2=None):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:
diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py
index 2be73389..d20e2fe1 100644
--- a/GPy/kern/_src/white.py
+++ b/GPy/kern/_src/white.py
@@ -20,9 +20,8 @@ class White(Kern):
         self.input_dim = input_dim
         self.variance = Param('variance', variance, Logexp())
         self.add_parameters(self.variance)
-        self._psi1 = 0 # TODO: more elegance here
 
-    def K(self,X,X2):
+    def K(self, X, X2=None):
         if X2 is None:
             return np.eye(X.shape[0])*self.variance
         else:
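
The most substantive interface change above is in Prod.gradients_X: instead of writing into a caller-supplied target array, it now allocates the gradient itself and returns it, routing each factor's contribution through the product rule (k1's gradient weighted by K2, and vice versa) into the right column slices. Below is a minimal self-contained sketch of that convention, checked against finite differences. The names in it (K1, K1_grad_X, prod_gradients_X) are illustrative stand-ins for this note only, not GPy API, and the toy kernels are plain linear kernels, not GPy classes.

import numpy as np

# Toy linear sub-kernel and the gradient of sum(dL_dK * K1(X, X2)) w.r.t. X;
# stand-ins for the k1/k2 sub-kernels acting on disjoint column slices.
def K1(X, X2):
    return np.dot(X, X2.T)

def K1_grad_X(dL_dK, X, X2):
    return np.dot(dL_dK, X2)

K2, K2_grad_X = K1, K1_grad_X  # reuse the same toy kernel on the second slice

def prod_gradients_X(dL_dK, X, X2, s1, s2):
    # Product rule, mirroring the patched Prod.gradients_X convention:
    # weight each factor's gradient by the other factor's covariance,
    # accumulate into a fresh target, and return it.
    _K1 = K1(X[:, s1], X2[:, s1])
    _K2 = K2(X[:, s2], X2[:, s2])
    target = np.zeros(X.shape)
    target[:, s1] += K1_grad_X(dL_dK * _K2, X[:, s1], X2[:, s1])
    target[:, s2] += K2_grad_X(dL_dK * _K1, X[:, s2], X2[:, s2])
    return target

# Finite-difference check on random data.
rng = np.random.RandomState(0)
X, X2, dL_dK = rng.randn(4, 3), rng.randn(5, 3), rng.randn(4, 5)
s1, s2 = slice(0, 2), slice(2, 3)
analytic = prod_gradients_X(dL_dK, X, X2, s1, s2)

f = lambda Xc: np.sum(dL_dK * (K1(Xc[:, s1], X2[:, s1]) * K2(Xc[:, s2], X2[:, s2])))
eps, numeric = 1e-6, np.zeros(X.shape)
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        Xp, Xm = X.copy(), X.copy()
        Xp[i, j] += eps
        Xm[i, j] -= eps
        numeric[i, j] = (f(Xp) - f(Xm)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-4)

Returning a freshly allocated array keeps the slice bookkeeping local to the product kernel: the sub-kernels never need to know where their columns live in the joint input, which is also why gradients_X can now default X2=None in the diff above.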