diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 2ef07fa5..a6551e11 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -289,9 +289,11 @@ class kern(parameterised): assert X.shape[1] == self.D slices1, slices2 = self._process_slices(slices1, slices2) if X2 is None: - X2 = X - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)] + target = np.zeros((X.shape[0], X.shape[0])) + [p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)] + else: + target = np.zeros((X.shape[0], X2.shape[0])) + [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)] return target def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None): @@ -308,10 +310,12 @@ class kern(parameterised): """ assert X.shape[1] == self.D slices1, slices2 = self._process_slices(slices1, slices2) - if X2 is None: - X2 = X target = np.zeros(self.Nparam) - [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] + if X2 is None: + [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)] + else: + [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)] + return self._transform_gradients(target) diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 78a8732a..78dbdf01 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -4,6 +4,7 @@ from kernpart import kernpart import numpy as np +from ..util.linalg import tdot class linear(kernpart): """ @@ -65,8 +66,11 @@ class linear(kernpart): def K(self,X,X2,target): if self.ARD: XX = X*np.sqrt(self.variances) - XX2 = X2*np.sqrt(self.variances) - target += np.dot(XX, XX2.T) + if X2 is None: + target += tdot(XX) + else: + XX2 = X2*np.sqrt(self.variances) + target += np.dot(XX, XX2.T) else: self._K_computations(X, X2) target += self.variances * self._dot_product @@ -76,8 +80,11 @@ class linear(kernpart): def dK_dtheta(self,dL_dK,X,X2,target): if self.ARD: - product = X[:,None,:]*X2[None,:,:] - target += (dL_dK[:,:,None]*product).sum(0).sum(0) + if X2 is None: + [np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)] + else: + product = X[:,None,:]*X2[None,:,:] + target += (dL_dK[:,:,None]*product).sum(0).sum(0) else: self._K_computations(X, X2) target += np.sum(self._dot_product*dL_dK) @@ -133,9 +140,9 @@ class linear(kernpart): returns N,M,M matrix """ self._psi_computations(Z,mu,S) - psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :] - target += psi2.sum(-1) - #TODO: this could be faster using np.tensordot + #psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :] + #target += psi2.sum(-1) + target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): self._psi_computations(Z,mu,S) @@ -156,28 +163,30 @@ class linear(kernpart): self._psi_computations(Z,mu,S) mu2_S = np.sum(self.mu2_S,0)# Q, target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1) + #TODO: tensordot would gain some time here #---------------------------------------# # Precomputations # #---------------------------------------# def _K_computations(self,X,X2): - if X2 is None: - X2 = X - if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)): - self._Xcache = X - self._X2cache = X2 - self._dot_product = np.dot(X,X2.T) - else: - # print "Cache hit!" - pass # TODO: insert debug message here (logging framework) + if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)): + self._Xcache = X.copy() + if X2 is None: + self._dot_product = tdot(X) + self._X2cache = None + else: + self._X2cache = X2.copy() + self._dot_product = np.dot(X,X2.T) def _psi_computations(self,Z,mu,S): #here are the "statistics" for psi1 and psi2 if not np.all(Z==self._Z): #Z has changed, compute Z specific stuff - self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q - self._Z = Z + #self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F') + [tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])] + self._Z = Z.copy() if not (np.all(mu==self._mu) and np.all(S==self._S)): self.mu2_S = np.square(mu)+S - self._mu, self._S = mu, S + self._mu, self._S = mu.copy(), S.copy() diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 9ff7a93e..027e5e9e 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -6,6 +6,7 @@ from kernpart import kernpart import numpy as np import hashlib from scipy import weave +from ..util.linalg import tdot class rbf(kernpart): """ @@ -74,11 +75,8 @@ class rbf(kernpart): return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] def K(self,X,X2,target): - if X2 is None: - X2 = X - self._K_computations(X,X2) - np.add(self.variance*self._K_dvar, target,target) + target += self.variance*self._K_dvar def Kdiag(self,X,target): np.add(target,self.variance,target) @@ -87,6 +85,7 @@ class rbf(kernpart): self._K_computations(X,X2) target[0] += np.sum(self._K_dvar*dL_dK) if self.ARD: + if X2 is None: X2 = X [np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)] else: target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK) @@ -182,29 +181,31 @@ class rbf(kernpart): #---------------------------------------# def _K_computations(self,X,X2): - if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())): + if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): self._X = X.copy() - self._X2 = X2.copy() self._params == self._get_params().copy() - if X2 is None: X2 = X - #never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy - #_K_dist = X[:,None,:]-X2[None,:,:] - #_K_dist2 = np.square(_K_dist/self.lengthscale) - X = X/self.lengthscale - X2 = X2/self.lengthscale - self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:]) + if X2 is None: + self._X2 = None + X = X/self.lengthscale + Xsquare = np.sum(np.square(X),1) + self._K_dist2 = (-2.*tdot(X) + Xsquare[:,None] + Xsquare[None,:]) + else: + self._X2 = X2.copy() + X = X/self.lengthscale + X2 = X2/self.lengthscale + self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:]) self._K_dvar = np.exp(-0.5*self._K_dist2) def _psi_computations(self,Z,mu,S): #here are the "statistics" for psi1 and psi2 - if not np.all(Z==self._Z): + if not np.array_equal(Z, self._Z): #Z has changed, compute Z specific stuff self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q self._Z = Z - if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): + if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)): #something's changed. recompute EVERYTHING #psi1 diff --git a/GPy/kern/white.py b/GPy/kern/white.py index f5d6894a..be6aad45 100644 --- a/GPy/kern/white.py +++ b/GPy/kern/white.py @@ -30,17 +30,15 @@ class white(kernpart): return ['variance'] def K(self,X,X2,target): - if X.shape==X2.shape: - if np.all(X==X2): - np.add(target,np.eye(X.shape[0])*self.variance,target) + if X2 is None: + target += np.eye(X.shape[0])*self.variance def Kdiag(self,X,target): target += self.variance def dK_dtheta(self,dL_dK,X,X2,target): - if X.shape==X2.shape: - if np.all(X==X2): - target += np.trace(dL_dK) + if X2 is None: + target += np.trace(dL_dK) def dKdiag_dtheta(self,dL_dKdiag,X,target): target += np.sum(dL_dKdiag) diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py index 25d12491..d3696fa6 100644 --- a/GPy/likelihoods/Gaussian.py +++ b/GPy/likelihoods/Gaussian.py @@ -30,7 +30,7 @@ class Gaussian(likelihood): self.trYYT = np.trace(self.YYT) else: self.YYT = None - self.trYYT = None + self.trYYT = np.sum(np.square(self.Y)) def _get_params(self): return np.asarray(self._variance) diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index 0d4cf91e..6333fb1c 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -33,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): X = self.initialise_latent(init, Q, Y) if X_variance is None: - X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1) + X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) if Z is None: Z = np.random.permutation(X.copy())[:M] diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index dc77e795..697a9978 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -3,7 +3,7 @@ import numpy as np import pylab as pb -from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot +from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot from ..util.plot import gpplot from .. import kern from GP import GP @@ -50,9 +50,6 @@ class sparse_GP(GP): self.has_uncertain_inputs=True self.X_variance = X_variance - if not self.likelihood.is_heteroscedastic: - self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here? - GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices) #normalize X uncertainty also @@ -86,13 +83,15 @@ class sparse_GP(GP): self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0) else: tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf) - self.psi2_beta_scaled = np.dot(tmp,tmp.T) + #self.psi2_beta_scaled = np.dot(tmp,tmp.T) + self.psi2_beta_scaled = tdot(tmp) else: if self.has_uncertain_inputs: self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0) else: tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) - self.psi2_beta_scaled = np.dot(tmp,tmp.T) + #self.psi2_beta_scaled = np.dot(tmp,tmp.T) + self.psi2_beta_scaled = tdot(tmp) self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) @@ -110,10 +109,11 @@ class sparse_GP(GP): self.psi1V = np.dot(self.psi1, self.V) tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0] self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] + #TODO: can we multiply in C by forwardsubstitution? self.Cpsi1V = np.dot(self.C,self.psi1V) self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) - #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2 - self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf) + #self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf) + self.E = tdot(self.Cpsi1V/sf) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten() @@ -166,9 +166,9 @@ class sparse_GP(GP): #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD else: #likelihood is not heterscedatic - self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 + self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2 self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2) - self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable? + self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 55a1fb65..ee8368ac 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase): bias = GPy.kern.bias(2) self.check_model_with_white(bias, model_type='GP_regression', dimension=2) + def test_GP_regression_linear_kern_1D_ARD(self): + ''' Testing the GP regression with linear kernel on 1d data ''' + linear = GPy.kern.linear(1,ARD=True) + self.check_model_with_white(linear, model_type='GP_regression', dimension=1) + + def test_GP_regression_linear_kern_2D_ARD(self): + ''' Testing the GP regression with linear kernel on 2d data ''' + linear = GPy.kern.linear(2,ARD=True) + self.check_model_with_white(linear, model_type='GP_regression', dimension=2) + def test_GP_regression_linear_kern_1D(self): ''' Testing the GP regression with linear kernel on 1d data ''' linear = GPy.kern.linear(1) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 34e30dca..b19aa2b6 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -196,7 +196,7 @@ def tdot_blas(mat, out=None): if (mat.dtype != 'float64') or (len(mat.shape) != 2): return np.dot(mat, mat.T) nn = mat.shape[0] - if not out: + if out is None: out = np.zeros((nn,nn)) else: assert(out.dtype == 'float64') @@ -211,7 +211,7 @@ def tdot_blas(mat, out=None): # could avoid the copy. I also thought swapping to cblas API would allow use # of C order. However, I tried that and had errors with large matrices: # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py - mat = mat.copy(order='F') + mat = np.asfortranarray(mat) TRANS = c_char('n') N = c_int(mat.shape[0]) K = c_int(mat.shape[1]) @@ -225,7 +225,7 @@ def tdot_blas(mat, out=None): _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K), byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) - symmetrify(out.T) + symmetrify(out,upper=True) return out @@ -235,7 +235,7 @@ def tdot(*args, **kwargs): else: return tdot_numpy(*args,**kwargs) -def symmetrify(A): +def symmetrify(A,upper=False): """ Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper @@ -257,9 +257,13 @@ def symmetrify(A): } } """ - if A.flags['C_CONTIGUOUS']: + if A.flags['C_CONTIGUOUS'] and upper: + weave.inline(f_contig_code,['A','N']) + elif A.flags['C_CONTIGUOUS'] and not upper: weave.inline(c_contig_code,['A','N']) - elif A.flags['F_CONTIGUOUS']: + elif A.flags['F_CONTIGUOUS'] and upper: + weave.inline(c_contig_code,['A','N']) + elif A.flags['F_CONTIGUOUS'] and not upper: weave.inline(f_contig_code,['A','N']) else: tmp = np.tril(A)