James and Nicolos massive Yak shaving session

2026-06-14 15:25:15 +02:00 · 2013-04-26 19:32:33 +01:00 · 2013-04-26 19:32:33 +01:00 · 9fb090a508
commit 9fb090a508
parent 3cb445886d
9 changed files with 90 additions and 64 deletions
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@ -289,9 +289,11 @@ class kern(parameterised):
        assert X.shape[1] == self.D
        slices1, slices2 = self._process_slices(slices1, slices2)
        if X2 is None:
-            X2 = X
-        target = np.zeros((X.shape[0], X2.shape[0]))
-        [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+            target = np.zeros((X.shape[0], X.shape[0]))
+            [p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
+        else:
+            target = np.zeros((X.shape[0], X2.shape[0]))
+            [p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
        return target

    def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
@ -308,10 +310,12 @@ class kern(parameterised):
        """
        assert X.shape[1] == self.D
        slices1, slices2 = self._process_slices(slices1, slices2)
-        if X2 is None:
-            X2 = X
        target = np.zeros(self.Nparam)
-        [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
+        if X2 is None:
+            [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
+        else:
+            [p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
+

        return self._transform_gradients(target)

--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@ -4,6 +4,7 @@

 from kernpart import kernpart
 import numpy as np
+from ..util.linalg import tdot

 class linear(kernpart):
    """
@ -65,8 +66,11 @@ class linear(kernpart):
    def K(self,X,X2,target):
        if self.ARD:
            XX = X*np.sqrt(self.variances)
-            XX2 = X2*np.sqrt(self.variances)
-            target += np.dot(XX, XX2.T)
+            if X2 is None:
+                target += tdot(XX)
+            else:
+                XX2 = X2*np.sqrt(self.variances)
+                target += np.dot(XX, XX2.T)
        else:
            self._K_computations(X, X2)
            target += self.variances * self._dot_product
@ -76,8 +80,11 @@ class linear(kernpart):

    def dK_dtheta(self,dL_dK,X,X2,target):
        if self.ARD:
-            product = X[:,None,:]*X2[None,:,:]
-            target += (dL_dK[:,:,None]*product).sum(0).sum(0)
+            if X2 is None:
+                [np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
+            else:
+                product = X[:,None,:]*X2[None,:,:]
+                target += (dL_dK[:,:,None]*product).sum(0).sum(0)
        else:
            self._K_computations(X, X2)
            target += np.sum(self._dot_product*dL_dK)
@ -133,9 +140,9 @@ class linear(kernpart):
        returns N,M,M matrix
        """
        self._psi_computations(Z,mu,S)
-        psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
-        target += psi2.sum(-1)
-        #TODO: this could be faster using np.tensordot
+        #psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
+        #target += psi2.sum(-1)
+        target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T

    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
        self._psi_computations(Z,mu,S)
@ -156,28 +163,30 @@ class linear(kernpart):
        self._psi_computations(Z,mu,S)
        mu2_S = np.sum(self.mu2_S,0)# Q,
        target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
+        #TODO: tensordot would gain some time here

    #---------------------------------------#
    #            Precomputations            #
    #---------------------------------------#

    def _K_computations(self,X,X2):
-        if X2 is None:
-            X2 = X
-        if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
-            self._Xcache = X
-            self._X2cache = X2
-            self._dot_product = np.dot(X,X2.T)
-        else:
-            # print "Cache hit!"
-            pass # TODO: insert debug message here (logging framework)
+        if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
+            self._Xcache = X.copy()
+            if X2 is None:
+                self._dot_product = tdot(X)
+                self._X2cache = None
+            else:
+                self._X2cache = X2.copy()
+                self._dot_product = np.dot(X,X2.T)

    def _psi_computations(self,Z,mu,S):
        #here are the "statistics" for psi1 and psi2
        if not np.all(Z==self._Z):
            #Z has changed, compute Z specific stuff
-            self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-            self._Z = Z
+            #self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+            self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
+            [tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
+            self._Z = Z.copy()
        if not (np.all(mu==self._mu) and np.all(S==self._S)):
            self.mu2_S = np.square(mu)+S
-            self._mu, self._S = mu, S
+            self._mu, self._S = mu.copy(), S.copy()
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@ -6,6 +6,7 @@ from kernpart import kernpart
 import numpy as np
 import hashlib
 from scipy import weave
+from ..util.linalg import tdot

 class rbf(kernpart):
    """
@ -74,11 +75,8 @@ class rbf(kernpart):
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]

    def K(self,X,X2,target):
-        if X2 is None:
-            X2 = X
-
        self._K_computations(X,X2)
-        np.add(self.variance*self._K_dvar, target,target)
+        target += self.variance*self._K_dvar

    def Kdiag(self,X,target):
        np.add(target,self.variance,target)
@ -87,6 +85,7 @@ class rbf(kernpart):
        self._K_computations(X,X2)
        target[0] += np.sum(self._K_dvar*dL_dK)
        if self.ARD:
+            if X2 is None: X2 = X
            [np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
        else:
            target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
@ -182,29 +181,31 @@ class rbf(kernpart):
    #---------------------------------------#

    def _K_computations(self,X,X2):
-        if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
+        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
            self._X = X.copy()
-            self._X2 = X2.copy()
            self._params == self._get_params().copy()
-            if X2 is None: X2 = X
-            #never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
-            #_K_dist = X[:,None,:]-X2[None,:,:]
-            #_K_dist2 = np.square(_K_dist/self.lengthscale)
-            X = X/self.lengthscale
-            X2 = X2/self.lengthscale
-            self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
+            if X2 is None:
+                self._X2 = None
+                X = X/self.lengthscale
+                Xsquare = np.sum(np.square(X),1)
+                self._K_dist2 = (-2.*tdot(X) + Xsquare[:,None] + Xsquare[None,:])
+            else:
+                self._X2 = X2.copy()
+                X = X/self.lengthscale
+                X2 = X2/self.lengthscale
+                self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
            self._K_dvar = np.exp(-0.5*self._K_dist2)

    def _psi_computations(self,Z,mu,S):
        #here are the "statistics" for psi1 and psi2
-        if not np.all(Z==self._Z):
+        if not np.array_equal(Z, self._Z):
            #Z has changed, compute Z specific stuff
            self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
            self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
            self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
            self._Z = Z

-        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
+        if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
            #something's changed. recompute EVERYTHING

            #psi1
--- a/GPy/kern/white.py
+++ b/GPy/kern/white.py
@ -30,17 +30,15 @@ class white(kernpart):
        return ['variance']

    def K(self,X,X2,target):
-        if X.shape==X2.shape:
-            if np.all(X==X2):
-                np.add(target,np.eye(X.shape[0])*self.variance,target)
+        if X2 is None:
+            target += np.eye(X.shape[0])*self.variance

    def Kdiag(self,X,target):
        target += self.variance

    def dK_dtheta(self,dL_dK,X,X2,target):
-        if X.shape==X2.shape:
-            if np.all(X==X2):
-                target += np.trace(dL_dK)
+        if X2 is None:
+            target += np.trace(dL_dK)

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        target += np.sum(dL_dKdiag)
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@ -30,7 +30,7 @@ class Gaussian(likelihood):
            self.trYYT = np.trace(self.YYT)
        else:
            self.YYT = None
-            self.trYYT = None
+            self.trYYT = np.sum(np.square(self.Y))

    def _get_params(self):
        return np.asarray(self._variance)
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@ -33,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
            X = self.initialise_latent(init, Q, Y)

        if X_variance is None:
-            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
+            X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)

        if Z is None:
            Z = np.random.permutation(X.copy())[:M]
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@ -3,7 +3,7 @@

 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
+from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
@ -50,9 +50,6 @@ class sparse_GP(GP):
            self.has_uncertain_inputs=True
            self.X_variance = X_variance

-        if not self.likelihood.is_heteroscedastic:
-            self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
-
        GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)

        #normalize X uncertainty also
@ -86,13 +83,15 @@ class sparse_GP(GP):
                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
            else:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
-                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                self.psi2_beta_scaled = tdot(tmp)
        else:
            if self.has_uncertain_inputs:
                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
            else:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
-                self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                #self.psi2_beta_scaled = np.dot(tmp,tmp.T)
+                self.psi2_beta_scaled = tdot(tmp)

        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)

@ -110,10 +109,11 @@ class sparse_GP(GP):
        self.psi1V = np.dot(self.psi1, self.V)
        tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
        self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
+        #TODO: can we multiply in C by forwardsubstitution?
        self.Cpsi1V = np.dot(self.C,self.psi1V)
        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
-        #self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
-        self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
+        #self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
+        self.E = tdot(self.Cpsi1V/sf)

        # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
@ -166,9 +166,9 @@ class sparse_GP(GP):
            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
        else:
            #likelihood is not heterscedatic
-            self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
+            self.partial_for_likelihood =   - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
-            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
+            self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
            self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))


--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase):
        bias = GPy.kern.bias(2)
        self.check_model_with_white(bias, model_type='GP_regression', dimension=2)

+    def test_GP_regression_linear_kern_1D_ARD(self):
+        ''' Testing the GP regression with linear kernel on 1d data '''
+        linear = GPy.kern.linear(1,ARD=True)
+        self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
+
+    def test_GP_regression_linear_kern_2D_ARD(self):
+        ''' Testing the GP regression with linear kernel on 2d data '''
+        linear = GPy.kern.linear(2,ARD=True)
+        self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
+
    def test_GP_regression_linear_kern_1D(self):
        ''' Testing the GP regression with linear kernel on 1d data '''
        linear = GPy.kern.linear(1)
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -196,7 +196,7 @@ def tdot_blas(mat, out=None):
    if (mat.dtype != 'float64') or (len(mat.shape) != 2):
        return np.dot(mat, mat.T)
    nn = mat.shape[0]
-    if not out:
+    if out is None:
        out = np.zeros((nn,nn))
    else:
        assert(out.dtype == 'float64')
@ -211,7 +211,7 @@ def tdot_blas(mat, out=None):
    # could avoid the copy. I also thought swapping to cblas API would allow use
    # of C order. However, I tried that and had errors with large matrices:
    # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
-    mat = mat.copy(order='F')
+    mat = np.asfortranarray(mat)
    TRANS = c_char('n')
    N = c_int(mat.shape[0])
    K = c_int(mat.shape[1])
@ -225,7 +225,7 @@ def tdot_blas(mat, out=None):
    _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
            byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))

-    symmetrify(out.T)
+    symmetrify(out,upper=True)

    return out

@ -235,7 +235,7 @@ def tdot(*args, **kwargs):
    else:
        return tdot_numpy(*args,**kwargs)

-def symmetrify(A):
+def symmetrify(A,upper=False):
    """
    Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper

@ -257,9 +257,13 @@ def symmetrify(A):
      }
    }
    """
-    if A.flags['C_CONTIGUOUS']:
+    if A.flags['C_CONTIGUOUS'] and upper:
+        weave.inline(f_contig_code,['A','N'])
+    elif A.flags['C_CONTIGUOUS'] and not upper:
        weave.inline(c_contig_code,['A','N'])
-    elif A.flags['F_CONTIGUOUS']:
+    elif A.flags['F_CONTIGUOUS'] and upper:
+        weave.inline(c_contig_code,['A','N'])
+    elif A.flags['F_CONTIGUOUS'] and not upper:
        weave.inline(f_contig_code,['A','N'])
    else:
        tmp = np.tril(A)