James and Nicolos massive Yak shaving session

This commit is contained in:
James Hensman 2013-04-26 19:32:33 +01:00
parent 3cb445886d
commit 9fb090a508
9 changed files with 90 additions and 64 deletions

View file

@ -289,9 +289,11 @@ class kern(parameterised):
assert X.shape[1] == self.D
slices1, slices2 = self._process_slices(slices1, slices2)
if X2 is None:
X2 = X
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
return target
def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
@ -308,10 +310,12 @@ class kern(parameterised):
"""
assert X.shape[1] == self.D
slices1, slices2 = self._process_slices(slices1, slices2)
if X2 is None:
X2 = X
target = np.zeros(self.Nparam)
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
if X2 is None:
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
else:
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
return self._transform_gradients(target)

View file

@ -4,6 +4,7 @@
from kernpart import kernpart
import numpy as np
from ..util.linalg import tdot
class linear(kernpart):
"""
@ -65,8 +66,11 @@ class linear(kernpart):
def K(self,X,X2,target):
if self.ARD:
XX = X*np.sqrt(self.variances)
XX2 = X2*np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2*np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
self._K_computations(X, X2)
target += self.variances * self._dot_product
@ -76,8 +80,11 @@ class linear(kernpart):
def dK_dtheta(self,dL_dK,X,X2,target):
if self.ARD:
product = X[:,None,:]*X2[None,:,:]
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
if X2 is None:
[np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
else:
product = X[:,None,:]*X2[None,:,:]
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
else:
self._K_computations(X, X2)
target += np.sum(self._dot_product*dL_dK)
@ -133,9 +140,9 @@ class linear(kernpart):
returns N,M,M matrix
"""
self._psi_computations(Z,mu,S)
psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
target += psi2.sum(-1)
#TODO: this could be faster using np.tensordot
#psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
#target += psi2.sum(-1)
target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
@ -156,28 +163,30 @@ class linear(kernpart):
self._psi_computations(Z,mu,S)
mu2_S = np.sum(self.mu2_S,0)# Q,
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
#TODO: tensordot would gain some time here
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self,X,X2):
if X2 is None:
X2 = X
if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
self._Xcache = X
self._X2cache = X2
self._dot_product = np.dot(X,X2.T)
else:
# print "Cache hit!"
pass # TODO: insert debug message here (logging framework)
if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
self._Xcache = X.copy()
if X2 is None:
self._dot_product = tdot(X)
self._X2cache = None
else:
self._X2cache = X2.copy()
self._dot_product = np.dot(X,X2.T)
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
#Z has changed, compute Z specific stuff
self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self._Z = Z
#self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
[tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
self._Z = Z.copy()
if not (np.all(mu==self._mu) and np.all(S==self._S)):
self.mu2_S = np.square(mu)+S
self._mu, self._S = mu, S
self._mu, self._S = mu.copy(), S.copy()

View file

@ -6,6 +6,7 @@ from kernpart import kernpart
import numpy as np
import hashlib
from scipy import weave
from ..util.linalg import tdot
class rbf(kernpart):
"""
@ -74,11 +75,8 @@ class rbf(kernpart):
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
if X2 is None:
X2 = X
self._K_computations(X,X2)
np.add(self.variance*self._K_dvar, target,target)
target += self.variance*self._K_dvar
def Kdiag(self,X,target):
np.add(target,self.variance,target)
@ -87,6 +85,7 @@ class rbf(kernpart):
self._K_computations(X,X2)
target[0] += np.sum(self._K_dvar*dL_dK)
if self.ARD:
if X2 is None: X2 = X
[np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
else:
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
@ -182,29 +181,31 @@ class rbf(kernpart):
#---------------------------------------#
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._X2 = X2.copy()
self._params == self._get_params().copy()
if X2 is None: X2 = X
#never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
#_K_dist = X[:,None,:]-X2[None,:,:]
#_K_dist2 = np.square(_K_dist/self.lengthscale)
X = X/self.lengthscale
X2 = X2/self.lengthscale
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
if X2 is None:
self._X2 = None
X = X/self.lengthscale
Xsquare = np.sum(np.square(X),1)
self._K_dist2 = (-2.*tdot(X) + Xsquare[:,None] + Xsquare[None,:])
else:
self._X2 = X2.copy()
X = X/self.lengthscale
X2 = X2/self.lengthscale
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
self._K_dvar = np.exp(-0.5*self._K_dist2)
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
if not np.array_equal(Z, self._Z):
#Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
self._Z = Z
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
#something's changed. recompute EVERYTHING
#psi1

View file

@ -30,17 +30,15 @@ class white(kernpart):
return ['variance']
def K(self,X,X2,target):
if X.shape==X2.shape:
if np.all(X==X2):
np.add(target,np.eye(X.shape[0])*self.variance,target)
if X2 is None:
target += np.eye(X.shape[0])*self.variance
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target):
if X.shape==X2.shape:
if np.all(X==X2):
target += np.trace(dL_dK)
if X2 is None:
target += np.trace(dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)

View file

@ -30,7 +30,7 @@ class Gaussian(likelihood):
self.trYYT = np.trace(self.YYT)
else:
self.YYT = None
self.trYYT = None
self.trYYT = np.sum(np.square(self.Y))
def _get_params(self):
return np.asarray(self._variance)

View file

@ -33,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
X = self.initialise_latent(init, Q, Y)
if X_variance is None:
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
if Z is None:
Z = np.random.permutation(X.copy())[:M]

View file

@ -3,7 +3,7 @@
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot
from ..util.plot import gpplot
from .. import kern
from GP import GP
@ -50,9 +50,6 @@ class sparse_GP(GP):
self.has_uncertain_inputs=True
self.X_variance = X_variance
if not self.likelihood.is_heteroscedastic:
self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
#normalize X uncertainty also
@ -86,13 +83,15 @@ class sparse_GP(GP):
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp)
else:
if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
@ -110,10 +109,11 @@ class sparse_GP(GP):
self.psi1V = np.dot(self.psi1, self.V)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
#TODO: can we multiply in C by forwardsubstitution?
self.Cpsi1V = np.dot(self.C,self.psi1V)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
#self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
self.E = tdot(self.Cpsi1V/sf)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
@ -166,9 +166,9 @@ class sparse_GP(GP):
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else:
#likelihood is not heterscedatic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))

View file

@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase):
bias = GPy.kern.bias(2)
self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
def test_GP_regression_linear_kern_1D_ARD(self):
''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1,ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
def test_GP_regression_linear_kern_2D_ARD(self):
''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2,ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
def test_GP_regression_linear_kern_1D(self):
''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1)

View file

@ -196,7 +196,7 @@ def tdot_blas(mat, out=None):
if (mat.dtype != 'float64') or (len(mat.shape) != 2):
return np.dot(mat, mat.T)
nn = mat.shape[0]
if not out:
if out is None:
out = np.zeros((nn,nn))
else:
assert(out.dtype == 'float64')
@ -211,7 +211,7 @@ def tdot_blas(mat, out=None):
# could avoid the copy. I also thought swapping to cblas API would allow use
# of C order. However, I tried that and had errors with large matrices:
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
mat = mat.copy(order='F')
mat = np.asfortranarray(mat)
TRANS = c_char('n')
N = c_int(mat.shape[0])
K = c_int(mat.shape[1])
@ -225,7 +225,7 @@ def tdot_blas(mat, out=None):
_blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
symmetrify(out.T)
symmetrify(out,upper=True)
return out
@ -235,7 +235,7 @@ def tdot(*args, **kwargs):
else:
return tdot_numpy(*args,**kwargs)
def symmetrify(A):
def symmetrify(A,upper=False):
"""
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
@ -257,9 +257,13 @@ def symmetrify(A):
}
}
"""
if A.flags['C_CONTIGUOUS']:
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N'])
elif A.flags['F_CONTIGUOUS']:
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N'])
else:
tmp = np.tril(A)