mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-14 15:25:15 +02:00
James and Nicolos massive Yak shaving session
This commit is contained in:
parent
3cb445886d
commit
9fb090a508
9 changed files with 90 additions and 64 deletions
|
|
@ -289,9 +289,11 @@ class kern(parameterised):
|
|||
assert X.shape[1] == self.D
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||
[p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
target = np.zeros((X.shape[0], X.shape[0]))
|
||||
[p.K(X[s1, i_s], None, target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
else:
|
||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||
[p.K(X[s1, i_s], X2[s2, i_s], target=target[s1, s2]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
|
||||
return target
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2=None, slices1=None, slices2=None):
|
||||
|
|
@ -308,10 +310,12 @@ class kern(parameterised):
|
|||
"""
|
||||
assert X.shape[1] == self.D
|
||||
slices1, slices2 = self._process_slices(slices1, slices2)
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
target = np.zeros(self.Nparam)
|
||||
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
|
||||
if X2 is None:
|
||||
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], None, target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
|
||||
else:
|
||||
[p.dK_dtheta(dL_dK[s1, s2], X[s1, i_s], X2[s2, i_s], target[ps]) for p, i_s, ps, s1, s2 in zip(self.parts, self.input_slices,self.param_slices, slices1, slices2)]
|
||||
|
||||
|
||||
return self._transform_gradients(target)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
from ..util.linalg import tdot
|
||||
|
||||
class linear(kernpart):
|
||||
"""
|
||||
|
|
@ -65,8 +66,11 @@ class linear(kernpart):
|
|||
def K(self,X,X2,target):
|
||||
if self.ARD:
|
||||
XX = X*np.sqrt(self.variances)
|
||||
XX2 = X2*np.sqrt(self.variances)
|
||||
target += np.dot(XX, XX2.T)
|
||||
if X2 is None:
|
||||
target += tdot(XX)
|
||||
else:
|
||||
XX2 = X2*np.sqrt(self.variances)
|
||||
target += np.dot(XX, XX2.T)
|
||||
else:
|
||||
self._K_computations(X, X2)
|
||||
target += self.variances * self._dot_product
|
||||
|
|
@ -76,8 +80,11 @@ class linear(kernpart):
|
|||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
if self.ARD:
|
||||
product = X[:,None,:]*X2[None,:,:]
|
||||
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
|
||||
if X2 is None:
|
||||
[np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
|
||||
else:
|
||||
product = X[:,None,:]*X2[None,:,:]
|
||||
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
|
||||
else:
|
||||
self._K_computations(X, X2)
|
||||
target += np.sum(self._dot_product*dL_dK)
|
||||
|
|
@ -133,9 +140,9 @@ class linear(kernpart):
|
|||
returns N,M,M matrix
|
||||
"""
|
||||
self._psi_computations(Z,mu,S)
|
||||
psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
|
||||
target += psi2.sum(-1)
|
||||
#TODO: this could be faster using np.tensordot
|
||||
#psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
|
||||
#target += psi2.sum(-1)
|
||||
target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
|
||||
|
||||
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
|
||||
self._psi_computations(Z,mu,S)
|
||||
|
|
@ -156,28 +163,30 @@ class linear(kernpart):
|
|||
self._psi_computations(Z,mu,S)
|
||||
mu2_S = np.sum(self.mu2_S,0)# Q,
|
||||
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
|
||||
#TODO: tensordot would gain some time here
|
||||
|
||||
#---------------------------------------#
|
||||
# Precomputations #
|
||||
#---------------------------------------#
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
|
||||
self._Xcache = X
|
||||
self._X2cache = X2
|
||||
self._dot_product = np.dot(X,X2.T)
|
||||
else:
|
||||
# print "Cache hit!"
|
||||
pass # TODO: insert debug message here (logging framework)
|
||||
if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
|
||||
self._Xcache = X.copy()
|
||||
if X2 is None:
|
||||
self._dot_product = tdot(X)
|
||||
self._X2cache = None
|
||||
else:
|
||||
self._X2cache = X2.copy()
|
||||
self._dot_product = np.dot(X,X2.T)
|
||||
|
||||
def _psi_computations(self,Z,mu,S):
|
||||
#here are the "statistics" for psi1 and psi2
|
||||
if not np.all(Z==self._Z):
|
||||
#Z has changed, compute Z specific stuff
|
||||
self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
|
||||
self._Z = Z
|
||||
#self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
|
||||
self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
|
||||
[tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
|
||||
self._Z = Z.copy()
|
||||
if not (np.all(mu==self._mu) and np.all(S==self._S)):
|
||||
self.mu2_S = np.square(mu)+S
|
||||
self._mu, self._S = mu, S
|
||||
self._mu, self._S = mu.copy(), S.copy()
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from kernpart import kernpart
|
|||
import numpy as np
|
||||
import hashlib
|
||||
from scipy import weave
|
||||
from ..util.linalg import tdot
|
||||
|
||||
class rbf(kernpart):
|
||||
"""
|
||||
|
|
@ -74,11 +75,8 @@ class rbf(kernpart):
|
|||
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
|
||||
|
||||
def K(self,X,X2,target):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
|
||||
self._K_computations(X,X2)
|
||||
np.add(self.variance*self._K_dvar, target,target)
|
||||
target += self.variance*self._K_dvar
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
np.add(target,self.variance,target)
|
||||
|
|
@ -87,6 +85,7 @@ class rbf(kernpart):
|
|||
self._K_computations(X,X2)
|
||||
target[0] += np.sum(self._K_dvar*dL_dK)
|
||||
if self.ARD:
|
||||
if X2 is None: X2 = X
|
||||
[np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
|
||||
else:
|
||||
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
|
||||
|
|
@ -182,29 +181,31 @@ class rbf(kernpart):
|
|||
#---------------------------------------#
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
|
||||
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
|
||||
self._X = X.copy()
|
||||
self._X2 = X2.copy()
|
||||
self._params == self._get_params().copy()
|
||||
if X2 is None: X2 = X
|
||||
#never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
|
||||
#_K_dist = X[:,None,:]-X2[None,:,:]
|
||||
#_K_dist2 = np.square(_K_dist/self.lengthscale)
|
||||
X = X/self.lengthscale
|
||||
X2 = X2/self.lengthscale
|
||||
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
|
||||
if X2 is None:
|
||||
self._X2 = None
|
||||
X = X/self.lengthscale
|
||||
Xsquare = np.sum(np.square(X),1)
|
||||
self._K_dist2 = (-2.*tdot(X) + Xsquare[:,None] + Xsquare[None,:])
|
||||
else:
|
||||
self._X2 = X2.copy()
|
||||
X = X/self.lengthscale
|
||||
X2 = X2/self.lengthscale
|
||||
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
|
||||
self._K_dvar = np.exp(-0.5*self._K_dist2)
|
||||
|
||||
def _psi_computations(self,Z,mu,S):
|
||||
#here are the "statistics" for psi1 and psi2
|
||||
if not np.all(Z==self._Z):
|
||||
if not np.array_equal(Z, self._Z):
|
||||
#Z has changed, compute Z specific stuff
|
||||
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
|
||||
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
|
||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
|
||||
self._Z = Z
|
||||
|
||||
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
|
||||
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
|
||||
#something's changed. recompute EVERYTHING
|
||||
|
||||
#psi1
|
||||
|
|
|
|||
|
|
@ -30,17 +30,15 @@ class white(kernpart):
|
|||
return ['variance']
|
||||
|
||||
def K(self,X,X2,target):
|
||||
if X.shape==X2.shape:
|
||||
if np.all(X==X2):
|
||||
np.add(target,np.eye(X.shape[0])*self.variance,target)
|
||||
if X2 is None:
|
||||
target += np.eye(X.shape[0])*self.variance
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
target += self.variance
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
if X.shape==X2.shape:
|
||||
if np.all(X==X2):
|
||||
target += np.trace(dL_dK)
|
||||
if X2 is None:
|
||||
target += np.trace(dL_dK)
|
||||
|
||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
target += np.sum(dL_dKdiag)
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ class Gaussian(likelihood):
|
|||
self.trYYT = np.trace(self.YYT)
|
||||
else:
|
||||
self.YYT = None
|
||||
self.trYYT = None
|
||||
self.trYYT = np.sum(np.square(self.Y))
|
||||
|
||||
def _get_params(self):
|
||||
return np.asarray(self._variance)
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
|||
X = self.initialise_latent(init, Q, Y)
|
||||
|
||||
if X_variance is None:
|
||||
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
|
||||
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.copy())[:M]
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
import numpy as np
|
||||
import pylab as pb
|
||||
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
|
||||
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot
|
||||
from ..util.plot import gpplot
|
||||
from .. import kern
|
||||
from GP import GP
|
||||
|
|
@ -50,9 +50,6 @@ class sparse_GP(GP):
|
|||
self.has_uncertain_inputs=True
|
||||
self.X_variance = X_variance
|
||||
|
||||
if not self.likelihood.is_heteroscedastic:
|
||||
self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
|
||||
|
||||
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
|
||||
|
||||
#normalize X uncertainty also
|
||||
|
|
@ -86,13 +83,15 @@ class sparse_GP(GP):
|
|||
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
|
||||
else:
|
||||
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
|
||||
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
||||
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
||||
self.psi2_beta_scaled = tdot(tmp)
|
||||
else:
|
||||
if self.has_uncertain_inputs:
|
||||
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
|
||||
else:
|
||||
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
|
||||
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
||||
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
||||
self.psi2_beta_scaled = tdot(tmp)
|
||||
|
||||
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
|
||||
|
||||
|
|
@ -110,10 +109,11 @@ class sparse_GP(GP):
|
|||
self.psi1V = np.dot(self.psi1, self.V)
|
||||
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
|
||||
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
|
||||
#TODO: can we multiply in C by forwardsubstitution?
|
||||
self.Cpsi1V = np.dot(self.C,self.psi1V)
|
||||
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
|
||||
#self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
|
||||
self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
|
||||
#self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
|
||||
self.E = tdot(self.Cpsi1V/sf)
|
||||
|
||||
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
|
||||
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
|
||||
|
|
@ -166,9 +166,9 @@ class sparse_GP(GP):
|
|||
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
|
||||
else:
|
||||
#likelihood is not heterscedatic
|
||||
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
|
||||
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
|
||||
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
|
||||
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision # TODO: unstable?
|
||||
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
|
||||
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase):
|
|||
bias = GPy.kern.bias(2)
|
||||
self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
|
||||
|
||||
def test_GP_regression_linear_kern_1D_ARD(self):
|
||||
''' Testing the GP regression with linear kernel on 1d data '''
|
||||
linear = GPy.kern.linear(1,ARD=True)
|
||||
self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
|
||||
|
||||
def test_GP_regression_linear_kern_2D_ARD(self):
|
||||
''' Testing the GP regression with linear kernel on 2d data '''
|
||||
linear = GPy.kern.linear(2,ARD=True)
|
||||
self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
|
||||
|
||||
def test_GP_regression_linear_kern_1D(self):
|
||||
''' Testing the GP regression with linear kernel on 1d data '''
|
||||
linear = GPy.kern.linear(1)
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ def tdot_blas(mat, out=None):
|
|||
if (mat.dtype != 'float64') or (len(mat.shape) != 2):
|
||||
return np.dot(mat, mat.T)
|
||||
nn = mat.shape[0]
|
||||
if not out:
|
||||
if out is None:
|
||||
out = np.zeros((nn,nn))
|
||||
else:
|
||||
assert(out.dtype == 'float64')
|
||||
|
|
@ -211,7 +211,7 @@ def tdot_blas(mat, out=None):
|
|||
# could avoid the copy. I also thought swapping to cblas API would allow use
|
||||
# of C order. However, I tried that and had errors with large matrices:
|
||||
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
|
||||
mat = mat.copy(order='F')
|
||||
mat = np.asfortranarray(mat)
|
||||
TRANS = c_char('n')
|
||||
N = c_int(mat.shape[0])
|
||||
K = c_int(mat.shape[1])
|
||||
|
|
@ -225,7 +225,7 @@ def tdot_blas(mat, out=None):
|
|||
_blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
|
||||
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
|
||||
|
||||
symmetrify(out.T)
|
||||
symmetrify(out,upper=True)
|
||||
|
||||
return out
|
||||
|
||||
|
|
@ -235,7 +235,7 @@ def tdot(*args, **kwargs):
|
|||
else:
|
||||
return tdot_numpy(*args,**kwargs)
|
||||
|
||||
def symmetrify(A):
|
||||
def symmetrify(A,upper=False):
|
||||
"""
|
||||
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
|
||||
|
||||
|
|
@ -257,9 +257,13 @@ def symmetrify(A):
|
|||
}
|
||||
}
|
||||
"""
|
||||
if A.flags['C_CONTIGUOUS']:
|
||||
if A.flags['C_CONTIGUOUS'] and upper:
|
||||
weave.inline(f_contig_code,['A','N'])
|
||||
elif A.flags['C_CONTIGUOUS'] and not upper:
|
||||
weave.inline(c_contig_code,['A','N'])
|
||||
elif A.flags['F_CONTIGUOUS']:
|
||||
elif A.flags['F_CONTIGUOUS'] and upper:
|
||||
weave.inline(c_contig_code,['A','N'])
|
||||
elif A.flags['F_CONTIGUOUS'] and not upper:
|
||||
weave.inline(f_contig_code,['A','N'])
|
||||
else:
|
||||
tmp = np.tril(A)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue