Merge branch 'devel' of github.com:/sheffieldml/GPy into devel

commit 64ccd3bea9
Author: James Hensman
Date:   2015-04-01 09:15:52 +01:00
17 changed files with 867 additions and 401 deletions

View file

@@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior):
     def KL_divergence(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma,gamma1 = variational_posterior.gamma_probabilities()
-        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
+        gamma = variational_posterior.gamma.values
         if len(self.pi.shape)==2:
-            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+            idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
            pi = self.pi[idx]
         else:
             pi = self.pi
         var_mean = np.square(mu)/self.variance
         var_S = (S/self.variance - np.log(S))
-        var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
+        var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
         return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.

     def update_gradients_KL(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
-        gamma,gamma1 = variational_posterior.gamma_probabilities()
-        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
+        gamma = variational_posterior.gamma.values
         if len(self.pi.shape)==2:
-            idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
+            idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
             pi = self.pi[idx]
         else:
             pi = self.pi
-        variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
+        variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
         mu.gradient -= gamma*mu/self.variance
         S.gradient -= (1./self.variance - 1./S) * gamma /2.
         if self.learnPi:
@@ -162,25 +160,9 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         binary_prob : the probability of the distribution on the slab part.
         """
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
-        self.gamma = Param("binary_prob",binary_prob)
+        self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
         self.link_parameter(self.gamma)

-    @Cache_this(limit=5)
-    def gamma_probabilities(self):
-        prob = np.zeros_like(param_to_array(self.gamma))
-        prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
-        prob1 = -np.zeros_like(param_to_array(self.gamma))
-        prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
-        return prob, prob1
-
-    @Cache_this(limit=5)
-    def gamma_log_prob(self):
-        loggamma = param_to_array(self.gamma).copy()
-        loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
-        loggamma1 = -param_to_array(self.gamma).copy()
-        loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
-        return loggamma,loggamma1
-
     def set_gradients(self, grad):
         self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
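
Note: this change drops the cached logit-space helpers (gamma_probabilities, gamma_log_prob) and stores gamma directly, constrained to (0,1) by Logistic(0.,1.), so the KL term becomes a plain Bernoulli KL against the prior pi. A minimal sketch of that term, for illustration only (toy arrays, not part of the commit):

    import numpy as np

    def bernoulli_kl(gamma, pi):
        # KL( Bernoulli(gamma) || Bernoulli(pi) ), summed over entries;
        # this is the var_gamma term in SpikeAndSlabPrior.KL_divergence
        return (gamma*np.log(gamma/pi)).sum() + ((1-gamma)*np.log((1-gamma)/(1-pi))).sum()

    gamma = np.array([0.2, 0.9])
    pi = 0.5
    # The old parameterisation stored a logit; squashing it back through the
    # sigmoid recovers the same probabilities, hence the same KL value.
    logit = np.log(gamma/(1.-gamma))
    assert np.allclose(1./(1.+np.exp(-logit)), gamma)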

View file

@@ -62,7 +62,7 @@ class InferenceMethodList(LatentFunctionInference, list):
         self.append(inf)

 from exact_gaussian_inference import ExactGaussianInference
-from laplace import Laplace
+from laplace import Laplace, LaplaceBlock
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
 from expectation_propagation import EP
 from expectation_propagation_dtc import EPDTC
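
Note: LaplaceBlock is exported alongside Laplace here so it can be passed as an inference method. A hedged usage sketch (toy data; assumes the usual GPy.core.GP constructor):

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = np.sin(3*X) + 0.1*np.random.randn(20, 1)
    lik = GPy.likelihoods.StudentT(deg_free=5, sigma2=0.1)
    # LaplaceBlock (added in this commit) keeps the full negative Hessian W of
    # the likelihood, where Laplace assumes W is diagonal.
    m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1), likelihood=lik,
                    inference_method=GPy.inference.latent_function_inference.LaplaceBlock())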

View file

@@ -43,28 +43,31 @@ class Laplace(LatentFunctionInference):
         """
         Returns a Posterior class containing essential quantities of the posterior
         """
         # Compute K
         K = kern.K(X)

         #Find mode
         if self.bad_fhat or self.first_run:
             Ki_f_init = np.zeros_like(Y)
-            first_run = False
+            self.first_run = False
         else:
             Ki_f_init = self._previous_Ki_fhat
+        Ki_f_init = np.zeros_like(Y)# FIXME: take this out

         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
         self.f_hat = f_hat
-        self.Ki_fhat = Ki_fhat
-        self.K = K.copy()
+        #self.Ki_fhat = Ki_fhat
+        #self.K = K.copy()

         #Compute hessian and other variables at mode
         log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
         self._previous_Ki_fhat = Ki_fhat.copy()
         return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}

-    def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
+    def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
         """
         Rasmussen's numerically stable mode finding
         For nomenclature see Rasmussen & Williams 2006
@@ -89,7 +92,12 @@ class Laplace(LatentFunctionInference):
         #define the objective function (to be maximised)
         def obj(Ki_f, f):
-            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
+            ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
+            if np.isnan(ll):
+                return -np.inf
+            else:
+                return ll

         difference = np.inf
         iteration = 0
@@ -104,7 +112,7 @@ class Laplace(LatentFunctionInference):
             W_f = W*f
             b = W_f + grad # R+W p46 line 6.
-            W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
+            W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
             W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
             #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
@@ -121,7 +129,9 @@ class Laplace(LatentFunctionInference):
             step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
             Ki_f_new = Ki_f + step*dKi_f
             f_new = np.dot(K, Ki_f_new)
+            #print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
+            if obj(Ki_f_new, f_new) < obj(Ki_f, f):
+                raise ValueError("Shouldn't happen, brent optimization failing")
             difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
             Ki_f = Ki_f_new
             f = f_new
@@ -152,14 +162,10 @@ class Laplace(LatentFunctionInference):
         if np.any(np.isnan(W)):
             raise ValueError('One or more element(s) of W is NaN')
-        K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
-
-        #compute vital matrices
-        C = np.dot(LiW12, K)
-        Ki_W_i = K - C.T.dot(C)
+        K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)

         #compute the log marginal
-        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L)))
+        log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW

         # Compute matrices for derivatives
         dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
@@ -196,23 +202,23 @@ class Laplace(LatentFunctionInference):
             dL_dthetaL = np.zeros(num_params)
             for thetaL_i in range(num_params):
                 #Explicit
-                dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
+                dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :])
                                 # The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
-                                + 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
+                                + 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :]))
                                 )
                 #Implicit
-                dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
-                #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
+                dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :])
+                #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :])
                 dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
-                dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
+                dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp)
         else:
             dL_dthetaL = np.zeros(likelihood.size)

         return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

-    def _compute_B_statistics(self, K, W, log_concave):
+    def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
         """
         Rasmussen suggests the use of a numerically stable positive definite matrix B
         which has positive diagonal elements and can be easily inverted
@@ -225,7 +231,7 @@ class Laplace(LatentFunctionInference):
         """
         if not log_concave:
             #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
-            W[W<1e-6] = 1e-6
+            W = np.clip(W, 1e-6, 1e+30)
             # NOTE: when setting a parameter inside parameters_changed it will always come to closed update circles!!!
             #W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
             # If the likelihood is non-log-concave. We want to say that there is a negative variance
@@ -247,5 +253,160 @@ class Laplace(LatentFunctionInference):
         #K_Wi_i_2 , _= dpotri(L2)
         #symmetrify(K_Wi_i_2)
-        return K_Wi_i, L, LiW12
+        #compute vital matrices
+        C = np.dot(LiW12, K)
+        Ki_W_i = K - C.T.dot(C)
+        I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
+        logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
+        return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
+
+
+class LaplaceBlock(Laplace):
+
+    def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
+        Ki_f = Ki_f_init.copy()
+        f = np.dot(K, Ki_f)
+
+        #define the objective function (to be maximised)
+        def obj(Ki_f, f):
+            ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata))
+            if np.isnan(ll):
+                return -np.inf
+            else:
+                return ll
+
+        difference = np.inf
+        iteration = 0
+        I = np.eye(K.shape[0])
+        while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
+            W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
+            W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
+            W_f = np.dot(W, f)
+            grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
+            b = W_f + grad # R+W p46 line 6.
+            K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
+            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
+            #a = (I - (K+Wi)i*K)*b
+            full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b)
+            dKi_f = full_step_Ki_f - Ki_f
+
+            #define an objective for the line search (minimize this one)
+            def inner_obj(step_size):
+                Ki_f_trial = Ki_f + step_size*dKi_f
+                f_trial = np.dot(K, Ki_f_trial)
+                return -obj(Ki_f_trial, f_trial)
+
+            #use scipy for the line search, then compute new values of f, Ki_f
+            step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
+            Ki_f_new = Ki_f + step*dKi_f
+            f_new = np.dot(K, Ki_f_new)
+            difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
+            Ki_f = Ki_f_new
+            f = f_new
+            iteration += 1
+
+        #Warn of bad fits
+        if difference > self._mode_finding_tolerance:
+            if not self.bad_fhat:
+                warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
+            self._previous_Ki_fhat = np.zeros_like(Y)
+            self.bad_fhat = True
+        elif self.bad_fhat:
+            self.bad_fhat = False
+            warnings.warn("f_hat now fine again")
+
+        if iteration > self._mode_finding_max_iter:
+            warnings.warn("didn't find the best")
+
+        return f, Ki_f
+
+    def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
+        #At this point get the hessian matrix (or vector as W is diagonal)
+        W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
+        W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
+
+        K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
+
+        #compute the log marginal
+        #FIXME: The determinant should be output_dim*0.5 I think, gradients may now no longer check
+        log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det
+
+        #Compute vital matrices for derivatives
+        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
+        #dL_dfhat = np.zeros((f_hat.shape[0]))
+        #for i in range(f_hat.shape[0]):
+            #dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i]))
+        dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df)
+        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
+
+        ####################
+        #compute dL_dK#
+        ####################
+        if kern.size > 0 and not kern.is_fixed:
+            #Explicit
+            explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
+            #Implicit
+            implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i)
+            #implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i)
+            dL_dK = explicit_part + implicit_part
+        else:
+            dL_dK = np.zeros_like(K)
+
+        ####################
+        #compute dL_dthetaL#
+        ####################
+        if likelihood.size > 0 and not likelihood.is_fixed:
+            raise NotImplementedError
+        else:
+            dL_dthetaL = np.zeros(likelihood.size)
+
+        #self.K_Wi_i = K_Wi_i
+        #self.Ki_W_i = Ki_W_i
+        #self.W = W
+        #self.K = K
+        #self.dL_dfhat = dL_dfhat
+        #self.explicit_part = explicit_part
+        #self.implicit_part = implicit_part
+
+        return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
+
+    def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
+        """
+        Rasmussen suggests the use of a numerically stable positive definite matrix B
+        which has positive diagonal elements and can be easily inverted
+
+        :param K: Prior Covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        :param W: Negative hessian at a point (diagonal matrix)
+        :type W: Vector of diagonal values of hessian (1xN)
+        :returns: (K_Wi_i, L_B, not_provided)
+        """
+        #w = GPy.util.diag.view(W)
+        #W[:] = np.where(w<1e-6, 1e-6, w)
+        #B = I + KW
+        B = np.eye(K.shape[0]) + np.dot(K, W)
+        #Bi, L, Li, logdetB = pdinv(B)
+        Bi = np.linalg.inv(B)
+        #K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K)
+        K_Wi_i = np.dot(W, Bi)
+        #self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W))
+        #self.B = B
+        #self.Bi = Bi
+        Ki_W_i = np.dot(Bi, K)
+        sign, logdetB = np.linalg.slogdet(B)
+        return K_Wi_i, sign*logdetB, Bi, Ki_W_i
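
Note: the refactored _compute_B_statistics now returns (K_Wi_i, logdet, I_KW_i, Ki_W_i), i.e. (K + W^-1)^-1, log|I + KW|, (I + KW)^-1 and (K^-1 + W)^-1. A quick numerical check of those identities on random SPD matrices (illustration only, not part of the commit):

    import numpy as np

    n = 5
    A = np.random.randn(n, n)
    K = A.dot(A.T) + n*np.eye(n)           # SPD stand-in for the prior covariance
    W = np.diag(np.random.rand(n) + 0.1)   # positive negative-Hessian, as a full matrix

    B = np.eye(n) + np.dot(K, W)
    Bi = np.linalg.inv(B)
    assert np.allclose(np.dot(W, Bi), np.linalg.inv(K + np.linalg.inv(W)))   # K_Wi_i
    assert np.allclose(np.dot(Bi, K), np.linalg.inv(np.linalg.inv(K) + W))   # Ki_W_i
    assert np.allclose(np.eye(n) - K.dot(np.dot(W, Bi)), Bi)                 # I_KW_i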

View file

@@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference):
         Kmm = kern.K(Z).copy()
         diag.add(Kmm, self.const_jitter)
-        Lm = jitchol(Kmm, maxtries=100)
+        if not np.isfinite(Kmm).all():
+            print Kmm
+        Lm = jitchol(Kmm)

         LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
         Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
-        LL = jitchol(Lambda, maxtries=100)
+        LL = jitchol(Lambda)
         logdet_L = 2.*np.sum(np.log(np.diag(LL)))
         b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
         bbt = np.square(b).sum()
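
Note: jitchol is GPy's Cholesky-with-jitter; dropping maxtries=100 reverts to its default retry count. Roughly what it does, as a sketch (the real implementation lives in GPy.util.linalg):

    import numpy as np

    def jitchol_sketch(A, maxtries=5):
        # Try a plain Cholesky first; on failure, add exponentially growing
        # diagonal jitter until the factorisation succeeds or we give up.
        try:
            return np.linalg.cholesky(A)
        except np.linalg.LinAlgError:
            jitter = np.diag(A).mean() * 1e-6
            for _ in range(maxtries):
                try:
                    return np.linalg.cholesky(A + jitter*np.eye(A.shape[0]))
                except np.linalg.LinAlgError:
                    jitter *= 10.
        raise np.linalg.LinAlgError("matrix not positive definite, even with jitter")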

View file

@@ -16,5 +16,6 @@ from _src.poly import Poly
 from _src.eq_ode2 import EQ_ODE2
 from _src.trunclinear import TruncLinear,TruncLinear_inf
-from _src.splitKern import SplitKern,DiffGenomeKern
+from _src.splitKern import SplitKern,DEtime
+from _src.splitKern import DEtime as DiffGenomeKern
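
Note: the rename keeps a backward-compatible alias, so existing code importing DiffGenomeKern keeps working:

    # Both names refer to the same class after this change:
    from GPy.kern import DEtime, DiffGenomeKern
    assert DEtime is DiffGenomeKern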

View file

@@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati
     # Compute for psi0 and psi1
     mu2S = np.square(mu)+S
-    dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
-    dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
-    dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
-    dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
-    dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
+    dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0)
+    dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance
+    dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance
+    dL_dS += dL_dpsi0[:,None]*variance*gamma
+    dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance

     return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
@@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
     gamma2 = np.square(gamma)
     variance2 = np.square(variance)
     mu2S = mu2+S # NxQ
-    gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance)
-    common_sum = np.einsum('nq,mq->nm',gvm,Z)
-    # common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
-    Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
+    gvm = gamma*mu*variance
+    common_sum = gvm.dot(Z.T)
+    Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
+    Z_expect_var2 = Z_expect*variance2
     dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
-    tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z)
-    common_expect = np.einsum('mq,nm->nq',tmp,common_sum)
-    # common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
-    Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
-    Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
+    common_expect = common_sum.dot(dL_dpsi2T).dot(Z)
+    Z2_expect = common_sum.dot(dL_dpsi2T)
+    Z1_expect = dL_dpsi2T.dot(Z)

-    dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
-             np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
-    dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
-             np.einsum('nq,q,nq->nq',common_expect,variance,mu)
-    dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
-             np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
-    dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)
-    # dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
-    dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
+    dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0)
+    dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance
+    dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance
+    dL_dS = gamma*Z_expect_var2
+    dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance

     return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
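
Note: these rewrites replace np.einsum contractions with equivalent dot/broadcast expressions (at the time, einsum did not dispatch to BLAS and was markedly slower). One of the rewrites checked numerically, as an illustration with toy shapes:

    import numpy as np

    N, M, Q = 7, 4, 3
    dL_dpsi1 = np.random.randn(N, M)
    gamma = np.random.rand(N, Q)
    mu = np.random.randn(N, Q)
    variance = np.random.rand(Q)

    old = np.einsum('nm,nq,q,nq->mq', dL_dpsi1, gamma, variance, mu)
    new = dL_dpsi1.T.dot(gamma*mu)*variance
    assert np.allclose(old, new)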

View file

@@ -22,12 +22,14 @@ try:
         # _psi1 NxM
         mu = variational_posterior.mean
         S = variational_posterior.variance
+        gamma = variational_posterior.binary_prob
         N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
         l2 = np.square(lengthscale)
         log_denom1 = np.log(S/l2+1)
         log_denom2 = np.log(2*S/l2+1)
-        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
+        log_gamma = np.log(gamma)
+        log_gamma1 = np.log(1.-gamma)
         variance = float(variance)

         psi0 = np.empty(N)
         psi0[:] = variance
@@ -37,6 +39,7 @@ try:
         from ....util.misc import param_to_array
         S = param_to_array(S)
         mu = param_to_array(mu)
+        gamma = param_to_array(gamma)
         Z = param_to_array(Z)

         support_code = """
@@ -79,7 +82,7 @@ try:
             }
         }
         """
-        weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
+        weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
         psi2 = psi2n.sum(axis=0)
         return psi0,psi1,psi2,psi2n
@@ -94,12 +97,13 @@ try:
         mu = variational_posterior.mean
         S = variational_posterior.variance
+        gamma = variational_posterior.binary_prob
         N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
         l2 = np.square(lengthscale)
         log_denom1 = np.log(S/l2+1)
         log_denom2 = np.log(2*S/l2+1)
-        log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
-        gamma, gamma1 = variational_posterior.gamma_probabilities()
+        log_gamma = np.log(gamma)
+        log_gamma1 = np.log(1.-gamma)
         variance = float(variance)

         dvar = np.zeros(1)
@@ -113,6 +117,7 @@ try:
         from ....util.misc import param_to_array
         S = param_to_array(S)
         mu = param_to_array(mu)
+        gamma = param_to_array(gamma)
         Z = param_to_array(Z)

         support_code = """
@@ -130,7 +135,6 @@ try:
                         double Zm1q = Z(m1,q);
                         double Zm2q = Z(m2,q);
                         double gnq = gamma(n,q);
-                        double g1nq = gamma1(n,q);
                        double mu_nq = mu(n,q);

                        if(m2==0) {
@@ -156,7 +160,7 @@ try:
                                 dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
                                 dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
-                                dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
+                                dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
                                 dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
                                 dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
                             }
@@ -184,7 +188,7 @@ try:
                                 dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
                                 dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
-                                dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
+                                dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
                                 dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
                                 dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
                             }
@@ -192,7 +196,7 @@ try:
                 }
             }
         """
-        weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
+        weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
         dl *= 2.*lengthscale

         if not ARD:

View file

@@ -7,7 +7,7 @@ from kern import Kern,CombinationKernel
 from .independent_outputs import index_to_slices
 import itertools

-class DiffGenomeKern(Kern):
+class DEtime(Kern):
     def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
         self.idx_p = idx_p

View file

@@ -34,7 +34,9 @@ class Gaussian(Likelihood):
         if gp_link is None:
             gp_link = link_functions.Identity()

-        assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
+        if not isinstance(gp_link, link_functions.Identity):
+            print "Warning, Exact inference is not implemented for non-identity link functions,\
+            if you are not already, ensure Laplace inference_method is used"

         super(Gaussian, self).__init__(gp_link, name=name)
@@ -263,16 +265,19 @@ class Gaussian(Likelihood):
         return d2logpdf_dlink2_dvar

     def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
-        return dlogpdf_dvar
+        dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+        dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
+        return dlogpdf_dtheta

     def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
-        return dlogpdf_dlink_dvar
+        dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+        dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
+        return dlogpdf_dlink_dtheta

     def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
-        return d2logpdf_dlink2_dvar
+        d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+        d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
+        return d2logpdf_dlink2_dtheta

     def _mean(self, gp):
         """

View file

@@ -5,7 +5,7 @@ import numpy as np
 from scipy import stats,special
 import scipy as sp
 import link_functions
-from ..util.misc import chain_1, chain_2, chain_3
+from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
 from scipy.integrate import quad
 import warnings
 from ..core.parameterization import Parameterized
@@ -39,6 +39,7 @@ class Likelihood(Parameterized):
         assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
         self.gp_link = gp_link
         self.log_concave = False
+        self.not_block_really = False

     def _gradients(self,partial):
         return np.zeros(0)
@@ -194,20 +195,27 @@ class Likelihood(Parameterized):
         """
         #conditional_mean: the expected value of y given some f, under this likelihood
+        fmin = -np.inf
+        fmax = np.inf
         def int_mean(f,m,v):
-            p = np.exp(-(0.5/v)*np.square(f - m))
+            exponent = -(0.5/v)*np.square(f - m)
+            #If exponent is under -30 then exp(exponent) will be very small, so don't exp it!
             #If p is zero then conditional_mean will overflow
+            assert v.all() > 0
+            p = safe_exp(exponent)
+            #If p is zero then conditional_variance will overflow
             if p < 1e-10:
                 return 0.
             else:
                 return self.conditional_mean(f)*p
-        scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
+        scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
         return mean

     def _conditional_mean(self, f):
-        """Quadrature calculation of the conditional mean: E(Y_star|f)"""
+        """Quadrature calculation of the conditional mean: E(Y_star|f_star)"""
         raise NotImplementedError, "implement this function to make predictions"

     def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
@@ -215,7 +223,7 @@ class Likelihood(Parameterized):
         Approximation to the predictive variance: V(Y_star)
         The following variance decomposition is used:
-        V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
+        V(Y_star) = E( V(Y_star|f_star)**2 ) + V( E(Y_star|f_star) )**2

         :param mu: mean of posterior
         :param sigma: standard deviation of posterior
@@ -225,15 +233,22 @@ class Likelihood(Parameterized):
         #sigma2 = sigma**2
         normalizer = np.sqrt(2*np.pi*variance)

+        fmin_v = -np.inf
+        fmin_m = np.inf
+        fmin = -np.inf
+        fmax = np.inf
+
+        from ..util.misc import safe_exp
+
         # E( V(Y_star|f_star) )
         def int_var(f,m,v):
-            p = np.exp(-(0.5/v)*np.square(f - m))
+            exponent = -(0.5/v)*np.square(f - m)
+            p = safe_exp(exponent)
             #If p is zero then conditional_variance will overflow
             if p < 1e-10:
                 return 0.
             else:
                 return self.conditional_variance(f)*p
-        scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
+        scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         exp_var = np.array(scaled_exp_variance)[:,None] / normalizer

         #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
@@ -245,14 +260,15 @@ class Likelihood(Parameterized):
         #E( E(Y_star|f_star)**2 )
         def int_pred_mean_sq(f,m,v,predictive_mean_sq):
-            p = np.exp(-(0.5/v)*np.square(f - m))
+            exponent = -(0.5/v)*np.square(f - m)
+            p = np.exp(exponent)
             #If p is zero then conditional_mean**2 will overflow
             if p < 1e-10:
                 return 0.
             else:
                 return self.conditional_mean(f)**2*p
-        scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
+        scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
         exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
         var_exp = exp_exp2 - predictive_mean_sq
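
Note: the decomposition predictive_variance implements is the law of total variance; the **2 added to the docstring above looks like notational slippage, since the code computes plain expectations and variances. In the notation of the surrounding comments:

    V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ),   f_star ~ N(mu, variance)

with both outer moments approximated by the quadrature helpers int_var and int_pred_mean_sq.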
@@ -300,8 +316,18 @@ class Likelihood(Parameterized):
         :returns: likelihood evaluated for this point
         :rtype: float
         """
-        inv_link_f = self.gp_link.transf(f)
-        return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
+        if isinstance(self.gp_link, link_functions.Identity):
+            return self.pdf_link(f, y, Y_metadata=Y_metadata)
+        else:
+            inv_link_f = self.gp_link.transf(f)
+            return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
+
+    def logpdf_sum(self, f, y, Y_metadata=None):
+        """
+        Convenience function that can be overridden for functions where this could
+        be computed more efficiently
+        """
+        return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata))

     def logpdf(self, f, y, Y_metadata=None):
         """
@@ -318,8 +344,11 @@ class Likelihood(Parameterized):
         :returns: log likelihood evaluated for this point
         :rtype: float
         """
-        inv_link_f = self.gp_link.transf(f)
-        return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
+        if isinstance(self.gp_link, link_functions.Identity):
+            return self.logpdf_link(f, y, Y_metadata=Y_metadata)
+        else:
+            inv_link_f = self.gp_link.transf(f)
+            return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)

     def dlogpdf_df(self, f, y, Y_metadata=None):
         """
@@ -337,11 +366,15 @@ class Likelihood(Parameterized):
         :returns: derivative of log likelihood evaluated for this point
         :rtype: 1xN array
         """
-        inv_link_f = self.gp_link.transf(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
-        dlink_df = self.gp_link.dtransf_df(f)
-        return chain_1(dlogpdf_dlink, dlink_df)
+        if isinstance(self.gp_link, link_functions.Identity):
+            return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata)
+        else:
+            inv_link_f = self.gp_link.transf(f)
+            dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
+            dlink_df = self.gp_link.dtransf_df(f)
+            return chain_1(dlogpdf_dlink, dlink_df)

+    @blockify_hessian
     def d2logpdf_df2(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the second derivative of log likelihood using it
@@ -358,13 +391,18 @@ class Likelihood(Parameterized):
         :returns: second derivative of log likelihood evaluated for this point (diagonal only)
         :rtype: 1xN array
         """
-        inv_link_f = self.gp_link.transf(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
-        dlink_df = self.gp_link.dtransf_df(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
-        d2link_df2 = self.gp_link.d2transf_df2(f)
-        return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
+        if isinstance(self.gp_link, link_functions.Identity):
+            d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata)
+        else:
+            inv_link_f = self.gp_link.transf(f)
+            d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
+            dlink_df = self.gp_link.dtransf_df(f)
+            dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
+            d2link_df2 = self.gp_link.d2transf_df2(f)
+            d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
+        return d2logpdf_df2

+    @blockify_third
     def d3logpdf_df3(self, f, y, Y_metadata=None):
         """
         Evaluates the link function link(f) then computes the third derivative of log likelihood using it
@@ -381,64 +419,96 @@ class Likelihood(Parameterized):
         :returns: third derivative of log likelihood evaluated for this point
         :rtype: float
         """
-        inv_link_f = self.gp_link.transf(f)
-        d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
-        dlink_df = self.gp_link.dtransf_df(f)
-        d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
-        d2link_df2 = self.gp_link.d2transf_df2(f)
-        dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
-        d3link_df3 = self.gp_link.d3transf_df3(f)
-        return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
+        if isinstance(self.gp_link, link_functions.Identity):
+            d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata)
+        else:
+            inv_link_f = self.gp_link.transf(f)
+            d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
+            dlink_df = self.gp_link.dtransf_df(f)
+            d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
+            d2link_df2 = self.gp_link.d2transf_df2(f)
+            dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
+            d3link_df3 = self.gp_link.d3transf_df3(f)
+            d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
+        return d3logpdf_df3

     def dlogpdf_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
-            inv_link_f = self.gp_link.transf(f)
-            return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+            if self.not_block_really:
+                raise NotImplementedError("Need to make a decorator for this!")
+            if isinstance(self.gp_link, link_functions.Identity):
+                return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata)
+            else:
+                inv_link_f = self.gp_link.transf(f)
+                return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
         else:
             # There are no parameters so return an empty array for derivatives
-            return np.zeros([1, 0])
+            return np.zeros((0, f.shape[0], f.shape[1]))

     def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
-            inv_link_f = self.gp_link.transf(f)
-            dlink_df = self.gp_link.dtransf_df(f)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
-            return chain_1(dlogpdf_dlink_dtheta, dlink_df)
+            if self.not_block_really:
+                raise NotImplementedError("Need to make a decorator for this!")
+            if isinstance(self.gp_link, link_functions.Identity):
+                return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata)
+            else:
+                inv_link_f = self.gp_link.transf(f)
+                dlink_df = self.gp_link.dtransf_df(f)
+                dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+                dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+                #Chain each parameter of the likelihood separately
+                for p in range(self.size):
+                    dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df)
+                return dlogpdf_df_dtheta
+                #return chain_1(dlogpdf_dlink_dtheta, dlink_df)
         else:
             # There are no parameters so return an empty array for derivatives
-            return np.zeros([f.shape[0], 0])
+            return np.zeros((0, f.shape[0], f.shape[1]))

     def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
-            inv_link_f = self.gp_link.transf(f)
-            dlink_df = self.gp_link.dtransf_df(f)
-            d2link_df2 = self.gp_link.d2transf_df2(f)
-            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
-            return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
+            if self.not_block_really:
+                raise NotImplementedError("Need to make a decorator for this!")
+            if isinstance(self.gp_link, link_functions.Identity):
+                return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata)
+            else:
+                inv_link_f = self.gp_link.transf(f)
+                dlink_df = self.gp_link.dtransf_df(f)
+                d2link_df2 = self.gp_link.d2transf_df2(f)
+                d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+                dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+                d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+                #Chain each parameter of the likelihood separately
+                for p in range(self.size):
+                    d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2)
+                return d2logpdf_df2_dtheta
+                #return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
         else:
             # There are no parameters so return an empty array for derivatives
-            return np.zeros([f.shape[0], 0])
+            return np.zeros((0, f.shape[0], f.shape[1]))

     def _laplace_gradients(self, f, y, Y_metadata=None):
-        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
         dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
         d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)

         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
         # ensure we have gradients for every parameter we want to optimize
-        assert len(dlogpdf_dtheta) == self.size #1 x num_param array
-        assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
-        assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
+        assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
+        assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
+        assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
         return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
@@ -459,19 +529,98 @@ class Likelihood(Parameterized):
     def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
         #compute the quantiles by sampling!!!
-        N_samp = 1000
+        N_samp = 500
         s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
         #ss_f = s.flatten()
         #ss_y = self.samples(ss_f, Y_metadata)
+        #ss_y = self.samples(s, Y_metadata, samples=100)
         ss_y = self.samples(s, Y_metadata)
         #ss_y = ss_y.reshape(mu.shape[0], N_samp)
         return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]

-    def samples(self, gp, Y_metadata=None):
+    def samples(self, gp, Y_metadata=None, samples=1):
         """
         Returns a set of samples of observations based on a given value of the latent variable.

         :param gp: latent variable
+        :param samples: number of samples to take for each f location
         """
-        raise NotImplementedError
+        raise NotImplementedError("""May be possible to use MCMC with user-tuning, see
+                                  MCMC_pdf_samples in likelihood.py and write samples function
+                                  using this, beware this is a simple implementation
+                                  of Metropolis and will not work well for all likelihoods""")
+
+    def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None):
+        """
+        Simple implementation of Metropolis sampling algorithm
+
+        Will run a parallel chain for each input dimension (treats each f independently)
+        Thus assumes f*_1 independent of f*_2 etc.
+
+        :param num_samples: Number of samples to take
+        :param fNew: f at which to sample around
+        :param starting_loc: Starting locations of the independent chains (usually will be conditional_mean of likelihood), often link_f
+        :param stepsize: Stepsize for the normal proposal distribution (will need modifying)
+        :param burn_in: number of samples to use for burnin (will need modifying)
+        :param Y_metadata: Y_metadata for pdf
+        """
+        print "Warning, using MCMC for sampling y*, needs to be tuned!"
+        if starting_loc is None:
+            starting_loc = fNew
+        from functools import partial
+        logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata)
+        pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None]))
+        #Should be the link function of f is a good starting point
+        #(i.e. the point before you corrupt it with the likelihood)
+        par_chains = starting_loc.shape[0]
+        chain_values = np.zeros((par_chains, num_samples))
+        chain_values[:, 0][:,None] = starting_loc
+        #Use same stepsize for all par_chains
+        stepsize = np.ones(par_chains)*stepsize
+        accepted = np.zeros((par_chains, num_samples+burn_in))
+        accept_ratio = np.zeros(num_samples+burn_in)
+
+        #Whilst burning in, only need to keep the previous lot
+        burnin_cache = np.zeros(par_chains)
+        burnin_cache[:] = starting_loc.flatten()
+        burning_in = True
+        for i in xrange(burn_in+num_samples):
+            next_ind = i-burn_in
+            if burning_in:
+                old_y = burnin_cache
+            else:
+                old_y = chain_values[:,next_ind-1]
+            old_lik = pdf(old_y)
+            #Propose new y from Gaussian proposal
+            new_y = np.random.normal(loc=old_y, scale=stepsize)
+            new_lik = pdf(new_y)
+            #Accept using Metropolis (not hastings) acceptance
+            #Always accepts if new_lik > old_lik
+            accept_probability = np.minimum(1, new_lik/old_lik)
+            u = np.random.uniform(0,1,par_chains)
+            #print "Accept prob: ", accept_probability
+            accepts = u < accept_probability
+            if burning_in:
+                burnin_cache[accepts] = new_y[accepts]
+                burnin_cache[~accepts] = old_y[~accepts]
+                if i == burn_in:
+                    burning_in = False
+                    chain_values[:,0] = burnin_cache
+            else:
+                #If it was accepted then new_y becomes the latest sample
+                chain_values[accepts, next_ind] = new_y[accepts]
+                #Otherwise use old y as the sample
+                chain_values[~accepts, next_ind] = old_y[~accepts]
+            accepted[~accepts, i] = 0
+            accepted[accepts, i] = 1
+            accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains)
+            #Show progress
+            if i % int((burn_in+num_samples)*0.1) == 0:
+                print "{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i)
+                print "Last run accept ratio: ", accept_ratio[i]
+
+        print "Average accept ratio: ", np.mean(accept_ratio)
+        return chain_values
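
Note: a hedged usage sketch for the new sampler; stepsize and burn_in need per-likelihood tuning, as the docstring warns (toy numbers, illustration only):

    import numpy as np
    import GPy

    lik = GPy.likelihoods.StudentT(deg_free=5, sigma2=0.1)
    f_star = np.random.randn(10, 1)          # latent values to corrupt
    chains = lik.MCMC_pdf_samples(f_star, num_samples=2000,
                                  starting_loc=f_star, stepsize=0.2,
                                  burn_in=1000)
    # chains.shape == (10, 2000): one independent Metropolis chain per f* location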

View file

@@ -232,12 +232,13 @@ class StudentT(Likelihood):
     def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
         dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
         dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
-        return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
+        return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))

     def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
         d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
         d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
-        return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
+        return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

     def predictive_mean(self, mu, sigma, Y_metadata=None):
         # The comment here confuses mean and median.

View file

@@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI):
             X_variance = np.random.uniform(0,.1,X.shape)

         if Gamma is None:
-            gamma = np.random.randn(X.shape[0], input_dim)
+            gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
+            gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
+            gamma[gamma>1.-1e-9] = 1.-1e-9
+            gamma[gamma<1e-9] = 1e-9
         else:
             gamma = Gamma.copy()

View file

@@ -14,7 +14,6 @@ class InferenceXTestCase(unittest.TestCase):
     def genData(self):
         D1,D2,N = 12,12,50
-        np.random.seed(1234)
         x = np.linspace(0, 4 * np.pi, N)[:, None]
         s1 = np.vectorize(lambda x: np.sin(x))
@@ -63,10 +62,11 @@ class InferenceXTestCase(unittest.TestCase):
         self.assertTrue(mi.checkgrad())

         m.optimize(max_iters=10000)
-        x,mi = m.infer_newX(m.Y)
-        self.assertTrue(np.allclose(m.X.mean, mi.X.mean))
-        self.assertTrue(np.allclose(m.X.variance, mi.X.variance))
+        x, mi = m.infer_newX(m.Y)
+        print m.X.mean - mi.X.mean
+        self.assertTrue(np.allclose(m.X.mean, mi.X.mean, rtol=1e-4, atol=1e-4))
+        self.assertTrue(np.allclose(m.X.variance, mi.X.variance, rtol=1e-4, atol=1e-4))

     def test_inferenceX_GPLVM(self):
         Ys = self.genData()

View file

@@ -10,7 +10,7 @@ from GPy.likelihoods import link_functions
 from GPy.core.parameterization import Param
 from functools import partial

 #np.random.seed(300)
-#np.random.seed(7)
+#np.random.seed(4)
 #np.seterr(divide='raise')

 def dparam_partial(inst_func, *args):
@@ -52,8 +52,17 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
     zipped_params = zip(params, params_names)
     for param_ind, (param_val, param_name) in enumerate(zipped_params):
         #Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
-        fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
-        dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
+        f_ = partial_f(param_val, param_name)
+        df_ = partial_df(param_val, param_name)
+        #Reshape so that we have a 3d matrix, that is we want it (?, N, D) regardless of whether ? is num_params or not
+        f_ = f_.reshape(-1, f_.shape[0], f_.shape[1])
+        df_ = df_.reshape(-1, f_.shape[0], f_.shape[1])
+
+        #Get the number of f and number of dimensions
+        fnum = f_.shape[-2]
+        fdim = f_.shape[-1]
+        dfnum = df_.shape[-2]
+
         for fixed_val in range(dfnum):
             #dlik and dlik_dvar gives back 1 value for each
             f_ind = min(fnum, fixed_val+1) - 1
@@ -61,9 +70,13 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
             #Make grad checker with this param moving, note that set_params is NOT being called
             #The parameter is being set directly with __setattr__
             #Check only the parameter and function value we wish to check at a time
-            grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
-                                   lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
-                                   param_val, [param_name])
+            #func = lambda p_val, fnum, fdim, param_ind, f_ind, param_ind: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
+            #dfunc_dparam = lambda d_val, fnum, fdim, param_ind, fixed_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
+
+            #First we reshape the output such that it is (num_params, N, D) then we pull out the relevant parameter-findex and checkgrad just this index at a time
+            func = lambda p_val: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
+            dfunc_dparam = lambda d_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
+            grad = GradientChecker(func, dfunc_dparam, param_val, [param_name])

             if constraints is not None:
                 for constrain_param, constraint in constraints:
@@ -104,37 +117,9 @@ class TestNoiseModels(object):
         self.var = 0.2
-        self.var = np.random.rand(1)

         #Make a bigger step as lower bound can be quite curved
         self.step = 1e-4

-    def tearDown(self):
-        self.Y = None
-        self.f = None
-        self.X = None
-
-    def test_scale2_models(self):
-        self.setUp()
-
-        ####################################################
-        # Constraint wrappers so we can just list them off #
-        ####################################################
-        def constrain_fixed(regex, model):
-            model[regex].constrain_fixed()
-
-        def constrain_negative(regex, model):
-            model[regex].constrain_negative()
-
-        def constrain_positive(regex, model):
-            model[regex].constrain_positive()
-
-        def constrain_bounded(regex, model, lower, upper):
-            """
-            Used like: partial(constrain_bounded, lower=0, upper=1)
-            """
-            model[regex].constrain_bounded(lower, upper)
-
         """
         Dictionary where we nest models we would like to check
             Name: {
@@ -149,136 +134,170 @@ class TestNoiseModels(object):
                 "link_f_constraints": [constraint_wrappers, listed_here]
                 }
         """
-        noise_models = {"Student_t_default": {
-                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [self.var],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                #"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
-                                },
-                            "laplace": True
-                            },
-                        "Student_t_1_var": {
-                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [1.0],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                },
-                            "laplace": True
-                            },
-                        "Student_t_small_deg_free": {
-                            "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [self.var],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                },
-                            "laplace": True
-                            },
-                        "Student_t_small_var": {
-                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [0.001],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                },
-                            "laplace": True
-                            },
-                        "Student_t_large_var": {
-                            "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [10.0],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                },
-                            "laplace": True
-                            },
-                        "Student_t_approx_gauss": {
-                            "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [self.var],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                },
-                            "laplace": True
-                            },
-                        "Student_t_log": {
-                            "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
-                            "grad_params": {
-                                "names": [".*t_scale2"],
-                                "vals": [self.var],
-                                "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
-                                },
-                            "laplace": True
-                            },
-                        "Gaussian_default": {
-                            "model": GPy.likelihoods.Gaussian(variance=self.var),
-                            "grad_params": {
-                                "names": [".*variance"],
-                                "vals": [self.var],
-                                "constraints": [(".*variance", constrain_positive)]
-                                },
-                            "laplace": True,
+        self.noise_models = {"Student_t_default": {
+                    "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                    "grad_params": {
+                        "names": [".*t_scale2"],
+                        "vals": [self.var],
+                        "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        },
+                    "laplace": True
+                    },
+                "Student_t_1_var": {
+                    "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                    "grad_params": {
+                        "names": [".*t_scale2"],
+                        "vals": [1.0],
+                        "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        },
+                    "laplace": True
+                    },
+                "Student_t_small_deg_free": {
+                    "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
+                    "grad_params": {
+                        "names": [".*t_scale2"],
+                        "vals": [self.var],
+                        "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        },
+                    "laplace": True
+                    },
+                "Student_t_small_var": {
+                    "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                    "grad_params": {
+                        "names": [".*t_scale2"],
+                        "vals": [0.001],
+                        "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        },
+                    "laplace": True
+                    },
+                "Student_t_large_var": {
+                    "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
+                    "grad_params": {
+                        "names": [".*t_scale2"],
+                        "vals": [10.0],
+                        "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        },
+                    "laplace": True
+                    },
+                "Student_t_approx_gauss": {
+                    "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
+                    "grad_params": {
+                        "names": [".*t_scale2"],
+                        "vals": [self.var],
+                        "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        },
+                    "laplace": True
+                    },
+                #"Student_t_log": {
+                    #"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
+                    #"grad_params": {
+                        #"names": [".*t_noise"],
+                        #"vals": [self.var],
+                        #"constraints": [(".*t_noise", self.constrain_positive), (".*deg_free", self.constrain_fixed)]
+                        #},
+                    #"laplace": True
+                    #},
+                "Gaussian_default": {
+                    "model": GPy.likelihoods.Gaussian(variance=self.var),
+                    "grad_params": {
+                        "names": [".*variance"],
+                        "vals": [self.var],
+                        "constraints": [(".*variance", self.constrain_positive)]
+                        },
+                    "laplace": True,
+                    "ep": False # FIXME: Should be True when we have it working again
"ep": False # FIXME: Should be True when we have it working again },
}, "Gaussian_log": {
#"Gaussian_log": { "model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N), "grad_params": {
#"grad_params": { "names": [".*variance"],
#"names": ["noise_model_variance"], "vals": [self.var],
#"vals": [self.var], "constraints": [(".*variance", self.constrain_positive)]
#"constraints": [constrain_positive] },
#}, "laplace": True
#"laplace": True },
#}, #"Gaussian_probit": {
#"Gaussian_probit": { #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N), #"grad_params": {
#"grad_params": { #"names": ["noise_model_variance"],
#"names": ["noise_model_variance"], #"vals": [self.var],
#"vals": [self.var], #"constraints": [constrain_positive]
#"constraints": [constrain_positive] #},
#}, #"laplace": True
#"laplace": True #},
#}, #"Gaussian_log_ex": {
#"Gaussian_log_ex": { #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N), #"grad_params": {
#"grad_params": { #"names": ["noise_model_variance"],
#"names": ["noise_model_variance"], #"vals": [self.var],
#"vals": [self.var], #"constraints": [constrain_positive]
#"constraints": [constrain_positive] #},
#}, #"laplace": True
#"laplace": True #},
#}, "Bernoulli_default": {
"Bernoulli_default": { "model": GPy.likelihoods.Bernoulli(),
"model": GPy.likelihoods.Bernoulli(), "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], "laplace": True,
"laplace": True, "Y": self.binary_Y,
"Y": self.binary_Y, "ep": False # FIXME: Should be True when we have it working again
"ep": False # FIXME: Should be True when we have it working again },
}, "Exponential_default": {
"Exponential_default": { "model": GPy.likelihoods.Exponential(),
"model": GPy.likelihoods.Exponential(), "link_f_constraints": [self.constrain_positive],
"link_f_constraints": [constrain_positive], "Y": self.positive_Y,
"Y": self.positive_Y, "laplace": True,
"laplace": True, },
}, "Poisson_default": {
"Poisson_default": { "model": GPy.likelihoods.Poisson(),
"model": GPy.likelihoods.Poisson(), "link_f_constraints": [self.constrain_positive],
"link_f_constraints": [constrain_positive], "Y": self.integer_Y,
"Y": self.integer_Y, "laplace": True,
"laplace": True, "ep": False #Should work though...
"ep": False #Should work though... },
}#, #,
#GAMMA needs some work!"Gamma_default": { #GAMMA needs some work!"Gamma_default": {
#"model": GPy.likelihoods.Gamma(), #"model": GPy.likelihoods.Gamma(),
#"link_f_constraints": [constrain_positive], #"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y, #"Y": self.positive_Y,
#"laplace": True #"laplace": True
#} #}
} }
for name, attributes in noise_models.iteritems():
####################################################
# Constraint wrappers so we can just list them off #
####################################################
def constrain_fixed(self, regex, model):
model[regex].constrain_fixed()
def constrain_negative(self, regex, model):
model[regex].constrain_negative()
def constrain_positive(self, regex, model):
model[regex].constrain_positive()
def constrain_fixed_below(self, regex, model, up_to):
model[regex][0:up_to].constrain_fixed()
def constrain_fixed_above(self, regex, model, above):
model[regex][above:].constrain_fixed()
def constrain_bounded(self, regex, model, lower, upper):
"""
Used like: partial(constrain_bounded, lower=0, upper=1)
"""
model[regex].constrain_bounded(lower, upper)
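These wrappers exist so that a constraint can be stored as a single callable alongside each model spec; a standalone sketch of the pattern (m is a hypothetical model):

    from functools import partial

    def constrain_bounded(regex, model, lower, upper):
        model[regex].constrain_bounded(lower, upper)

    #partial pre-binds the bounds, leaving a (regex, model) callable,
    #which is how the noise_models dictionary above stores constraints
    bounded_01 = partial(constrain_bounded, lower=0, upper=1)
    #bounded_01(".*some_param", m) is then equivalent to
    #m[".*some_param"].constrain_bounded(0, 1)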
def tearDown(self):
self.Y = None
self.f = None
self.X = None
def test_scale2_models(self):
self.setUp()
for name, attributes in self.noise_models.iteritems():
model = attributes["model"] model = attributes["model"]
if "grad_params" in attributes: if "grad_params" in attributes:
params = attributes["grad_params"] params = attributes["grad_params"]
@@ -290,7 +309,7 @@ class TestNoiseModels(object):
param_vals = [] param_vals = []
param_names = [] param_names = []
constrain_positive = [] constrain_positive = []
param_constraints = [] # ??? TODO: Saul to Fix. param_constraints = []
if "link_f_constraints" in attributes: if "link_f_constraints" in attributes:
link_f_constraints = attributes["link_f_constraints"] link_f_constraints = attributes["link_f_constraints"]
else: else:
@@ -303,6 +322,10 @@ class TestNoiseModels(object):
f = attributes["f"].copy() f = attributes["f"].copy()
else: else:
f = self.f.copy() f = self.f.copy()
if "Y_metadata" in attributes:
Y_metadata = attributes["Y_metadata"].copy()
else:
Y_metadata = None
if "laplace" in attributes: if "laplace" in attributes:
laplace = attributes["laplace"] laplace = attributes["laplace"]
else: else:
@@ -317,30 +340,30 @@ class TestNoiseModels(object):
#Required by all #Required by all
#Normal derivatives #Normal derivatives
yield self.t_logpdf, model, Y, f yield self.t_logpdf, model, Y, f, Y_metadata
yield self.t_dlogpdf_df, model, Y, f yield self.t_dlogpdf_df, model, Y, f, Y_metadata
yield self.t_d2logpdf_df2, model, Y, f yield self.t_d2logpdf_df2, model, Y, f, Y_metadata
#Link derivatives #Link derivatives
yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints yield self.t_dlogpdf_dlink, model, Y, f, Y_metadata, link_f_constraints
yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints yield self.t_d2logpdf_dlink2, model, Y, f, Y_metadata, link_f_constraints
if laplace: if laplace:
#Laplace only derivatives #Laplace only derivatives
yield self.t_d3logpdf_df3, model, Y, f yield self.t_d3logpdf_df3, model, Y, f, Y_metadata
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints yield self.t_d3logpdf_dlink3, model, Y, f, Y_metadata, link_f_constraints
#Params #Params
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints yield self.t_dlogpdf_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints yield self.t_dlogpdf_df_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints yield self.t_d2logpdf2_df2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
#Link params #Link params
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints yield self.t_dlogpdf_link_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints yield self.t_dlogpdf_dlink_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints
#laplace likelihood gradcheck #laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
if ep: if ep:
#ep likelihood gradcheck #ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
self.tearDown() self.tearDown()
@@ -349,41 +372,41 @@ class TestNoiseModels(object):
# dpdf_df's # # dpdf_df's #
############# #############
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_logpdf(self, model, Y, f): def t_logpdf(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
#print model._get_params() #print model._get_params()
np.testing.assert_almost_equal( np.testing.assert_almost_equal(
model.pdf(f.copy(), Y.copy()).prod(), model.pdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).prod(),
np.exp(model.logpdf(f.copy(), Y.copy()).sum()) np.exp(model.logpdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).sum())
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_df(self, model, Y, f): def t_dlogpdf_df(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.description = "\n{}".format(inspect.stack()[0][3]) self.description = "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(np.sum(model.logpdf), y=Y) logpdf = functools.partial(np.sum(model.logpdf), y=Y, Y_metadata=Y_metadata)
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
grad.randomize() grad.randomize()
print model print model
assert grad.checkgrad(verbose=1) assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf_df2(self, model, Y, f): def t_d2logpdf_df2(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata)
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g') grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g')
grad.randomize() grad.randomize()
print model print model
assert grad.checkgrad(verbose=1) assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d3logpdf_df3(self, model, Y, f): def t_d3logpdf_df3(self, model, Y, f, Y_metadata):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata)
d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y) d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g') grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g')
grad.randomize() grad.randomize()
print model print model
@@ -393,32 +416,32 @@ class TestNoiseModels(object):
# df_dparams # # df_dparams #
############## ##############
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints): def t_dlogpdf_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, params_names, args=(f, Y), constraints=param_constraints, params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints): def t_dlogpdf_df_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, params_names, args=(f, Y), constraints=param_constraints, params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints): def t_d2logpdf2_df2_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, params_names, args=(f, Y), constraints=param_constraints, params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@@ -426,10 +449,10 @@ class TestNoiseModels(object):
# dpdf_dlink's # # dpdf_dlink's #
################ ################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints): def t_dlogpdf_dlink(self, model, Y, f, Y_metadata, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
logpdf = functools.partial(model.logpdf_link, y=Y) logpdf = functools.partial(model.logpdf_link, y=Y, Y_metadata=Y_metadata)
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g') grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g')
#Apply constraints to link_f values #Apply constraints to link_f values
@@ -442,10 +465,10 @@ class TestNoiseModels(object):
assert grad.checkgrad(verbose=1) assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints): def t_d2logpdf_dlink2(self, model, Y, f, Y_metadata, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata)
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g') grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g')
#Apply constraints to link_f values #Apply constraints to link_f values
@@ -458,10 +481,10 @@ class TestNoiseModels(object):
assert grad.checkgrad(verbose=1) assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints): def t_d3logpdf_dlink3(self, model, Y, f, Y_metadata, link_f_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata)
d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y) d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y, Y_metadata=Y_metadata)
grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g') grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g')
#Apply constraints to link_f values #Apply constraints to link_f values
@@ -477,32 +500,32 @@ class TestNoiseModels(object):
# dlink_dparams # # dlink_dparams #
################# #################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints): def t_dlogpdf_link_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
params, param_names, args=(f, Y), constraints=param_constraints, params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints): def t_dlogpdf_dlink_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
params, param_names, args=(f, Y), constraints=param_constraints, params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints): def t_d2logpdf2_dlink2_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
params, param_names, args=(f, Y), constraints=param_constraints, params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@@ -510,14 +533,15 @@ class TestNoiseModels(object):
# laplace test # # laplace test #
################ ################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): def t_laplace_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
#Normalize #Normalize
Y = Y/Y.max() Y = Y/Y.max()
white_var = 1e-6 white_var = 1e-5
kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1]) kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
laplace_likelihood = GPy.inference.latent_function_inference.Laplace() laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=laplace_likelihood)
m['.*white'].constrain_fixed(white_var) m['.*white'].constrain_fixed(white_var)
#Set constraints #Set constraints
@@ -526,6 +550,7 @@ class TestNoiseModels(object):
print m print m
m.randomize() m.randomize()
m.randomize()
#Set params #Set params
for param_num in range(len(param_names)): for param_num in range(len(param_names)):
@@ -545,14 +570,15 @@ class TestNoiseModels(object):
# EP test # # EP test #
########### ###########
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): def t_ep_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
#Normalize #Normalize
Y = Y/Y.max() Y = Y/Y.max()
white_var = 1e-6 white_var = 1e-6
kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1]) kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
ep_inf = GPy.inference.latent_function_inference.EP() ep_inf = GPy.inference.latent_function_inference.EP()
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=ep_inf)
m['.*white'].constrain_fixed(white_var) m['.*white'].constrain_fixed(white_var)
for param_num in range(len(param_names)): for param_num in range(len(param_names)):
@@ -571,8 +597,8 @@ class LaplaceTests(unittest.TestCase):
""" """
def setUp(self): def setUp(self):
self.N = 5 self.N = 15
self.D = 3 self.D = 1
self.X = np.random.rand(self.N, self.D)*10 self.X = np.random.rand(self.N, self.D)*10
self.real_std = 0.1 self.real_std = 0.1
@@ -636,20 +662,20 @@ class LaplaceTests(unittest.TestCase):
exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference() exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf) m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
m1['.*white'].constrain_fixed(1e-6) m1['.*white'].constrain_fixed(1e-6)
m1['.*rbf.variance'] = initial_var_guess m1['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10)
m1['.*rbf.variance'].constrain_bounded(1e-4, 10)
m1.randomize() m1.randomize()
gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess) gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
laplace_inf = GPy.inference.latent_function_inference.Laplace() laplace_inf = GPy.inference.latent_function_inference.Laplace()
m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf) m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
m2['.*white'].constrain_fixed(1e-6) m2['.*white'].constrain_fixed(1e-6)
m2['.*rbf.variance'].constrain_bounded(1e-4, 10) m2['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10)
m2.randomize() m2.randomize()
if debug: if debug:
print m1 print m1
print m2 print m2
optimizer = 'scg' optimizer = 'scg'
print "Gaussian" print "Gaussian"
m1.optimize(optimizer, messages=debug, ipython_notebook=False) m1.optimize(optimizer, messages=debug, ipython_notebook=False)
@@ -687,8 +713,6 @@ class LaplaceTests(unittest.TestCase):
pb.scatter(X, m1.likelihood.Y, c='g') pb.scatter(X, m1.likelihood.Y, c='g')
pb.scatter(X, m2.likelihood.Y, c='r', marker='x') pb.scatter(X, m2.likelihood.Y, c='r', marker='x')
#Check Y's are the same #Check Y's are the same
np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5) np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
#Check marginals are the same #Check marginals are the same

GPy/testing/misc_tests.py (new file, 18 lines)
@@ -0,0 +1,18 @@
import numpy as np
import scipy as sp
import GPy
class MiscTests(np.testing.TestCase):
"""
Testing some utilities of misc
"""
def setUp(self):
self._lim_val = np.finfo(np.float64).max
self._lim_val_exp = np.log(self._lim_val)
def test_safe_exp_upper(self):
assert np.exp(self._lim_val_exp + 1) == np.inf
assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf
def test_safe_exp_lower(self):
assert GPy.util.misc.safe_exp(1e-10) < np.inf

@@ -17,6 +17,54 @@ def get_blocks(A, blocksizes):
count_i += i count_i += i
return B return B
def get_block_shapes(B):
assert B.dtype is np.dtype('object'), "Must be a block matrix"
return [B[b,b].shape[0] for b in range(0, B.shape[0])]
def unblock(B):
assert B.dtype is np.dtype('object'), "Must be a block matrix"
block_shapes = get_block_shapes(B)
num_elements = np.sum(block_shapes)
A = np.empty(shape=(num_elements, num_elements))
count_i = 0
for Bi, i in enumerate(block_shapes):
count_j = 0
for Bj, j in enumerate(block_shapes):
A[count_i:count_i + i, count_j:count_j + j] = B[Bi, Bj]
count_j += j
count_i += i
return A
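A round-trip sketch of get_blocks/unblock, mirroring the __main__ check at the bottom of this file:

    import numpy as np

    A = np.arange(25.).reshape(5, 5)
    B = get_blocks(A, [2, 3])       #2x2 object array holding a 2x2 and a 3x3 diagonal block
    assert get_block_shapes(B) == [2, 3]
    assert np.all(unblock(B) == A)  #unblock reassembles the original matrix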
def block_dot(A, B):
"""
Element-wise dot product on block matrices

+------+------+   +------+------+   +-------+-------+
|      |      |   |      |      |   |A11.B11|A12.B12|
| A11  | A12  |   | B11  | B12  |   |       |       |
+------+------+ o +------+------+ = +-------+-------+
|      |      |   |      |      |   |A21.B21|A22.B22|
| A21  | A22  |   | B21  | B22  |   |       |       |
+------+------+   +------+------+   +-------+-------+

.. note::
If either A or B stores its diagonal blocks as vectors, a more
efficient product using numpy broadcasting is used, i.e. A11*B11
"""
#Must have same number of blocks and be a block matrix
assert A.dtype is np.dtype('object'), "Must be a block matrix"
assert B.dtype is np.dtype('object'), "Must be a block matrix"
Ashape = A.shape
Bshape = B.shape
assert Ashape == Bshape
def f(A, B):
#A block stored as a vector (non-square) encodes a diagonal, so broadcast;
#the check must look at the blocks themselves, not the outer block-matrix shape
if A.ndim == 1 or B.ndim == 1 or A.shape[0] != A.shape[1] or B.shape[0] != B.shape[1]:
#FIXME: Careful if one is transpose of other, would make a matrix
return A*B
else:
return np.dot(A, B)
dot = np.vectorize(f, otypes = [np.object])
return dot(A,B)
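A usage sketch for block_dot with square, equally-sized blocks (this relies on the per-block shape check in f above):

    import numpy as np

    M1, M2 = np.random.rand(4, 4), np.random.rand(4, 4)
    A, B = get_blocks(M1, [2, 2]), get_blocks(M2, [2, 2])
    C = block_dot(A, B)
    #Each output block is the dot of the corresponding input blocks, e.g. C12 = A12.B12
    assert np.allclose(C[0, 1], np.dot(M1[:2, 2:], M2[:2, 2:]))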
if __name__=='__main__': if __name__=='__main__':
@@ -24,3 +72,5 @@ if __name__=='__main__':
B = get_blocks(A,[2,3]) B = get_blocks(A,[2,3])
B[0,0] += 7 B[0,0] += 7
print B print B
assert np.all(unblock(B) == A)

GPy/util/misc.py
@@ -4,6 +4,16 @@
import numpy as np import numpy as np
from config import * from config import *
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = np.power(_lim_val, 1./3) #largest value whose cube is still finite
def safe_exp(f):
clip_f = np.clip(f, -np.inf, _lim_val_exp)
return np.exp(clip_f)
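A quick illustration of the clip (a sketch, assuming it runs in this module's namespace where safe_exp and _lim_val_exp are defined):

    import numpy as np

    big = _lim_val_exp + 1.
    print np.exp(big)    #inf, after a RuntimeWarning about overflow
    print safe_exp(big)  #~1.8e308: the argument is clipped first, so the result stays finite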
def chain_1(df_dg, dg_dx): def chain_1(df_dg, dg_dx):
""" """
Generic chaining function for first derivative Generic chaining function for first derivative
@@ -11,6 +21,11 @@ def chain_1(df_dg, dg_dx):
.. math:: .. math::
\\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx} \\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}
""" """
if np.all(dg_dx==1.):
return df_dg
if len(df_dg) > 1 and df_dg.shape[-1] > 1:
raise NotImplementedError('Not implemented for matrices yet')
return df_dg * dg_dx return df_dg * dg_dx
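A numeric sanity check of chain_1 (a sketch, not part of the module): for f(g(x)) with f = sin and g = x**2, the analytic chain product should match a finite-difference estimate:

    import numpy as np

    x = np.array([0.3])
    df_dg = np.cos(x**2)   #f'(g) evaluated at g(x)
    dg_dx = 2.*x           #g'(x)
    eps = 1e-6
    numeric = (np.sin((x + eps)**2) - np.sin((x - eps)**2)) / (2.*eps)
    assert np.allclose(chain_1(df_dg, dg_dx), numeric)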
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
@@ -20,7 +35,13 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
.. math:: .. math::
\\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}} \\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}
""" """
return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2 if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
return d2f_dg2
if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1:
raise NotImplementedError('Not implemented for matrices yet')
#dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
dg_dx_2 = dg_dx**2
return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3): def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
""" """
@@ -29,11 +50,18 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
.. math:: .. math::
\\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}} \\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}
""" """
return d3f_dg3*(dg_dx**3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3 if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0):
return d3f_dg3
if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
raise NotImplementedError('Not implemented for matrices yet')
#dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3
dg_dx_3 = dg_dx**3
return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
def opt_wrapper(m, **kwargs): def opt_wrapper(m, **kwargs):
""" """
This function just wraps the optimization procedure of a GPy This function just wraps the optimization procedure of a GPy
object so that optimize() is pickleable (necessary for multiprocessing). object so that optimize() is pickleable (necessary for multiprocessing).
""" """
m.optimize(**kwargs) m.optimize(**kwargs)
@@ -96,3 +124,47 @@ from :class:ndarray)"""
if len(param) == 1: if len(param) == 1:
return param[0].view(np.ndarray) return param[0].view(np.ndarray)
return [x.view(np.ndarray) for x in param] return [x.view(np.ndarray) for x in param]
def blockify_hessian(func):
def wrapper_func(self, *args, **kwargs):
# Invoke the wrapped function first
retval = func(self, *args, **kwargs)
# If the Hessian is really diagonal but came back as a vector, expand it into a full diagonal matrix
if self.not_block_really and (retval.shape[0] != retval.shape[1]):
return np.diagflat(retval)
else:
return retval
return wrapper_func
def blockify_third(func):
def wrapper_func(self, *args, **kwargs):
# Invoke the wrapped function first
retval = func(self, *args, **kwargs)
# If the third derivative is diagonal but came back as a vector, embed it on the diagonal of a full (N, N, N) tensor
if self.not_block_really and (len(retval.shape) < 3):
num_data = retval.shape[0]
d3_block_cache = np.zeros((num_data, num_data, num_data))
diag_slice = range(num_data)
d3_block_cache[diag_slice, diag_slice, diag_slice] = np.squeeze(retval)
return d3_block_cache
else:
return retval
return wrapper_func
def blockify_dhess_dtheta(func):
def wrapper_func(self, *args, **kwargs):
# Invoke the wrapped function first
retval = func(self, *args, **kwargs)
# If the result is diagonal but came back as (N, num_params), embed each parameter's column on the diagonal of its (N, N) slice
if self.not_block_really and (len(retval.shape) < 3):
num_data = retval.shape[0]
num_params = retval.shape[-1]
dhess_dtheta = np.zeros((num_data, num_data, num_params))
diag_slice = range(num_data)
for param_ind in range(num_params):
dhess_dtheta[diag_slice, diag_slice, param_ind] = np.squeeze(retval[:,param_ind])
return dhess_dtheta
else:
return retval
return wrapper_func
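A minimal sketch of what blockify_hessian does, using a hypothetical likelihood-like object whose Hessian is really diagonal and comes back as an (N, 1) array:

    import numpy as np

    class DiagonalLik(object):  #hypothetical stand-in for a likelihood
        not_block_really = True
        @blockify_hessian
        def d2logpdf_df2(self, f):
            return -np.ones((f.shape[0], 1))  #diagonal entries only

    H = DiagonalLik().d2logpdf_df2(np.zeros((3, 1)))
    assert H.shape == (3, 3)  #the wrapper expanded the vector with np.diagflat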