From d7316ee7d916ea38fb87b21876c71d849a09512a Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 13:49:12 +0000 Subject: [PATCH 01/10] Relaxed inference test requirement --- GPy/testing/inference_tests.py | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py index ac92c519..d5039049 100644 --- a/GPy/testing/inference_tests.py +++ b/GPy/testing/inference_tests.py @@ -11,39 +11,38 @@ import GPy class InferenceXTestCase(unittest.TestCase): - + def genData(self): D1,D2,N = 12,12,50 - np.random.seed(1234) - + x = np.linspace(0, 4 * np.pi, N)[:, None] s1 = np.vectorize(lambda x: np.sin(x)) s2 = np.vectorize(lambda x: np.cos(x)**2) s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x))) sS = np.vectorize(lambda x: np.cos(x)) - + s1 = s1(x) s2 = s2(x) s3 = s3(x) sS = sS(x) - + s1 -= s1.mean(); s1 /= s1.std(0) s2 -= s2.mean(); s2 /= s2.std(0) s3 -= s3.mean(); s3 /= s3.std(0) sS -= sS.mean(); sS /= sS.std(0) - + S1 = np.hstack([s1, sS]) S2 = np.hstack([s3, sS]) - + P1 = np.random.randn(S1.shape[1], D1) P2 = np.random.randn(S2.shape[1], D2) - + Y1 = S1.dot(P1) Y2 = S2.dot(P2) - + Y1 += .01 * np.random.randn(*Y1.shape) Y2 += .01 * np.random.randn(*Y2.shape) - + Y1 -= Y1.mean(0) Y2 -= Y2.mean(0) Y1 /= Y1.std(0) @@ -52,33 +51,34 @@ class InferenceXTestCase(unittest.TestCase): slist = [s1, s2, s3, sS] slist_names = ["s1", "s2", "s3", "sS"] Ylist = [Y1, Y2] - + return Ylist - + def test_inferenceX_BGPLVM(self): Ys = self.genData() m = GPy.models.BayesianGPLVM(Ys[0],5,kernel=GPy.kern.Linear(5,ARD=True)) - + x,mi = m.infer_newX(m.Y, optimize=False) self.assertTrue(mi.checkgrad()) - - m.optimize(max_iters=10000) - x,mi = m.infer_newX(m.Y) - self.assertTrue(np.allclose(m.X.mean, mi.X.mean)) - self.assertTrue(np.allclose(m.X.variance, mi.X.variance)) + m.optimize(max_iters=10000) + x, mi = m.infer_newX(m.Y) + + print m.X.mean - mi.X.mean + self.assertTrue(np.allclose(m.X.mean, mi.X.mean, rtol=1e-4, atol=1e-4)) + self.assertTrue(np.allclose(m.X.variance, mi.X.variance, rtol=1e-4, atol=1e-4)) def test_inferenceX_GPLVM(self): Ys = self.genData() m = GPy.models.GPLVM(Ys[0],3,kernel=GPy.kern.RBF(3,ARD=True)) - + x,mi = m.infer_newX(m.Y, optimize=False) self.assertTrue(mi.checkgrad()) - + # m.optimize(max_iters=10000) # x,mi = m.infer_newX(m.Y) # self.assertTrue(np.allclose(m.X, x)) - + if __name__ == "__main__": unittest.main() From 932b5468ae41ffa33a2f612073b3f25548e5d164 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 11 Feb 2014 12:14:11 +0000 Subject: [PATCH 02/10] Adding likelihoods and block matrices --- GPy/util/block_matrices.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py index 95920868..cdbb1b0b 100644 --- a/GPy/util/block_matrices.py +++ b/GPy/util/block_matrices.py @@ -17,6 +17,23 @@ def get_blocks(A, blocksizes): count_i += i return B +def get_block_shapes(B): + assert B.dtype is np.dtype('object'), "Must be a block matrix" + return [B[b,b].shape[0] for b in range(0, B.shape[0])] + +def unblock(B): + assert B.dtype is np.dtype('object'), "Must be a block matrix" + block_shapes = get_block_shapes(B) + num_elements = np.sum(block_shapes) + A = np.empty(shape=(num_elements, num_elements)) + count_i = 0 + for Bi, i in enumerate(block_shapes): + count_j = 0 + for Bj, j in enumerate(block_shapes): + A[count_i:count_i + i, count_j:count_j + j] = B[Bi, Bj] + count_j += j + count_i += i + return A if __name__=='__main__': @@ -24,3 +41,8 @@ if __name__=='__main__': B = get_blocks(A,[2,3]) B[0,0] += 7 print B + + assert np.all(unblock(B) == A) + + import ipdb; ipdb.set_trace() # XXX BREAKPOINT + From 6a1de2bfc2dccd30c20a0bb30902a283eca1b6d1 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 12 Feb 2014 10:39:15 +0000 Subject: [PATCH 03/10] Added block matrix dot product --- GPy/util/block_matrices.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py index cdbb1b0b..464e3ba1 100644 --- a/GPy/util/block_matrices.py +++ b/GPy/util/block_matrices.py @@ -35,6 +35,37 @@ def unblock(B): count_i += i return A +def block_dot(A, B): + """ + Element wise dot product on block matricies + + +------+------+ +------+------+ +-------+-------+ + | | | | | | |A11.B11|B12.B12| + | A11 | A12 | | B11 | B12 | | | | + +------+------+ o +------+------| = +-------+-------+ + | | | | | | |A21.B21|A22.B22| + | A21 | A22 | | B21 | B22 | | | | + +-------------+ +------+------+ +-------+-------+ + + ..Note + If either (A or B) of the diagonal matrices are stored as vectors then a more + efficient dot product using numpy broadcasting will be used, i.e. A11*B11 + """ + #Must have same number of blocks and be a block matrix + assert A.dtype is np.dtype('object'), "Must be a block matrix" + assert B.dtype is np.dtype('object'), "Must be a block matrix" + Ashape = A.shape + Bshape = B.shape + assert Ashape == Bshape + def f(A,B): + if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]: + #FIXME: Careful if one is transpose of other, would make a matrix + return A*B + else: + return np.dot(A,B) + dot = np.vectorize(f, otypes = [np.object]) + return dot(A,B) + if __name__=='__main__': A = np.zeros((5,5)) @@ -43,6 +74,3 @@ if __name__=='__main__': print B assert np.all(unblock(B) == A) - - import ipdb; ipdb.set_trace() # XXX BREAKPOINT - From 0ea3d336957372a9ee7e40b9db116c881e99279b Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 14:17:03 +0000 Subject: [PATCH 04/10] Merging with private repo, mostly fixed --- .../latent_function_inference/__init__.py | 12 +- .../latent_function_inference/laplace.py | 205 +++++++- GPy/likelihoods/bernoulli.py | 38 ++ GPy/likelihoods/gaussian.py | 19 +- GPy/likelihoods/likelihood.py | 255 ++++++++-- GPy/likelihoods/student_t.py | 7 +- GPy/testing/likelihood_tests.py | 472 +++++++++--------- GPy/util/misc.py | 78 ++- 8 files changed, 768 insertions(+), 318 deletions(-) diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 67f57638..dc7789ba 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -50,19 +50,19 @@ class InferenceMethodList(LatentFunctionInference, list): def on_optimization_end(self): for inf in self: inf.on_optimization_end() - + def __getstate__(self): state = [] for inf in self: state.append(inf) return state - + def __setstate__(self, state): for inf in state: self.append(inf) from exact_gaussian_inference import ExactGaussianInference -from laplace import Laplace +from laplace import Laplace, LaplaceBlock from GPy.inference.latent_function_inference.var_dtc import VarDTC from expectation_propagation import EP from expectation_propagation_dtc import EPDTC @@ -78,9 +78,9 @@ from svgp import SVGP # class EMLikeLatentFunctionInference(LatentFunctionInference): # def update_approximation(self): # """ -# This function gets called when the +# This function gets called when the # """ -# +# # def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): # """ # Do inference on the latent functions given a covariance function `kern`, @@ -88,7 +88,7 @@ from svgp import SVGP # Additional metadata for the outputs `Y` can be given in `Y_metadata`. # """ # raise NotImplementedError, "Abstract base class for full inference" -# +# # class VariationalLatentFunctionInference(LatentFunctionInference): # def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): # """ diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 05711b0b..4e25b4b1 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -43,28 +43,31 @@ class Laplace(LatentFunctionInference): """ Returns a Posterior class containing essential quantities of the posterior """ - # Compute K K = kern.K(X) #Find mode if self.bad_fhat or self.first_run: Ki_f_init = np.zeros_like(Y) - first_run = False + self.first_run = False else: Ki_f_init = self._previous_Ki_fhat + Ki_f_init = np.zeros_like(Y)# FIXME: take this out + f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) + self.f_hat = f_hat - self.Ki_fhat = Ki_fhat - self.K = K.copy() + #self.Ki_fhat = Ki_fhat + #self.K = K.copy() + #Compute hessian and other variables at mode log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) self._previous_Ki_fhat = Ki_fhat.copy() return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} - def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None): + def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs): """ Rasmussen's numerically stable mode finding For nomenclature see Rasmussen & Williams 2006 @@ -89,7 +92,12 @@ class Laplace(LatentFunctionInference): #define the objective function (to be maximised) def obj(Ki_f, f): - return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata)) + ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata)) + if np.isnan(ll): + return -np.inf + else: + return ll + difference = np.inf iteration = 0 @@ -104,7 +112,7 @@ class Laplace(LatentFunctionInference): W_f = W*f b = W_f + grad # R+W p46 line 6. - W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave) + W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs) W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b)) #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet @@ -121,7 +129,9 @@ class Laplace(LatentFunctionInference): step = optimize.brent(inner_obj, tol=1e-4, maxiter=12) Ki_f_new = Ki_f + step*dKi_f f_new = np.dot(K, Ki_f_new) - + #print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f)) + if obj(Ki_f_new, f_new) < obj(Ki_f, f): + raise ValueError("Shouldn't happen, brent optimization failing") difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f)) Ki_f = Ki_f_new f = f_new @@ -152,14 +162,10 @@ class Laplace(LatentFunctionInference): if np.any(np.isnan(W)): raise ValueError('One or more element(s) of W is NaN') - K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) - - #compute vital matrices - C = np.dot(LiW12, K) - Ki_W_i = K - C.T.dot(C) + K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave) #compute the log marginal - log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L))) + log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW # Compute matrices for derivatives dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat @@ -196,23 +202,23 @@ class Laplace(LatentFunctionInference): dL_dthetaL = np.zeros(num_params) for thetaL_i in range(num_params): #Explicit - dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i]) + dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :]) # The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL - + 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten()) + + 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :])) ) #Implicit - dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i]) - #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i]) + dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :]) + #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :]) dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL) - dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp + dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp) else: dL_dthetaL = np.zeros(likelihood.size) return log_marginal, K_Wi_i, dL_dK, dL_dthetaL - def _compute_B_statistics(self, K, W, log_concave): + def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs): """ Rasmussen suggests the use of a numerically stable positive definite matrix B Which has a positive diagonal elements and can be easily inverted @@ -225,7 +231,7 @@ class Laplace(LatentFunctionInference): """ if not log_concave: #print "Under 1e-10: {}".format(np.sum(W < 1e-6)) - W[W<1e-6] = 1e-6 + W = np.clip(W, 1e-6, 1e+30) # NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!! #W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur # If the likelihood is non-log-concave. We wan't to say that there is a negative variance @@ -247,5 +253,160 @@ class Laplace(LatentFunctionInference): #K_Wi_i_2 , _= dpotri(L2) #symmetrify(K_Wi_i_2) - return K_Wi_i, L, LiW12 + #compute vital matrices + C = np.dot(LiW12, K) + Ki_W_i = K - C.T.dot(C) + I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i) + logdet_I_KW = 2*np.sum(np.log(np.diag(L))) + + return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i + +class LaplaceBlock(Laplace): + def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs): + Ki_f = Ki_f_init.copy() + f = np.dot(K, Ki_f) + + #define the objective function (to be maximised) + def obj(Ki_f, f): + ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata)) + if np.isnan(ll): + return -np.inf + else: + return ll + + difference = np.inf + iteration = 0 + + I = np.eye(K.shape[0]) + while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter: + W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata) + + W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30) + + W_f = np.dot(W, f) + grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata) + + b = W_f + grad # R+W p46 line 6. + K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs) + + #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet + #a = (I - (K+Wi)i*K)*b + full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b) + dKi_f = full_step_Ki_f - Ki_f + + #define an objective for the line search (minimize this one) + def inner_obj(step_size): + Ki_f_trial = Ki_f + step_size*dKi_f + f_trial = np.dot(K, Ki_f_trial) + return -obj(Ki_f_trial, f_trial) + + #use scipy for the line search, the compute new values of f, Ki_f + step = optimize.brent(inner_obj, tol=1e-4, maxiter=12) + + Ki_f_new = Ki_f + step*dKi_f + f_new = np.dot(K, Ki_f_new) + + difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f)) + Ki_f = Ki_f_new + f = f_new + iteration += 1 + + #Warn of bad fits + if difference > self._mode_finding_tolerance: + if not self.bad_fhat: + warnings.warn("Not perfect f_hat fit difference: {}".format(difference)) + self._previous_Ki_fhat = np.zeros_like(Y) + self.bad_fhat = True + elif self.bad_fhat: + self.bad_fhat = False + warnings.warn("f_hat now fine again") + if iteration > self._mode_finding_max_iter: + warnings.warn("didn't find the best") + + return f, Ki_f + + def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata): + #At this point get the hessian matrix (or vector as W is diagonal) + W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) + + W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30) + + K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave) + + #compute the log marginal + #FIXME: The derterminant should be output_dim*0.5 I think, gradients may now no longer check + log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det + + #Compute vival matrices for derivatives + dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat + + #dL_dfhat = np.zeros((f_hat.shape[0])) + #for i in range(f_hat.shape[0]): + #dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i])) + + dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df) + + woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata) + + #################### + #compute dL_dK# + #################### + if kern.size > 0 and not kern.is_fixed: + #Explicit + explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i) + + #Implicit + implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i) + #implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i) + + dL_dK = explicit_part + implicit_part + else: + dL_dK = np.zeros_like(K) + + #################### + #compute dL_dthetaL# + #################### + if likelihood.size > 0 and not likelihood.is_fixed: + raise NotImplementedError + else: + dL_dthetaL = np.zeros(likelihood.size) + + #self.K_Wi_i = K_Wi_i + #self.Ki_W_i = Ki_W_i + #self.W = W + #self.K = K + #self.dL_dfhat = dL_dfhat + #self.explicit_part = explicit_part + #self.implicit_part = implicit_part + return log_marginal, K_Wi_i, dL_dK, dL_dthetaL + + def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs): + """ + Rasmussen suggests the use of a numerically stable positive definite matrix B + Which has a positive diagonal element and can be easyily inverted + + :param K: Prior Covariance matrix evaluated at locations X + :type K: NxN matrix + :param W: Negative hessian at a point (diagonal matrix) + :type W: Vector of diagonal values of hessian (1xN) + :returns: (K_Wi_i, L_B, not_provided) + """ + #w = GPy.util.diag.view(W) + #W[:] = np.where(w<1e-6, 1e-6, w) + + #B = I + KW + B = np.eye(K.shape[0]) + np.dot(K, W) + #Bi, L, Li, logdetB = pdinv(B) + Bi = np.linalg.inv(B) + + #K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K) + K_Wi_i = np.dot(W, Bi) + + #self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W)) + #self.B = B + #self.Bi = Bi + Ki_W_i = np.dot(Bi, K) + + sign, logdetB = np.linalg.slogdet(B) + return K_Wi_i, sign*logdetB, Bi, Ki_W_i diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 26de274b..6277c1dc 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -248,3 +248,41 @@ class Bernoulli(Likelihood): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): pass + + def variational_expectations(self, Y, m, v, gh_points=None): + """ + Probit specific numerical stable integrations + """ + #Move to be faster + if self.gp_link: + pass + Yshape = Y.shape + mshape = m.shape + vshape = v.shape + Y = Y.flatten() + m = m.flatten() + v = v.flatten() + + assert Yshape == mshape + assert mshape == vshape + + Ysign = np.where(Y==1,1,-1).flatten() + gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + + #Shapes a bit weird + X = gh_x[None,:]*np.sqrt(2.*v[:, None]) + (m*Ysign)[:,None] + p = stats.norm.cdf(X) + p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability + N = stats.norm.pdf(X) + F = np.log(p).dot(gh_w) + NoverP = N/p + dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) + dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w) + if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)): + stop + if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)): + stop + #FIXME: Might be wrong reshaping + return F.reshape(Yshape), dF_dm.reshape(mshape), dF_dv.reshape(vshape), None + + diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 4e7de9e3..021ec269 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -34,7 +34,9 @@ class Gaussian(Likelihood): if gp_link is None: gp_link = link_functions.Identity() - assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link" + if not isinstance(gp_link, link_functions.Identity): + print "Warning, Exact inference is not implemeted for non-identity link functions,\ + if you are not already, ensure Laplace inference_method is used" super(Gaussian, self).__init__(gp_link, name=name) @@ -263,16 +265,19 @@ class Gaussian(Likelihood): return d2logpdf_dlink2_dvar def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): - dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) - return dlogpdf_dvar + dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1])) + dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) + return dlogpdf_dtheta def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): - dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) - return dlogpdf_dlink_dvar + dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1])) + dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) + return dlogpdf_dlink_dtheta def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): - d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) - return d2logpdf_dlink2_dvar + d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1])) + d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) + return d2logpdf_dlink2_dtheta def _mean(self, gp): """ diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index b1e78b93..ee2f5368 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -5,7 +5,7 @@ import numpy as np from scipy import stats,special import scipy as sp import link_functions -from ..util.misc import chain_1, chain_2, chain_3 +from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian from scipy.integrate import quad import warnings from ..core.parameterization import Parameterized @@ -39,6 +39,7 @@ class Likelihood(Parameterized): assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation." self.gp_link = gp_link self.log_concave = False + self.not_block_really = False def _gradients(self,partial): return np.zeros(0) @@ -189,20 +190,27 @@ class Likelihood(Parameterized): """ #conditional_mean: the edpected value of y given some f, under this likelihood + fmin = -np.inf + fmax = np.inf def int_mean(f,m,v): - p = np.exp(-(0.5/v)*np.square(f - m)) + exponent = -(0.5/v)*np.square(f - m) + #If exponent is under -30 then exp(exponent) will be very small, so don't exp it!) #If p is zero then conditional_mean will overflow + assert v.all() > 0 + p = safe_exp(exponent) + + #If p is zero then conditional_variance will overflow if p < 1e-10: return 0. else: return self.conditional_mean(f)*p - scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] + scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance)) return mean def _conditional_mean(self, f): - """Quadrature calculation of the conditional mean: E(Y_star|f)""" + """Quadrature calculation of the conditional mean: E(Y_star|f_star)""" raise NotImplementedError, "implement this function to make predictions" def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): @@ -210,7 +218,7 @@ class Likelihood(Parameterized): Approximation to the predictive variance: V(Y_star) The following variance decomposition is used: - V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + V(Y_star) = E( V(Y_star|f_star)**2 ) + V( E(Y_star|f_star) )**2 :param mu: mean of posterior :param sigma: standard deviation of posterior @@ -220,15 +228,22 @@ class Likelihood(Parameterized): #sigma2 = sigma**2 normalizer = np.sqrt(2*np.pi*variance) + fmin_v = -np.inf + fmin_m = np.inf + fmin = -np.inf + fmax = np.inf + + from ..util.misc import safe_exp # E( V(Y_star|f_star) ) def int_var(f,m,v): - p = np.exp(-(0.5/v)*np.square(f - m)) + exponent = -(0.5/v)*np.square(f - m) + p = safe_exp(exponent) #If p is zero then conditional_variance will overflow if p < 1e-10: return 0. else: return self.conditional_variance(f)*p - scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] + scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] exp_var = np.array(scaled_exp_variance)[:,None] / normalizer #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2 @@ -240,14 +255,15 @@ class Likelihood(Parameterized): #E( E(Y_star|f_star)**2 ) def int_pred_mean_sq(f,m,v,predictive_mean_sq): - p = np.exp(-(0.5/v)*np.square(f - m)) + exponent = -(0.5/v)*np.square(f - m) + p = np.exp(exponent) #If p is zero then conditional_mean**2 will overflow if p < 1e-10: return 0. else: return self.conditional_mean(f)**2*p - scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] + scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer var_exp = exp_exp2 - predictive_mean_sq @@ -295,8 +311,18 @@ class Likelihood(Parameterized): :returns: likelihood evaluated for this point :rtype: float """ - inv_link_f = self.gp_link.transf(f) - return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata) + if isinstance(self.gp_link, link_functions.Identity): + return self.pdf_link(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata) + + def logpdf_sum(self, f, y, Y_metadata=None): + """ + Convenience function that can overridden for functions where this could + be computed more efficiently (Theano?) + """ + return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata)) def logpdf(self, f, y, Y_metadata=None): """ @@ -313,8 +339,11 @@ class Likelihood(Parameterized): :returns: log likelihood evaluated for this point :rtype: float """ - inv_link_f = self.gp_link.transf(f) - return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) + if isinstance(self.gp_link, link_functions.Identity): + return self.logpdf_link(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) def dlogpdf_df(self, f, y, Y_metadata=None): """ @@ -332,11 +361,15 @@ class Likelihood(Parameterized): :returns: derivative of log likelihood evaluated for this point :rtype: 1xN array """ - inv_link_f = self.gp_link.transf(f) - dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) - dlink_df = self.gp_link.dtransf_df(f) - return chain_1(dlogpdf_dlink, dlink_df) + if isinstance(self.gp_link, link_functions.Identity): + return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) + dlink_df = self.gp_link.dtransf_df(f) + return chain_1(dlogpdf_dlink, dlink_df) + @blockify_hessian def d2logpdf_df2(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the second derivative of log likelihood using it @@ -353,13 +386,18 @@ class Likelihood(Parameterized): :returns: second derivative of log likelihood evaluated for this point (diagonal only) :rtype: 1xN array """ - inv_link_f = self.gp_link.transf(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) - dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) - d2link_df2 = self.gp_link.d2transf_df2(f) - return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) + if isinstance(self.gp_link, link_functions.Identity): + d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) + d2link_df2 = self.gp_link.d2transf_df2(f) + d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) + return d2logpdf_df2 + @blockify_third def d3logpdf_df3(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the third derivative of log likelihood using it @@ -376,64 +414,96 @@ class Likelihood(Parameterized): :returns: third derivative of log likelihood evaluated for this point :rtype: float """ - inv_link_f = self.gp_link.transf(f) - d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) - dlink_df = self.gp_link.dtransf_df(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) - d2link_df2 = self.gp_link.d2transf_df2(f) - dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) - d3link_df3 = self.gp_link.d3transf_df3(f) - return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) + if isinstance(self.gp_link, link_functions.Identity): + d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) + dlink_df = self.gp_link.dtransf_df(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) + d2link_df2 = self.gp_link.d2transf_df2(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) + d3link_df3 = self.gp_link.d3transf_df3(f) + d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) + return d3logpdf_df3 + def dlogpdf_dtheta(self, f, y, Y_metadata=None): """ TODO: Doc strings """ if self.size > 0: - inv_link_f = self.gp_link.transf(f) - return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata) + if self.not_block_really: + raise NotImplementedError("Need to make a decorator for this!") + if isinstance(self.gp_link, link_functions.Identity): + return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata) else: # There are no parameters so return an empty array for derivatives - return np.zeros([1, 0]) + return np.zeros((0, f.shape[0], f.shape[1])) def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): """ TODO: Doc strings """ if self.size > 0: - inv_link_f = self.gp_link.transf(f) - dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) - return chain_1(dlogpdf_dlink_dtheta, dlink_df) + if self.not_block_really: + raise NotImplementedError("Need to make a decorator for this!") + if isinstance(self.gp_link, link_functions.Identity): + return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) + + dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1])) + #Chain each parameter of hte likelihood seperately + for p in range(self.size): + dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df) + return dlogpdf_df_dtheta + #return chain_1(dlogpdf_dlink_dtheta, dlink_df) else: # There are no parameters so return an empty array for derivatives - return np.zeros([f.shape[0], 0]) + return np.zeros((0, f.shape[0], f.shape[1])) def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): """ TODO: Doc strings """ if self.size > 0: - inv_link_f = self.gp_link.transf(f) - dlink_df = self.gp_link.dtransf_df(f) - d2link_df2 = self.gp_link.d2transf_df2(f) - d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) - return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) + if self.not_block_really: + raise NotImplementedError("Need to make a decorator for this!") + if isinstance(self.gp_link, link_functions.Identity): + return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + d2link_df2 = self.gp_link.d2transf_df2(f) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) + + d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1])) + #Chain each parameter of hte likelihood seperately + for p in range(self.size): + d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2) + return d2logpdf_df2_dtheta + #return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) else: # There are no parameters so return an empty array for derivatives - return np.zeros([f.shape[0], 0]) + return np.zeros((0, f.shape[0], f.shape[1])) def _laplace_gradients(self, f, y, Y_metadata=None): - dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0) + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata) dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata) d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata) #Parameters are stacked vertically. Must be listed in same order as 'get_param_names' # ensure we have gradients for every parameter we want to optimize - assert len(dlogpdf_dtheta) == self.size #1 x num_param array - assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix - assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix + assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array + assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param + assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta @@ -454,19 +524,98 @@ class Likelihood(Parameterized): def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None): #compute the quantiles by sampling!!! - N_samp = 1000 + N_samp = 50 s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu #ss_f = s.flatten() #ss_y = self.samples(ss_f, Y_metadata) + #ss_y = self.samples(s, Y_metadata, samples=100) ss_y = self.samples(s, Y_metadata) #ss_y = ss_y.reshape(mu.shape[0], N_samp) return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] - def samples(self, gp, Y_metadata=None): + def samples(self, gp, Y_metadata=None, samples=1): """ Returns a set of samples of observations based on a given value of the latent variable. :param gp: latent variable + :param samples: number of samples to take for each f location """ - raise NotImplementedError + raise NotImplementedError("""May be possible to use MCMC with user-tuning, see + MCMC_pdf_samples in likelihood.py and write samples function + using this, beware this is a simple implementation + of Metropolis and will not work well for all likelihoods""") + + def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None): + """ + Simple implementation of Metropolis sampling algorithm + + Will run a parallel chain for each input dimension (treats each f independently) + Thus assumes f*_1 independant of f*_2 etc. + + :param num_samples: Number of samples to take + :param fNew: f at which to sample around + :param starting_loc: Starting locations of the independant chains (usually will be conditional_mean of likelihood), often link_f + :param stepsize: Stepsize for the normal proposal distribution (will need modifying) + :param burnin: number of samples to use for burnin (will need modifying) + :param Y_metadata: Y_metadata for pdf + """ + print "Warning, using MCMC for sampling y*, needs to be tuned!" + if starting_loc is None: + starting_loc = fNew + from functools import partial + logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata) + pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None])) + #Should be the link function of f is a good starting point + #(i.e. the point before you corrupt it with the likelihood) + par_chains = starting_loc.shape[0] + chain_values = np.zeros((par_chains, num_samples)) + chain_values[:, 0][:,None] = starting_loc + #Use same stepsize for all par_chains + stepsize = np.ones(par_chains)*stepsize + accepted = np.zeros((par_chains, num_samples+burn_in)) + accept_ratio = np.zeros(num_samples+burn_in) + #Whilst burning in, only need to keep the previous lot + burnin_cache = np.zeros(par_chains) + burnin_cache[:] = starting_loc.flatten() + burning_in = True + for i in xrange(burn_in+num_samples): + next_ind = i-burn_in + if burning_in: + old_y = burnin_cache + else: + old_y = chain_values[:,next_ind-1] + + old_lik = pdf(old_y) + #Propose new y from Gaussian proposal + new_y = np.random.normal(loc=old_y, scale=stepsize) + new_lik = pdf(new_y) + #Accept using Metropolis (not hastings) acceptance + #Always accepts if new_lik > old_lik + accept_probability = np.minimum(1, new_lik/old_lik) + u = np.random.uniform(0,1,par_chains) + #print "Accept prob: ", accept_probability + accepts = u < accept_probability + if burning_in: + burnin_cache[accepts] = new_y[accepts] + burnin_cache[~accepts] = old_y[~accepts] + if i == burn_in: + burning_in = False + chain_values[:,0] = burnin_cache + else: + #If it was accepted then new_y becomes the latest sample + chain_values[accepts, next_ind] = new_y[accepts] + #Otherwise use old y as the sample + chain_values[~accepts, next_ind] = old_y[~accepts] + + accepted[~accepts, i] = 0 + accepted[accepts, i] = 1 + accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains) + + #Show progress + if i % int((burn_in+num_samples)*0.1) == 0: + print "{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i) + print "Last run accept ratio: ", accept_ratio[i] + + print "Average accept ratio: ", np.mean(accept_ratio) + return chain_values diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index dbd4d94f..f16a55e9 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -226,17 +226,18 @@ class StudentT(Likelihood): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet - return np.hstack((dlogpdf_dvar, dlogpdf_dv)) + return np.array((dlogpdf_dvar, dlogpdf_dv)) def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet - return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) + return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet - return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) + + return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): # The comment here confuses mean and median. diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 877d1aa0..7b6164c1 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -10,7 +10,7 @@ from GPy.likelihoods import link_functions from GPy.core.parameterization import Param from functools import partial #np.random.seed(300) -#np.random.seed(7) +#np.random.seed(4) #np.seterr(divide='raise') def dparam_partial(inst_func, *args): @@ -52,8 +52,17 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, zipped_params = zip(params, params_names) for param_ind, (param_val, param_name) in enumerate(zipped_params): #Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter - fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0] - dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0] + f_ = partial_f(param_val, param_name) + df_ = partial_df(param_val, param_name) + #Reshape it such that we have a 3d matrix incase, that is we want it (?, N, D) regardless of whether ? is num_params or not + f_ = f_.reshape(-1, f_.shape[0], f_.shape[1]) + df_ = df_.reshape(-1, f_.shape[0], f_.shape[1]) + + #Get the number of f and number of dimensions + fnum = f_.shape[-2] + fdim = f_.shape[-1] + dfnum = df_.shape[-2] + for fixed_val in range(dfnum): #dlik and dlik_dvar gives back 1 value for each f_ind = min(fnum, fixed_val+1) - 1 @@ -61,9 +70,13 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, #Make grad checker with this param moving, note that set_params is NOT being called #The parameter is being set directly with __setattr__ #Check only the parameter and function value we wish to check at a time - grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind], - lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind], - param_val, [param_name]) + #func = lambda p_val, fnum, fdim, param_ind, f_ind, param_ind: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :] + #dfunc_dparam = lambda d_val, fnum, fdim, param_ind, fixed_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :] + + #First we reshape the output such that it is (num_params, N, D) then we pull out the relavent parameter-findex and checkgrad just this index at a time + func = lambda p_val: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :] + dfunc_dparam = lambda d_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :] + grad = GradientChecker(func, dfunc_dparam, param_val, [param_name]) if constraints is not None: for constrain_param, constraint in constraints: @@ -104,37 +117,9 @@ class TestNoiseModels(object): self.var = 0.2 - self.var = np.random.rand(1) - #Make a bigger step as lower bound can be quite curved self.step = 1e-4 - def tearDown(self): - self.Y = None - self.f = None - self.X = None - - def test_scale2_models(self): - self.setUp() - - #################################################### - # Constraint wrappers so we can just list them off # - #################################################### - def constrain_fixed(regex, model): - model[regex].constrain_fixed() - - def constrain_negative(regex, model): - model[regex].constrain_negative() - - def constrain_positive(regex, model): - model[regex].constrain_positive() - - def constrain_bounded(regex, model, lower, upper): - """ - Used like: partial(constrain_bounded, lower=0, upper=1) - """ - model[regex].constrain_bounded(lower, upper) - """ Dictionary where we nest models we would like to check Name: { @@ -149,136 +134,170 @@ class TestNoiseModels(object): "link_f_constraints": [constraint_wrappers, listed_here] } """ - noise_models = {"Student_t_default": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - #"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] - }, - "laplace": True - }, - "Student_t_1_var": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [1.0], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_small_deg_free": { - "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_small_var": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [0.001], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_large_var": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [10.0], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_approx_gauss": { - "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_log": { - "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Gaussian_default": { - "model": GPy.likelihoods.Gaussian(variance=self.var), - "grad_params": { - "names": [".*variance"], - "vals": [self.var], - "constraints": [(".*variance", constrain_positive)] - }, - "laplace": True, - "ep": False # FIXME: Should be True when we have it working again - }, - #"Gaussian_log": { - #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N), - #"grad_params": { - #"names": ["noise_model_variance"], - #"vals": [self.var], - #"constraints": [constrain_positive] - #}, - #"laplace": True - #}, - #"Gaussian_probit": { - #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N), - #"grad_params": { - #"names": ["noise_model_variance"], - #"vals": [self.var], - #"constraints": [constrain_positive] - #}, - #"laplace": True - #}, - #"Gaussian_log_ex": { - #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N), - #"grad_params": { - #"names": ["noise_model_variance"], - #"vals": [self.var], - #"constraints": [constrain_positive] - #}, - #"laplace": True - #}, - "Bernoulli_default": { - "model": GPy.likelihoods.Bernoulli(), - "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], - "laplace": True, - "Y": self.binary_Y, - "ep": False # FIXME: Should be True when we have it working again - }, - "Exponential_default": { - "model": GPy.likelihoods.Exponential(), - "link_f_constraints": [constrain_positive], - "Y": self.positive_Y, - "laplace": True, - }, - "Poisson_default": { - "model": GPy.likelihoods.Poisson(), - "link_f_constraints": [constrain_positive], - "Y": self.integer_Y, - "laplace": True, - "ep": False #Should work though... - }#, - #GAMMA needs some work!"Gamma_default": { - #"model": GPy.likelihoods.Gamma(), - #"link_f_constraints": [constrain_positive], - #"Y": self.positive_Y, - #"laplace": True - #} - } + self.noise_models = {"Student_t_default": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [self.var], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_1_var": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [1.0], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_small_deg_free": { + "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [self.var], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_small_var": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [0.001], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_large_var": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [10.0], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_approx_gauss": { + "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [self.var], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + #"Student_t_log": { + #"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), + #"grad_params": { + #"names": [".*t_noise"], + #"vals": [self.var], + #"constraints": [(".*t_noise", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + #}, + #"laplace": True + #}, + "Gaussian_default": { + "model": GPy.likelihoods.Gaussian(variance=self.var), + "grad_params": { + "names": [".*variance"], + "vals": [self.var], + "constraints": [(".*variance", self.constrain_positive)] + }, + "laplace": True, + "ep": False # FIXME: Should be True when we have it working again + }, + "Gaussian_log": { + "model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var), + "grad_params": { + "names": [".*variance"], + "vals": [self.var], + "constraints": [(".*variance", self.constrain_positive)] + }, + "laplace": True + }, + #"Gaussian_probit": { + #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": ["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + #"Gaussian_log_ex": { + #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": ["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + "Bernoulli_default": { + "model": GPy.likelihoods.Bernoulli(), + "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)], + "laplace": True, + "Y": self.binary_Y, + "ep": False # FIXME: Should be True when we have it working again + }, + "Exponential_default": { + "model": GPy.likelihoods.Exponential(), + "link_f_constraints": [self.constrain_positive], + "Y": self.positive_Y, + "laplace": True, + }, + "Poisson_default": { + "model": GPy.likelihoods.Poisson(), + "link_f_constraints": [self.constrain_positive], + "Y": self.integer_Y, + "laplace": True, + "ep": False #Should work though... + }, + #, + #GAMMA needs some work!"Gamma_default": { + #"model": GPy.likelihoods.Gamma(), + #"link_f_constraints": [constrain_positive], + #"Y": self.positive_Y, + #"laplace": True + #} + } - for name, attributes in noise_models.iteritems(): + + #################################################### + # Constraint wrappers so we can just list them off # + #################################################### + def constrain_fixed(self, regex, model): + model[regex].constrain_fixed() + + def constrain_negative(self, regex, model): + model[regex].constrain_negative() + + def constrain_positive(self, regex, model): + model[regex].constrain_positive() + + def constrain_fixed_below(self, regex, model, up_to): + model[regex][0:up_to].constrain_fixed() + + def constrain_fixed_above(self, regex, model, above): + model[regex][above:].constrain_fixed() + + def constrain_bounded(self, regex, model, lower, upper): + """ + Used like: partial(constrain_bounded, lower=0, upper=1) + """ + model[regex].constrain_bounded(lower, upper) + + + def tearDown(self): + self.Y = None + self.f = None + self.X = None + + def test_scale2_models(self): + self.setUp() + + for name, attributes in self.noise_models.iteritems(): model = attributes["model"] if "grad_params" in attributes: params = attributes["grad_params"] @@ -290,7 +309,7 @@ class TestNoiseModels(object): param_vals = [] param_names = [] constrain_positive = [] - param_constraints = [] # ??? TODO: Saul to Fix. + param_constraints = [] if "link_f_constraints" in attributes: link_f_constraints = attributes["link_f_constraints"] else: @@ -303,6 +322,10 @@ class TestNoiseModels(object): f = attributes["f"].copy() else: f = self.f.copy() + if "Y_metadata" in attributes: + Y_metadata = attributes["Y_metadata"].copy() + else: + Y_metadata = None if "laplace" in attributes: laplace = attributes["laplace"] else: @@ -317,30 +340,30 @@ class TestNoiseModels(object): #Required by all #Normal derivatives - yield self.t_logpdf, model, Y, f - yield self.t_dlogpdf_df, model, Y, f - yield self.t_d2logpdf_df2, model, Y, f + yield self.t_logpdf, model, Y, f, Y_metadata + yield self.t_dlogpdf_df, model, Y, f, Y_metadata + yield self.t_d2logpdf_df2, model, Y, f, Y_metadata #Link derivatives - yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints - yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints + yield self.t_dlogpdf_dlink, model, Y, f, Y_metadata, link_f_constraints + yield self.t_d2logpdf_dlink2, model, Y, f, Y_metadata, link_f_constraints if laplace: #Laplace only derivatives - yield self.t_d3logpdf_df3, model, Y, f - yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints + yield self.t_d3logpdf_df3, model, Y, f, Y_metadata + yield self.t_d3logpdf_dlink3, model, Y, f, Y_metadata, link_f_constraints #Params - yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints + yield self.t_dlogpdf_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_dlogpdf_df_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_d2logpdf2_df2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints #Link params - yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints + yield self.t_dlogpdf_link_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_dlogpdf_dlink_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints #laplace likelihood gradcheck - yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints if ep: #ep likelihood gradcheck - yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints self.tearDown() @@ -349,41 +372,41 @@ class TestNoiseModels(object): # dpdf_df's # ############# @with_setup(setUp, tearDown) - def t_logpdf(self, model, Y, f): + def t_logpdf(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) print model #print model._get_params() np.testing.assert_almost_equal( - model.pdf(f.copy(), Y.copy()).prod(), - np.exp(model.logpdf(f.copy(), Y.copy()).sum()) + model.pdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).prod(), + np.exp(model.logpdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).sum()) ) @with_setup(setUp, tearDown) - def t_dlogpdf_df(self, model, Y, f): + def t_dlogpdf_df(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) self.description = "\n{}".format(inspect.stack()[0][3]) - logpdf = functools.partial(np.sum(model.logpdf), y=Y) - dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) + logpdf = functools.partial(np.sum(model.logpdf), y=Y, Y_metadata=Y_metadata) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad.randomize() print model assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d2logpdf_df2(self, model, Y, f): + def t_d2logpdf_df2(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) - dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) - d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g') grad.randomize() print model assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d3logpdf_df3(self, model, Y, f): + def t_d3logpdf_df3(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) - d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) - d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata) + d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g') grad.randomize() print model @@ -393,32 +416,32 @@ class TestNoiseModels(object): # df_dparams # ############## @with_setup(setUp, tearDown) - def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints): + def t_dlogpdf_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, - params, params_names, args=(f, Y), constraints=param_constraints, + params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints): + def t_dlogpdf_df_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, - params, params_names, args=(f, Y), constraints=param_constraints, + params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints): + def t_d2logpdf2_df2_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, - params, params_names, args=(f, Y), constraints=param_constraints, + params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @@ -426,10 +449,10 @@ class TestNoiseModels(object): # dpdf_dlink's # ################ @with_setup(setUp, tearDown) - def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints): + def t_dlogpdf_dlink(self, model, Y, f, Y_metadata, link_f_constraints): print "\n{}".format(inspect.stack()[0][3]) - logpdf = functools.partial(model.logpdf_link, y=Y) - dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) + logpdf = functools.partial(model.logpdf_link, y=Y, Y_metadata=Y_metadata) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g') #Apply constraints to link_f values @@ -442,10 +465,10 @@ class TestNoiseModels(object): assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints): + def t_d2logpdf_dlink2(self, model, Y, f, Y_metadata, link_f_constraints): print "\n{}".format(inspect.stack()[0][3]) - dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) - d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g') #Apply constraints to link_f values @@ -458,10 +481,10 @@ class TestNoiseModels(object): assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints): + def t_d3logpdf_dlink3(self, model, Y, f, Y_metadata, link_f_constraints): print "\n{}".format(inspect.stack()[0][3]) - d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) - d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata) + d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g') #Apply constraints to link_f values @@ -477,32 +500,32 @@ class TestNoiseModels(object): # dlink_dparams # ################# @with_setup(setUp, tearDown) - def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints): + def t_dlogpdf_link_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, - params, param_names, args=(f, Y), constraints=param_constraints, + params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints): + def t_dlogpdf_dlink_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, - params, param_names, args=(f, Y), constraints=param_constraints, + params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints): + def t_d2logpdf2_dlink2_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, - params, param_names, args=(f, Y), constraints=param_constraints, + params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @@ -510,14 +533,15 @@ class TestNoiseModels(object): # laplace test # ################ @with_setup(setUp, tearDown) - def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + def t_laplace_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints): print "\n{}".format(inspect.stack()[0][3]) #Normalize Y = Y/Y.max() - white_var = 1e-6 + white_var = 1e-5 kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1]) laplace_likelihood = GPy.inference.latent_function_inference.Laplace() - m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood) + + m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=laplace_likelihood) m['.*white'].constrain_fixed(white_var) #Set constraints @@ -526,6 +550,7 @@ class TestNoiseModels(object): print m m.randomize() + m.randomize() #Set params for param_num in range(len(param_names)): @@ -545,14 +570,15 @@ class TestNoiseModels(object): # EP test # ########### @with_setup(setUp, tearDown) - def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + def t_ep_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints): print "\n{}".format(inspect.stack()[0][3]) #Normalize Y = Y/Y.max() white_var = 1e-6 kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1]) ep_inf = GPy.inference.latent_function_inference.EP() - m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf) + + m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=ep_inf) m['.*white'].constrain_fixed(white_var) for param_num in range(len(param_names)): @@ -571,8 +597,8 @@ class LaplaceTests(unittest.TestCase): """ def setUp(self): - self.N = 5 - self.D = 3 + self.N = 15 + self.D = 1 self.X = np.random.rand(self.N, self.D)*10 self.real_std = 0.1 @@ -636,20 +662,20 @@ class LaplaceTests(unittest.TestCase): exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference() m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf) m1['.*white'].constrain_fixed(1e-6) - m1['.*rbf.variance'] = initial_var_guess - m1['.*rbf.variance'].constrain_bounded(1e-4, 10) + m1['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10) m1.randomize() gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess) laplace_inf = GPy.inference.latent_function_inference.Laplace() m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf) m2['.*white'].constrain_fixed(1e-6) - m2['.*rbf.variance'].constrain_bounded(1e-4, 10) + m2['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10) m2.randomize() if debug: print m1 print m2 + optimizer = 'scg' print "Gaussian" m1.optimize(optimizer, messages=debug, ipython_notebook=False) @@ -687,8 +713,6 @@ class LaplaceTests(unittest.TestCase): pb.scatter(X, m1.likelihood.Y, c='g') pb.scatter(X, m2.likelihood.Y, c='r', marker='x') - - #Check Y's are the same np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5) #Check marginals are the same diff --git a/GPy/util/misc.py b/GPy/util/misc.py index bf37159d..99bd62b3 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -4,6 +4,16 @@ import numpy as np from config import * +_lim_val = np.finfo(np.float64).max + +_lim_val_exp = np.log(_lim_val) +_lim_val_square = np.sqrt(_lim_val) +_lim_val_cube = np.power(_lim_val, -3) + +def safe_exp(f): + clip_f = np.clip(f, -np.inf, _lim_val_exp) + return np.exp(clip_f) + def chain_1(df_dg, dg_dx): """ Generic chaining function for first derivative @@ -11,6 +21,11 @@ def chain_1(df_dg, dg_dx): .. math:: \\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx} """ + if np.all(dg_dx==1.): + return df_dg + if len(df_dg) > 1 and df_dg.shape[-1] > 1: + import ipdb; ipdb.set_trace() # XXX BREAKPOINT + raise NotImplementedError('Not implemented for matricies yet') return df_dg * dg_dx def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): @@ -20,7 +35,13 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): .. math:: \\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}} """ - return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2 + if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0): + return d2f_dg2 + if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1: + raise NotImplementedError('Not implemented for matricies yet') + #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2 + dg_dx_2 = dg_dx**2 + return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2 def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3): """ @@ -29,11 +50,18 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3): .. math:: \\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}} """ - return d3f_dg3*(dg_dx**3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3 + if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0): + return d3f_dg3 + if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1) + or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)): + raise NotImplementedError('Not implemented for matricies yet') + #dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3 + dg_dx_3 = dg_dx**3 + return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3 def opt_wrapper(m, **kwargs): """ - This function just wraps the optimization procedure of a GPy + Thit function just wraps the optimization procedure of a GPy object so that optimize() pickleable (necessary for multiprocessing). """ m.optimize(**kwargs) @@ -96,3 +124,47 @@ from :class:ndarray)""" if len(param) == 1: return param[0].view(np.ndarray) return [x.view(np.ndarray) for x in param] + +def blockify_hessian(func): + def wrapper_func(self, *args, **kwargs): + # Invoke the wrapped function first + retval = func(self, *args, **kwargs) + # Now do something here with retval and/or action + if self.not_block_really and (retval.shape[0] != retval.shape[1]): + return np.diagflat(retval) + else: + return retval + return wrapper_func + +def blockify_third(func): + def wrapper_func(self, *args, **kwargs): + # Invoke the wrapped function first + retval = func(self, *args, **kwargs) + # Now do something here with retval and/or action + if self.not_block_really and (len(retval.shape) < 3): + num_data = retval.shape[0] + d3_block_cache = np.zeros((num_data, num_data, num_data)) + diag_slice = range(num_data) + d3_block_cache[diag_slice, diag_slice, diag_slice] = np.squeeze(retval) + return d3_block_cache + else: + return retval + return wrapper_func + +def blockify_dhess_dtheta(func): + def wrapper_func(self, *args, **kwargs): + # Invoke the wrapped function first + retval = func(self, *args, **kwargs) + # Now do something here with retval and/or action + if self.not_block_really and (len(retval.shape) < 3): + num_data = retval.shape[0] + num_params = retval.shape[-1] + dhess_dtheta = np.zeros((num_data, num_data, num_params)) + diag_slice = range(num_data) + for param_ind in range(num_params): + dhess_dtheta[diag_slice, diag_slice, param_ind] = np.squeeze(retval[:,param_ind]) + return dhess_dtheta + else: + return retval + return wrapper_func + From 4d27fddd375cda05a63579706defa3af0877c4a2 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 14:24:24 +0000 Subject: [PATCH 05/10] Small tidying up --- GPy/likelihoods/bernoulli.py | 38 ----------------------------------- GPy/likelihoods/likelihood.py | 2 +- 2 files changed, 1 insertion(+), 39 deletions(-) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index c398b3a4..f5690aa4 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -248,41 +248,3 @@ class Bernoulli(Likelihood): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): pass - - def variational_expectations(self, Y, m, v, gh_points=None): - """ - Probit specific numerical stable integrations - """ - #Move to be faster - if self.gp_link: - pass - Yshape = Y.shape - mshape = m.shape - vshape = v.shape - Y = Y.flatten() - m = m.flatten() - v = v.flatten() - - assert Yshape == mshape - assert mshape == vshape - - Ysign = np.where(Y==1,1,-1).flatten() - gh_x, gh_w = np.polynomial.hermite.hermgauss(20) - - #Shapes a bit weird - X = gh_x[None,:]*np.sqrt(2.*v[:, None]) + (m*Ysign)[:,None] - p = stats.norm.cdf(X) - p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability - N = stats.norm.pdf(X) - F = np.log(p).dot(gh_w) - NoverP = N/p - dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) - dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w) - if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)): - stop - if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)): - stop - #FIXME: Might be wrong reshaping - return F.reshape(Yshape), dF_dm.reshape(mshape), dF_dv.reshape(vshape), None - - diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index a545d54e..022670a5 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -325,7 +325,7 @@ class Likelihood(Parameterized): def logpdf_sum(self, f, y, Y_metadata=None): """ Convenience function that can overridden for functions where this could - be computed more efficiently (Theano?) + be computed more efficiently """ return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata)) From 1a253ff82a9b244866a0e20fe06444dda6c0bcd4 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 15:14:52 +0000 Subject: [PATCH 06/10] Added safe_exp and tests --- GPy/likelihoods/likelihood.py | 2 +- GPy/testing/misc_tests.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 GPy/testing/misc_tests.py diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 022670a5..2e55ddb9 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -5,7 +5,7 @@ import numpy as np from scipy import stats,special import scipy as sp import link_functions -from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian +from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp from scipy.integrate import quad import warnings from ..core.parameterization import Parameterized diff --git a/GPy/testing/misc_tests.py b/GPy/testing/misc_tests.py new file mode 100644 index 00000000..e620fa7e --- /dev/null +++ b/GPy/testing/misc_tests.py @@ -0,0 +1,18 @@ +import numpy as np +import scipy as sp +import GPy + +class MiscTests(np.testing.TestCase): + """ + Testing some utilities of misc + """ + def setUp(self): + self._lim_val = np.finfo(np.float64).max + self._lim_val_exp = np.log(self._lim_val) + + def test_safe_exp_upper(self): + assert np.exp(self._lim_val_exp + 1) == np.inf + assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf + + def test_safe_exp_lower(self): + assert GPy.util.misc.safe_exp(1e-10) < np.inf From 582aa4f40618048abf597b25058f345684a98299 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 15:30:40 +0000 Subject: [PATCH 07/10] More samples for predictive quantile --- GPy/likelihoods/likelihood.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 2e55ddb9..1295245c 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -529,7 +529,7 @@ class Likelihood(Parameterized): def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None): #compute the quantiles by sampling!!! - N_samp = 50 + N_samp = 500 s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu #ss_f = s.flatten() #ss_y = self.samples(ss_f, Y_metadata) From 4f0894b6b703aa21b50cde74c4847d4e917f3dd8 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 30 Mar 2015 15:25:59 +0100 Subject: [PATCH 08/10] change the name of kernel DiffGenomeKern to DEtime --- GPy/kern/__init__.py | 3 ++- GPy/kern/_src/splitKern.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 718be74f..0e1f8a0d 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -16,5 +16,6 @@ from _src.poly import Poly from _src.eq_ode2 import EQ_ODE2 from _src.trunclinear import TruncLinear,TruncLinear_inf -from _src.splitKern import SplitKern,DiffGenomeKern +from _src.splitKern import SplitKern,DEtime +from _src.splitKern import DEtime as DiffGenomeKern diff --git a/GPy/kern/_src/splitKern.py b/GPy/kern/_src/splitKern.py index 27e4f76b..3b2e5716 100644 --- a/GPy/kern/_src/splitKern.py +++ b/GPy/kern/_src/splitKern.py @@ -7,7 +7,7 @@ from kern import Kern,CombinationKernel from .independent_outputs import index_to_slices import itertools -class DiffGenomeKern(Kern): +class DEtime(Kern): def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'): self.idx_p = idx_p From edbb576bfcfd0755319961412d8f72a10c819ece Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 30 Mar 2015 21:49:02 +0100 Subject: [PATCH 09/10] fallback the implementation of spike and slab prior --- GPy/core/parameterization/variational.py | 32 ++++--------------- .../var_dtc_parallel.py | 6 ++-- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 20 +++++++----- GPy/models/ss_gplvm.py | 5 ++- 4 files changed, 27 insertions(+), 36 deletions(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 7cc5c99a..43e8d096 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior): def KL_divergence(self, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance - gamma,gamma1 = variational_posterior.gamma_probabilities() - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() + gamma = variational_posterior.gamma.values if len(self.pi.shape)==2: - idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1]) pi = self.pi[idx] else: pi = self.pi var_mean = np.square(mu)/self.variance var_S = (S/self.variance - np.log(S)) - var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum() + var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum() return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2. def update_gradients_KL(self, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance - gamma,gamma1 = variational_posterior.gamma_probabilities() - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() + gamma = variational_posterior.gamma.values if len(self.pi.shape)==2: - idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1]) pi = self.pi[idx] else: pi = self.pi - variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1 + variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. mu.gradient -= gamma*mu/self.variance S.gradient -= (1./self.variance - 1./S) * gamma /2. if self.learnPi: @@ -162,24 +160,8 @@ class SpikeAndSlabPosterior(VariationalPosterior): binary_prob : the probability of the distribution on the slab part. """ super(SpikeAndSlabPosterior, self).__init__(means, variances, name) - self.gamma = Param("binary_prob",binary_prob) + self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.)) self.link_parameter(self.gamma) - - @Cache_this(limit=5) - def gamma_probabilities(self): - prob = np.zeros_like(param_to_array(self.gamma)) - prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710])) - prob1 = -np.zeros_like(param_to_array(self.gamma)) - prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710])) - return prob, prob1 - - @Cache_this(limit=5) - def gamma_log_prob(self): - loggamma = param_to_array(self.gamma).copy() - loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40])) - loggamma1 = -param_to_array(self.gamma).copy() - loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40])) - return loggamma,loggamma1 def set_gradients(self, grad): self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index cac69872..2e633e16 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference): Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) - Lm = jitchol(Kmm, maxtries=100) + if not np.isfinite(Kmm).all(): + print Kmm + Lm = jitchol(Kmm) LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT - LL = jitchol(Lambda, maxtries=100) + LL = jitchol(Lambda) logdet_L = 2.*np.sum(np.log(np.diag(LL))) b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0] bbt = np.square(b).sum() diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index 18a4d751..f6a24c86 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -22,12 +22,14 @@ try: # _psi1 NxM mu = variational_posterior.mean S = variational_posterior.variance + gamma = variational_posterior.binary_prob N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] l2 = np.square(lengthscale) log_denom1 = np.log(S/l2+1) log_denom2 = np.log(2*S/l2+1) - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() + log_gamma = np.log(gamma) + log_gamma1 = np.log(1.-gamma) variance = float(variance) psi0 = np.empty(N) psi0[:] = variance @@ -37,6 +39,7 @@ try: from ....util.misc import param_to_array S = param_to_array(S) mu = param_to_array(mu) + gamma = param_to_array(gamma) Z = param_to_array(Z) support_code = """ @@ -79,7 +82,7 @@ try: } } """ - weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) + weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) psi2 = psi2n.sum(axis=0) return psi0,psi1,psi2,psi2n @@ -94,12 +97,13 @@ try: mu = variational_posterior.mean S = variational_posterior.variance + gamma = variational_posterior.binary_prob N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] l2 = np.square(lengthscale) log_denom1 = np.log(S/l2+1) log_denom2 = np.log(2*S/l2+1) - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() - gamma, gamma1 = variational_posterior.gamma_probabilities() + log_gamma = np.log(gamma) + log_gamma1 = np.log(1.-gamma) variance = float(variance) dvar = np.zeros(1) @@ -113,6 +117,7 @@ try: from ....util.misc import param_to_array S = param_to_array(S) mu = param_to_array(mu) + gamma = param_to_array(gamma) Z = param_to_array(Z) support_code = """ @@ -130,7 +135,6 @@ try: double Zm1q = Z(m1,q); double Zm2q = Z(m2,q); double gnq = gamma(n,q); - double g1nq = gamma1(n,q); double mu_nq = mu(n,q); if(m2==0) { @@ -156,7 +160,7 @@ try: dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum); dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.; - dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum; + dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum); dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum; } @@ -184,7 +188,7 @@ try: dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum; dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum; - dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum; + dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum; dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; } @@ -192,7 +196,7 @@ try: } } """ - weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) + weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) dl *= 2.*lengthscale if not ARD: diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index a61ad2a0..04006d84 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI): X_variance = np.random.uniform(0,.1,X.shape) if Gamma is None: - gamma = np.random.randn(X.shape[0], input_dim) + gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation + gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim) + gamma[gamma>1.-1e-9] = 1.-1e-9 + gamma[gamma<1e-9] = 1e-9 else: gamma = Gamma.copy() From 7fa0c19a88c102516904ad007164f0276a095309 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 30 Mar 2015 22:24:48 +0100 Subject: [PATCH 10/10] optimize sslinear kernel --- GPy/kern/_src/psi_comp/sslinear_psi_comp.py | 44 +++++++++------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/GPy/kern/_src/psi_comp/sslinear_psi_comp.py b/GPy/kern/_src/psi_comp/sslinear_psi_comp.py index 5f261785..d431cd61 100644 --- a/GPy/kern/_src/psi_comp/sslinear_psi_comp.py +++ b/GPy/kern/_src/psi_comp/sslinear_psi_comp.py @@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati # Compute for psi0 and psi1 mu2S = np.square(mu)+S - dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) - dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu) - dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z) - dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance) - dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu) + dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0) + dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance + dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance + dL_dS += dL_dpsi0[:,None]*variance*gamma + dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma @@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma): gamma2 = np.square(gamma) variance2 = np.square(variance) mu2S = mu2+S # NxQ - gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance) - common_sum = np.einsum('nq,mq->nm',gvm,Z) -# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM - Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z) + gvm = gamma*mu*variance + common_sum = gvm.dot(Z.T) + Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0) + Z_expect_var2 = Z_expect*variance2 dL_dpsi2T = dL_dpsi2+dL_dpsi2.T - tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z) - common_expect = np.einsum('mq,nm->nq',tmp,common_sum) -# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum) - Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum) - Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z) + common_expect = common_sum.dot(dL_dpsi2T).dot(Z) + Z2_expect = common_sum.dot(dL_dpsi2T) + Z1_expect = dL_dpsi2T.dot(Z) - dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\ - np.einsum('nq,nq,nq->q',common_expect,gamma,mu) + dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0) - dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\ - np.einsum('nq,q,nq->nq',common_expect,variance,mu) + dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance + + dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance + + dL_dS = gamma*Z_expect_var2 - dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\ - np.einsum('nq,nq,q->nq',common_expect,gamma,variance) - - dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2) - -# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum)) - dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect) + dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ