From 169fd9b8d4852b29f77ac05535f1cb8a311a7008 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Fri, 14 Mar 2014 12:42:36 +0000
Subject: [PATCH 1/5] Stablised exp link_function and quadrature variances

---
 GPy/likelihoods/likelihood.py     |  8 +++++++-
 GPy/likelihoods/link_functions.py | 11 +++++++----
 GPy/likelihoods/poisson.py        |  2 +-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index 3eafedb1..4b8881de 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -178,7 +178,13 @@ class Likelihood(Parameterized):
 
         #E( E(Y_star|f_star)**2 )
         def int_pred_mean_sq(f,m,v,predictive_mean_sq):
-            return self.conditional_mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_mean**2 will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_mean(f)**2*p
+
         scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
         exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
 
diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py
index 2a1bf147..942fe2f4 100644
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@@ -6,6 +6,9 @@ from scipy import stats
 import scipy as sp
 from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
 
+_exp_lim_val = np.finfo(np.float64).max
+_lim_val = np.log(_exp_lim_val)
+
 class GPTransformation(object):
     """
     Link function class for doing non-Gaussian likelihoods approximation
@@ -92,16 +95,16 @@ class Log(GPTransformation):
 
     """
     def transf(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))
 
     def dtransf_df(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))
 
     def d2transf_df2(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))
 
     def d3transf_df3(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))
 
 class Log_ex_1(GPTransformation):
     """
diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py
index 419514d1..c67a7e12 100644
--- a/GPy/likelihoods/poisson.py
+++ b/GPy/likelihoods/poisson.py
@@ -21,7 +21,7 @@ class Poisson(Likelihood):
     """
     def __init__(self, gp_link=None):
         if gp_link is None:
-            gp_link = link_functions.Log_ex_1()
+            gp_link = link_functions.Log()
 
         super(Poisson, self).__init__(gp_link, name='Poisson')
 

From 7cd75ccbf58c5f01441669503d5da82a5d1f33d4 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Fri, 14 Mar 2014 12:52:52 +0000
Subject: [PATCH 2/5] Stablised other quadrature (should speed things up also),
 added sampling ability to poisson

---
 GPy/likelihoods/likelihood.py | 14 ++++++++++++--
 GPy/likelihoods/poisson.py    |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index 4b8881de..98a856fd 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -142,7 +142,12 @@ class Likelihood(Parameterized):
         """
         #conditional_mean: the edpected value of y given some f, under this likelihood
         def int_mean(f,m,v):
-            return self.conditional_mean(f)*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_mean will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_mean(f)*p
         scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
 
@@ -165,7 +170,12 @@ class Likelihood(Parameterized):
 
         # E( V(Y_star|f_star) )
         def int_var(f,m,v):
-            return self.conditional_variance(f)*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_variance will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_variance(f)*p
         scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
 
diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py
index c67a7e12..c0e2c81f 100644
--- a/GPy/likelihoods/poisson.py
+++ b/GPy/likelihoods/poisson.py
@@ -143,7 +143,7 @@ class Poisson(Likelihood):
         """
         return self.gp_link.transf(gp)
 
-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
 

From a12bdafad7d03e57bd40818cbcb49d671205d7b7 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Fri, 14 Mar 2014 14:28:34 +0000
Subject: [PATCH 3/5] added jitter to fitc

---
 GPy/inference/latent_function_inference/fitc.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py
index 9294e25d..c4147d06 100644
--- a/GPy/inference/latent_function_inference/fitc.py
+++ b/GPy/inference/latent_function_inference/fitc.py
@@ -3,6 +3,7 @@
 
 from posterior import Posterior
 from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
+from ...util import diag
 import numpy as np
 log_2_pi = np.log(2*np.pi)
 
@@ -14,8 +15,7 @@ class FITC(object):
     the posterior.
 
     """
-    def __init__(self):
-        self.const_jitter = 1e-6
+    const_jitter = 1e-6
 
     def inference(self, kern, X, Z, likelihood, Y):
 
@@ -33,6 +33,7 @@ class FITC(object):
         U = Knm
 
         #factor Kmm
+        diag.add(Kmm, self.const_jitter)
         Kmmi, L, Li, _ = pdinv(Kmm)
 
         #compute beta_star, the effective noise precision

From 4757265b240924af09ac0032984c5c8d85f4d805 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Fri, 14 Mar 2014 15:23:32 +0000
Subject: [PATCH 4/5] Added a hack fix as suggested by max, zeroing any
 negative values (should really be numerically negative values on diagonal)

---
 GPy/kern/_src/independent_outputs.py |  3 ++-
 GPy/kern/_src/stationary.py          | 28 +++++++++++++++-------------
 GPy/testing/kernel_tests.py          | 19 +++++++++++++------
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py
index 1848bf6a..85cb95bc 100644
--- a/GPy/kern/_src/independent_outputs.py
+++ b/GPy/kern/_src/independent_outputs.py
@@ -8,7 +8,7 @@ import itertools
 
 def index_to_slices(index):
     """
-    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index.
 
     e.g.
     >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
@@ -40,6 +40,7 @@ class IndependentOutputs(CombinationKernel):
     The index of the functions is given by the last column in the input X
     the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
 
+    Kern is wrapped with a slicer metaclass
     """
     def __init__(self, kern, index_dim=-1, name='independ'):
         assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces"
diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index df7ba058..a9e837a9 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -15,21 +15,21 @@ class Stationary(Kern):
     """
     Stationary kernels (covariance functions).
 
-    Stationary covariance fucntion depend only on r, where r is defined as 
+    Stationary covariance fucntion depend only on r, where r is defined as
 
       r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
 
-    The covariance function k(x, x' can then be written k(r). 
+    The covariance function k(x, x' can then be written k(r).
 
     In this implementation, r is scaled by the lengthscales parameter(s):
 
-      r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }. 
-    
+      r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
+
     By default, there's only one lengthscale: seaprate lengthscales for each
-    dimension can be enables by setting ARD=True. 
+    dimension can be enables by setting ARD=True.
 
     To implement a stationary covariance function using this class, one need
-    only define the covariance function k(r), and it derivative. 
+    only define the covariance function k(r), and it derivative.
 
       ...
       def K_of_r(self, r):
@@ -37,10 +37,10 @@ class Stationary(Kern):
       def dK_dr(self, r):
           return bar
 
-    The lengthscale(s) and variance parameters are added to the structure automatically. 
-          
+    The lengthscale(s) and variance parameters are added to the structure automatically.
+
     """
-    
+
     def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name):
         super(Stationary, self).__init__(input_dim, active_dims, name)
         self.ARD = ARD
@@ -57,7 +57,7 @@ class Stationary(Kern):
                 if lengthscale.size != input_dim:
                     lengthscale = np.ones(input_dim)*lengthscale
             else:
-                lengthscale = np.ones(self.input_dim)        
+                lengthscale = np.ones(self.input_dim)
         self.lengthscale = Param('lengthscale', lengthscale, Logexp())
         self.variance = Param('variance', variance, Logexp())
         assert self.variance.size==1
@@ -95,7 +95,9 @@ class Stationary(Kern):
             #X2, = self._slice_X(X2)
             X1sq = np.sum(np.square(X),1)
             X2sq = np.sum(np.square(X2),1)
-            return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
+            r2 = -2.*np.dot(X, X2.T) + X1sq[:,None] + X2sq[None,:]
+            r2[r2<0] = 0. # A bit hacky
+            return np.sqrt(r2)
 
     @Cache_this(limit=5, ignore_args=())
     def _scaled_dist(self, X, X2=None):
@@ -133,7 +135,7 @@ class Stationary(Kern):
         if self.ARD:
             #rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
             #d =  X[:, None, :] - X2[None, :, :]
-            #x_xl3 = np.square(d) 
+            #x_xl3 = np.square(d)
             #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
             tmp = dL_dr*self._inv_dist(X, X2)
             if X2 is None: X2 = X
@@ -247,7 +249,7 @@ class Matern52(Stationary):
 
     .. math::
 
-       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) 
+       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
        """
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
         super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index b057f8ef..3b17a3d0 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -120,6 +120,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
 
     if verbose:
         print("Checking covariance function is positive definite.")
+    #if isinstance(kern, GPy.kern.IndependentOutputs):
+        #import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
     result = Kern_check_model(kern, X=X).is_positive_semi_definite()
     if result and verbose:
         print("Check passed.")
@@ -306,17 +308,22 @@ class KernelTestsNonContinuous(unittest.TestCase):
         D = self.D
         self.X = np.random.randn(N,D)
         self.X2 = np.random.randn(N1,D)
-        self.X_block = np.zeros((N+N1, D+D+1))
+        #self.X_block = np.zeros((N+N1, D+D+1))
+        #self.X_block[0:N, 0:D] = self.X
+        #self.X_block[N:N+N1, D:D+D] = self.X2
+        #self.X_block[0:N, -1] = 0
+        #self.X_block[N:N+N1, -1] = 1
+        self.X_block = np.zeros((N+N1, D+1))
         self.X_block[0:N, 0:D] = self.X
-        self.X_block[N:N+N1, D:D+D] = self.X2
-        self.X_block[0:N, -1] = 1
-        self.X_block[N:N+1, -1] = 2
+        self.X_block[N:N+N1, 0:D] = self.X2
+        self.X_block[0:N, -1] = 0
+        self.X_block[N:N+N1, -1] = 1
         self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :]
- 
+
     def test_IndependentOutputs(self):
         k = GPy.kern.RBF(self.D)
         kern = GPy.kern.IndependentOutputs(k, -1)
-        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, X2=self.X_block, verbose=verbose))
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, verbose=verbose))
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."

From bfa18ef6625770733f24d154f12505b5353e69a3 Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Fri, 14 Mar 2014 15:34:00 +0000
Subject: [PATCH 5/5] Fixed Y_metadata bug

---
 GPy/core/gp.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 05e3e671..70b7d695 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -42,7 +42,10 @@ class GP(Model):
         assert Y.shape[0] == self.num_data
         _, self.output_dim = self.Y.shape
 
-        self.Y_metadata = Y_metadata or {}
+        if Y_metadata is None:
+            Y_metadata = {}
+        else:
+            self.Y_metadata = Y_metadata
 
         assert isinstance(kernel, kern.Kern)
         #assert self.input_dim == kernel.input_dim