From 20a02cc4453bd41ede9bff3c96bca02d3ec61c8c Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 5 Dec 2012 22:25:37 -0800 Subject: [PATCH 1/9] removed uncertain gp regression from the model __init__, since it's now just a switch in the sparse GP --- GPy/models/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index d176e7b6..dd721559 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -9,4 +9,3 @@ from warped_GP import warpedGP from GP_EP import GP_EP from generalized_FITC import generalized_FITC from sparse_GPLVM import sparse_GPLVM -from uncertain_input_GP_regression import uncertain_input_GP_regression From c8e2ca351d5ea0b66057296c6c4c29eefe3bb247 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 5 Dec 2012 22:45:08 -0800 Subject: [PATCH 2/9] some tidying in the EP code --- GPy/inference/Expectation_Propagation.py | 118 ++++++++++++----------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/GPy/inference/Expectation_Propagation.py b/GPy/inference/Expectation_Propagation.py index 377b1963..379bf8b7 100644 --- a/GPy/inference/Expectation_Propagation.py +++ b/GPy/inference/Expectation_Propagation.py @@ -11,45 +11,21 @@ from ..util.linalg import pdinv,mdot,jitchol from ..util.plot import gpplot from .. import kern -class EP: - def __init__(self,covariance,likelihood,Kmn=None,Knn_diag=None,epsilon=1e-3,powerep=[1.,1.]): - """ - Expectation Propagation +class EP_base: + """ + Expectation Propagation. - Arguments - --------- - X : input observations - likelihood : Output's likelihood (likelihood class) - kernel : a GPy kernel (kern class) - inducing : Either an array specifying the inducing points location or a sacalar defining their number. None value for using a non-sparse model is used. 
- powerep : Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - """ + This is just the base class for expectation propagation. We'll extend it for full and sparse EP. + """ + def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]): self.likelihood = likelihood - assert covariance.shape[0] == covariance.shape[1] - if Kmn is not None: - self.Kmm = covariance - self.Kmn = Kmn - self.M = self.Kmn.shape[0] - self.N = self.Kmn.shape[1] - assert self.M < self.N, 'The number of inducing inputs must be smaller than the number of observations' - else: - self.K = covariance - self.N = self.K.shape[0] - if Knn_diag is not None: - self.Knn_diag = Knn_diag - assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N' - self.epsilon = epsilon self.eta, self.delta = powerep self.jitter = 1e-12 - """ - Initial values - Likelihood approximation parameters: - p(y|f) = t(f|tau_tilde,v_tilde) - """ - self.tau_tilde = np.zeros(self.N) - self.v_tilde = np.zeros(self.N) + #Initial values - Likelihood approximation parameters: + #p(y|f) = t(f|tau_tilde,v_tilde) + self.restart_EP() def restart_EP(self): """ @@ -59,7 +35,22 @@ class EP: self.v_tilde = np.zeros(self.N) self.mu = np.zeros(self.N) -class Full(EP): +class Full(EP_base): + """ + :param likelihood: Output's likelihood (e.g. probit) + :type likelihood: GPy.inference.likelihood instance + :param K: prior covariance matrix + :type K: np.ndarray (N x N) + :param likelihood: Output's likelihood (e.g. 
probit) + :type likelihood: GPy.inference.likelihood instance + :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) + """ + def __init__(self,K,likelihood,*args,**kwargs): + assert K.shape[0] == K.shape[1] + self.K = K + self.N = self.K.shape[0] + EP_base.__init__(self,likelihood,*args,**kwargs) def fit_EP(self): """ The expectation-propagation algorithm. @@ -78,15 +69,16 @@ class Full(EP): sigma_ = 1./tau_ mu_ = v_/tau_ """ - self.tau_ = np.empty(self.N,dtype=float) - self.v_ = np.empty(self.N,dtype=float) + + self.tau_ = np.empty(self.N,dtype=np.float64) + self.v_ = np.empty(self.N,dtype=np.float64) #Initial values - Marginal moments - z = np.empty(self.N,dtype=float) - self.Z_hat = np.empty(self.N,dtype=float) - phi = np.empty(self.N,dtype=float) - mu_hat = np.empty(self.N,dtype=float) - sigma2_hat = np.empty(self.N,dtype=float) + z = np.empty(self.N,dtype=np.float64) + self.Z_hat = np.empty(self.N,dtype=np.float64) + phi = np.empty(self.N,dtype=np.float64) + mu_hat = np.empty(self.N,dtype=np.float64) + sigma2_hat = np.empty(self.N,dtype=np.float64) #Approximation epsilon_np1 = self.epsilon + 1. 
@@ -95,8 +87,7 @@ class Full(EP): self.np1 = [self.tau_tilde.copy()] self.np2 = [self.v_tilde.copy()] while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.arange(self.N) - random.shuffle(update_order) + update_order = np.random.permutation(self.N) for i in update_order: #Cavity distribution parameters self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i] @@ -120,14 +111,33 @@ class Full(EP): V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) self.Sigma = self.K - np.dot(V.T,V) self.mu = np.dot(self.Sigma,self.v_tilde) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + epsilon_np1 = np.mean(self.tau_tilde-self.np1[-1]**2) + epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2) self.np1.append(self.tau_tilde.copy()) self.np2.append(self.v_tilde.copy()) - self.np2.append(self.v_tilde.copy()) +class FITC(EP_base): + """ + :param likelihood: Output's likelihood (e.g. probit) + :type likelihood: GPy.inference.likelihood instance + :param Knn_diag: The diagonal elements of Knn is a 1D vector + :param Kmn: The 'cross' variance between inducing inputs and data + :param Kmm: the covariance matrix of the inducing inputs + :param likelihood: Output's likelihood (e.g. 
probit) + :type likelihood: GPy.inference.likelihood instance + :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) + """ + def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs) + self.Knn_diag = Knn_diag + self.Kmn = Kmn + self.Kmm = Kmm + self.M = self.Kmn.shape[0] + self.N = self.Kmn.shape[1] + assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations' + assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N' + EP_base.__init__(self,likelihood,*args,**kwargs) -class FITC(EP): def fit_EP(self): """ The expectation-propagation algorithm with sparse pseudo-input. @@ -166,15 +176,15 @@ class FITC(EP): sigma_ = 1./tau_ mu_ = v_/tau_ """ - self.tau_ = np.empty(self.N,dtype=float) - self.v_ = np.empty(self.N,dtype=float) + self.tau_ = np.empty(self.N,dtype=np.float64) + self.v_ = np.empty(self.N,dtype=np.float64) #Initial values - Marginal moments - z = np.empty(self.N,dtype=float) - self.Z_hat = np.empty(self.N,dtype=float) - phi = np.empty(self.N,dtype=float) - mu_hat = np.empty(self.N,dtype=float) - sigma2_hat = np.empty(self.N,dtype=float) + z = np.empty(self.N,dtype=np.float64) + self.Z_hat = np.empty(self.N,dtype=np.float64) + phi = np.empty(self.N,dtype=np.float64) + mu_hat = np.empty(self.N,dtype=np.float64) + sigma2_hat = np.empty(self.N,dtype=np.float64) #Approximation epsilon_np1 = 1 From 574f9f4e0a90ea290404372073091b03a8c2d1bf Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 6 Dec 2012 09:21:37 -0800 Subject: [PATCH 3/9] more tidying in EP, removed examples from _module_ ( and opened discussion on github --- GPy/__init__.py | 2 +- GPy/inference/Expectation_Propagation.py | 2 +- GPy/models/GP_EP.py | 2 +- GPy/models/generalized_FITC.py | 2 +- GPy/models/uncollapsed_sparse_GP.py | 3 ++- 5 files changed, 6 insertions(+), 5 deletions(-) 
diff --git a/GPy/__init__.py b/GPy/__init__.py index 876e2ca6..6993d5c2 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -6,5 +6,5 @@ import kern import models import inference import util -import examples +#import examples TODO: discuss! from core import priors diff --git a/GPy/inference/Expectation_Propagation.py b/GPy/inference/Expectation_Propagation.py index 379bf8b7..c281578e 100644 --- a/GPy/inference/Expectation_Propagation.py +++ b/GPy/inference/Expectation_Propagation.py @@ -128,7 +128,7 @@ class FITC(EP_base): :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) """ - def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs) + def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs): self.Knn_diag = Knn_diag self.Kmn = Kmn self.Kmm = Kmm diff --git a/GPy/models/GP_EP.py b/GPy/models/GP_EP.py index 5dc721a4..bb582674 100644 --- a/GPy/models/GP_EP.py +++ b/GPy/models/GP_EP.py @@ -6,7 +6,7 @@ import numpy as np import pylab as pb from scipy import stats, linalg from .. import kern -from ..inference.Expectation_Propagation import EP,Full +from ..inference.Expectation_Propagation import Full from ..inference.likelihoods import likelihood,probit#,poisson,gaussian from ..core import model from ..util.linalg import pdinv,jitchol diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index b4953ecb..f6fef670 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -9,7 +9,7 @@ from .. 
import kern from ..core import model from ..util.linalg import pdinv,mdot from ..util.plot import gpplot -from ..inference.Expectation_Propagation import EP,Full,FITC +from ..inference.Expectation_Propagation import FITC from ..inference.likelihoods import likelihood,probit class generalized_FITC(model): diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py index b5d4b054..89a8ff0e 100644 --- a/GPy/models/uncollapsed_sparse_GP.py +++ b/GPy/models/uncollapsed_sparse_GP.py @@ -39,7 +39,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression): M = Z.shape[0] else: M=M - self.set_vb_param(np.hstack((np.ones(M*D)),np.eye(M).flatten())) + q_u = np.hstack((np.ones(M*D)),np.eye(M).flatten()) + self.set_vb_param(q_u) sparse_GP_regression.__init__(self, X, Y, *args, **kwargs) def _computations(self): From c72d1b1a5b524d70fff81c16ef96ec9371ee951a Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 6 Dec 2012 09:51:13 -0800 Subject: [PATCH 4/9] changed the behaviour of pdinv: now returns L and Li as well as the inverse --- GPy/kern/finite_dimensional.py | 2 +- GPy/models/GP_regression.py | 4 ++-- GPy/models/generalized_FITC.py | 6 +++--- GPy/models/sparse_GP_regression.py | 10 +++------- GPy/util/linalg.py | 24 ++++++++++++------------ 5 files changed, 21 insertions(+), 25 deletions(-) diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/finite_dimensional.py index 421a3c5b..98c99628 100644 --- a/GPy/kern/finite_dimensional.py +++ b/GPy/kern/finite_dimensional.py @@ -19,7 +19,7 @@ class finite_dimensional(kernpart): self.D = D self.F = F self.G = G - self.G_1 ,tmp = pdinv(G) + self.G_1 ,L,Li,logdet = pdinv(G) self.n = F.shape[0] if weights is not None: assert weights.shape==(self.n,) diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index 885f13cc..f21c4591 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -71,7 +71,7 @@ class GP_regression(model): def set_param(self,p): 
self.kern.expand_param(p) self.K = self.kern.K(self.X,slices1=self.Xslices) - self.Ki,self.hld = pdinv(self.K) + self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) def get_param(self): return self.kern.extract_param() @@ -90,7 +90,7 @@ class GP_regression(model): return -0.5*np.sum(np.multiply(self.Ki, self.Youter)) def log_likelihood(self): - complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - self.D*self.hld + complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet return complexity_term + self._model_fit_term() def dL_dK(self): diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index f6fef670..4ef36f0e 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -62,7 +62,7 @@ class generalized_FITC(model): def posterior_param(self): self.Knn_diag = self.kernel.Kdiag(self.X) self.Kmm = self.kernel.K(self.Z) - self.Kmmi, self.Kmm_hld = pdinv(self.Kmm) + self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm) self.Knm = self.kernel.K(self.X,self.Z) self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T) self.Qnn = np.dot(self.Knm,self.KmmiKmn) @@ -72,10 +72,10 @@ class generalized_FITC(model): self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0) self.KmnTaut = self.Knm.T*self.Taut[None,:] self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm) - self.Woodbury_inv, self.Woodbury_hld = pdinv(self.Kmm + self.KmnTautKnm) + self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm) self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut) - self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - self.Kmm_hld + self.Woodbury_hld + self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet self.Diag = self.Diag0/(1.+ self.Diag0 * 
self.ep_approx.tau_tilde) self.P = (self.Diag / self.Diag0)[:,None] * self.Knm diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index 38aeef08..da19d80a 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -87,14 +87,10 @@ class sparse_GP_regression(GP_regression): self.V = self.beta*self.Y self.psi1V = np.dot(self.psi1, self.V) self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.Lm = jitchol(self.Kmm) - self.Lmi = chol_inv(self.Lm) - self.Kmmi = np.dot(self.Lmi.T, self.Lmi) + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.A = mdot(self.Lmi, self.psi2, self.Lmi.T) self.B = np.eye(self.M) + self.beta * self.A - self.LB = jitchol(self.B) - self.LBi = chol_inv(self.LB) - self.Bi = np.dot(self.LBi.T, self.LBi) + self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.LLambdai = np.dot(self.LBi, self.Lmi) self.trace_K = self.psi0 - np.trace(self.A) self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) @@ -124,7 +120,7 @@ class sparse_GP_regression(GP_regression): """ A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) B = -0.5*self.beta*self.D*self.trace_K - C = -self.D * np.sum(np.log(np.diag(self.LB))) + C = -0.5*self.D * self.B_logdet D = -0.5*self.beta*self.trYYT E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv) return A+B+C+D+E diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 8afffce3..f8f03d5f 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -78,23 +78,23 @@ def jitchol(A,maxtries=5): def pdinv(A): """ - Arguments - --------- :param A: A DxD pd numpy array - Returns - ------- - inv : the inverse of A - hld: 0.5* the log of the determinant of A + :rval Ai: the inverse of A + :rtype Ai: np.ndarray + :rval L: the Cholesky decomposition of A + :rtype L: np.ndarray + :rval Li: the Cholesky decomposition of Ai + :rtype Li: np.ndarray + :rval logdet: the log of the determinant of A + :rtype logdet: float64 """ L = jitchol(A) - hld = 
np.sum(np.log(np.diag(L))) + logdet = 2.*np.sum(np.log(np.diag(L))) + Li = chol_inv(L) + Ai = np.dot(Li.T,Li) #TODO: get the flapack routine form multiplying triangular matrices - inv = sp.lib.lapack.flapack.dpotri(L)[0] - # inv = linalg.flapack.dpotri(L,lower = 1)[0] - inv = np.tril(inv)+np.tril(inv,-1).T - - return inv, hld + return Ai, L, Li, logdet def chol_inv(L): From f634350398f6b87a4dd99443744b0d658187323f Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 6 Dec 2012 10:16:19 -0800 Subject: [PATCH 5/9] fixed small bugs in linalg, setup.py --- GPy/util/linalg.py | 19 +++++++------------ setup.py | 2 +- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index f8f03d5f..092589a9 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -46,19 +46,14 @@ def _mdot_r(a,b): def jitchol(A,maxtries=5): """ - Arguments - --------- - A : An almost pd square matrix + :param A : An almost pd square matrix - Returns - ------- - cholesky(K) + :rval L: the Cholesky decomposition of A - Notes - ----- - Adds jitter to K, to enforce positive-definiteness - if stuff breaks, please check: - np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) + .. 
Note: + Adds jitter to K, to enforce positive-definiteness + if stuff breaks, please check: + np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) """ try: return linalg.cholesky(A, lower = True) @@ -139,7 +134,7 @@ def PCA(Y, Q): Returns ------- - X - NxQ np.array of dimensionality reduced data + :rval X: - NxQ np.array of dimensionality reduced data W - QxD mapping from X to Y """ if not np.allclose(Y.mean(axis=0), 0.0): diff --git a/setup.py b/setup.py index ed841b56..076af82c 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup(name = 'GPy', package_dir={'GPy': 'GPy'}, package_data = {'GPy': ['GPy/examples']}, py_modules = ['GPy.__init__'], - long_description=read('README'), + long_description=read('README.md'), ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], From 9af637e214ee40c9af68af156b1457d8b0f3ab62 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 6 Dec 2012 11:28:04 -0800 Subject: [PATCH 6/9] changes the naming of kern components Kern components now only get a number if their name is duplicated --- GPy/kern/kern.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 158913ff..3830d844 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -99,6 +99,7 @@ class kern(parameterised): newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] return newkern + def add(self,other): """ Add another kernel to this one. 
Both kernels are defined on the same _space_ @@ -139,7 +140,13 @@ class kern(parameterised): [p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)] def get_param_names(self): - return sum([[k.name+'_'+str(i)+'_'+n for n in k.get_param_names()] for i,k in enumerate(self.parts)],[]) + #this is a bit nasty: we wat to distinguish between parts with the same name by appending a count + part_names = np.array([k.name for k in self.parts],dtype=np.str) + counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)] + cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)] + names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)] + + return sum([[name+'_'+n for n in k.get_param_names()] for name,k in zip(names,self.parts)],[]) def K(self,X,X2=None,slices1=None,slices2=None): assert X.shape[1]==self.D From ac84f4f34c337d0001898db6dc4135d01af2970e Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 6 Dec 2012 11:30:12 -0800 Subject: [PATCH 7/9] changed the name of GP_EP (from simple) in the unit test, added a messages option for full EP --- GPy/inference/Expectation_Propagation.py | 4 +++- GPy/testing/unit_tests.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/GPy/inference/Expectation_Propagation.py b/GPy/inference/Expectation_Propagation.py index c281578e..05453f1d 100644 --- a/GPy/inference/Expectation_Propagation.py +++ b/GPy/inference/Expectation_Propagation.py @@ -51,7 +51,7 @@ class Full(EP_base): self.K = K self.N = self.K.shape[0] EP_base.__init__(self,likelihood,*args,**kwargs) - def fit_EP(self): + def fit_EP(self,messages=False): """ The expectation-propagation algorithm. For nomenclature see Rasmussen & Williams 2006 (pag. 
52-60) @@ -115,6 +115,8 @@ class Full(EP_base): epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2) self.np1.append(self.tau_tilde.copy()) self.np2.append(self.v_tilde.copy()) + if messages: + print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1) class FITC(EP_base): """ diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 9191cedd..02a63feb 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -139,13 +139,13 @@ class GradientTests(unittest.TestCase): m.constrain_positive('(linear|bias|white)') self.assertTrue(m.checkgrad()) - def test_simple_GP_EP(self): + def test_GP_EP(self): N = 20 X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] likelihood = GPy.inference.likelihoods.probit(Y) - m = GPy.models.simple_GP_EP(X,likelihood,k) + m = GPy.models.GP_EP(X,likelihood,k) m.constrain_positive('(var|len)') m.approximate_likelihood() self.assertTrue(m.checkgrad()) From 50029ca4a7a47c5220c879c6c888a3fdba5b5dc2 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 6 Dec 2012 11:46:34 -0800 Subject: [PATCH 8/9] implemented default constraints via m.ensure_default_constraints() --- GPy/core/model.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/GPy/core/model.py b/GPy/core/model.py index 3d64441e..6ae292d0 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -162,6 +162,20 @@ class model(parameterised): else: self.expand_param(initial_parameters) + def ensure_default_constraints(self,warn=False): + """ + Ensure that any variables which should clearly be positive have been constrained somehow. + """ + positive_strings = ['variance','lengthscale', 'precision'] + for s in positive_strings: + for i in self.grep_param_names(s): + if not (i in self.all_constrained_indices()): + name = self.get_param_names()[i] + self.constrain_positive(name) + if warn: + print "Warning! 
constraining %s postive"%name
+
+
 def optimize(self, optimizer=None, start=None, **kwargs):
     """
     Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

From 493f236b90c0e445d832a48c924ec9f69ef1ec90 Mon Sep 17 00:00:00 2001
From: Neil Lawrence
Date: Sun, 9 Dec 2012 01:53:42 -0800
Subject: [PATCH 9/9] Added notes on issues found.

---
 GPy/notes.txt | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 GPy/notes.txt

diff --git a/GPy/notes.txt b/GPy/notes.txt
new file mode 100644
index 00000000..c09102b9
--- /dev/null
+++ b/GPy/notes.txt
@@ -0,0 +1,42 @@
+Fails in weird ways if you pass an integer as the input instead of a double to the kernel.
+
+The Matern kernels (at least the 52) are still working in the ARD manner, which means they wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
+
+Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
+
+Missing kernels: polynomial, rational quadratic.
+
+Kernel implementations are far too obscure. Need to be easily readable for a first time user.
+
+Need an implementation of scaled conjugate gradients for the optimizers.
+
+Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
+
+Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm. 
+
+Change get_param and set_param to get_params and set_params.
+
+Get constrain param by default inside model creation.
+
+Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
+
+
+Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
+
+Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
+
+Change Youter to YYT (Youter doesn't mean anything for matrices).
+
+Bug when running classification.crescent_data()
+
+A dictionary for parameter storage? So we can go through names easily?
+
+A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
+
+Diagonal noise covariance function
+
+Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
+
+When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X); you have to do rbf.K(X, X)
+
+The predict method for GP_regression returns a covariance matrix, which is a bad idea as this takes a lot to compute; it's also confusing for first time users. Should only be returned if the user explicitly requests it.