From 323545f2d1077645cedb48b50d10eeba3bd13e54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Fusi?=
Date: Wed, 8 May 2013 15:46:43 +0200
Subject: [PATCH 1/6] small change to GPLVM param indexing

---
 GPy/models/GPLVM.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index bd56ff12..0b60f070 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -45,7 +45,7 @@ class GPLVM(GP):
         return np.hstack((self.X.flatten(), GP._get_params(self)))
 
     def _set_params(self,x):
-        self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
+        self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy()
         GP._set_params(self, x[self.X.size:])
 
     def _log_likelihood_gradients(self):

From b17cc60182cbbdce2b2b90229d4c8105b3d97843 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Fusi?=
Date: Wed, 8 May 2013 15:47:06 +0200
Subject: [PATCH 2/6] better f_inv

---
 GPy/models/warped_GP.py       | 11 ++++++++++-
 GPy/util/warping_functions.py | 24 +++++++++++++++---------
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_GP.py
index 052f8d8e..8072faeb 100644
--- a/GPy/models/warped_GP.py
+++ b/GPy/models/warped_GP.py
@@ -23,6 +23,7 @@ class warpedGP(GP):
             self.warping_function = TanhWarpingFunction_d(warping_terms)
             self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
 
+        Y = self._scale_data(Y)
         self.has_uncertain_inputs = False
         self.Y_untransformed = Y.copy()
         self.predict_in_warped_space = False
@@ -30,6 +31,14 @@ class warpedGP(GP):
 
         GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
 
+    def _scale_data(self, Y):
+        self._Ymax = Y.max()
+        self._Ymin = Y.min()
+        return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
+
+    def _unscale_data(self, Y):
+        return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
+
     def _set_params(self, x):
         self.warping_params = x[:self.warping_function.num_parameters]
         Y = self.transform_data()
@@ -79,5 +88,5 @@ class warpedGP(GP):
         if self.predict_in_warped_space:
             mu = self.warping_function.f_inv(mu, self.warping_params)
             var = self.warping_function.f_inv(var, self.warping_params)
-
+        mu = self._unscale_data(mu)
         return mu, var

diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index 3ea6dcc6..98d4d2b7 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -81,7 +81,7 @@ class TanhWarpingFunction(WarpingFunction):
         iterations: number of N.R. iterations
 
         """
-
+
         y = y.copy()
         z = np.ones_like(y)
 
@@ -176,7 +176,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         mpsi = psi.copy()
         d = psi[-1]
         mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
-
+
         #3. transform data
         z = d*y.copy()
         for i in range(len(mpsi)):
@@ -185,18 +185,24 @@ class TanhWarpingFunction_d(WarpingFunction):
 
         return z
 
-    def f_inv(self, y, psi, iterations = 30):
+    def f_inv(self, z, psi, max_iterations = 1000):
         """
         calculate the numerical inverse of f
 
         """
-        y = y.copy()
-        z = np.ones_like(y)
+        z = z.copy()
+        y = np.ones_like(z)
+        it = 0
+        update = np.inf
 
-        for i in range(iterations):
-            z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
-
-        return z
+        while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
+            update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
+            y -= update
+            it += 1
+        if it == max_iterations:
+            print "WARNING!!! Maximum number of iterations reached in f_inv"
+
+        return y
 
 
     def fgrad_y(self, y, psi, return_precalc = False):
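
A note on PATCH 2: the fixed 30-iteration Newton-Raphson inversion becomes a convergence-tested loop that stops once the summed update magnitude drops below 1e-10. The standalone sketch below reproduces the same scheme on a toy monotonic warping function; f, fgrad_y and their parameters are illustrative stand-ins, not the GPy implementations.

import numpy as np

def f(y, d=1.0, a=0.5, b=1.0, c=0.0):
    # toy monotonic warping: f(y) = d*y + a*tanh(b*(y + c))
    return d*y + a*np.tanh(b*(y + c))

def fgrad_y(y, d=1.0, a=0.5, b=1.0, c=0.0):
    # derivative of f with respect to y
    return d + a*b*(1.0 - np.tanh(b*(y + c))**2)

def f_inv(z, max_iterations=1000, tol=1e-10):
    # Newton-Raphson inversion of f: find y such that f(y) = z.
    # Mirrors the loop structure introduced in PATCH 2: iterate until the
    # summed update magnitude falls below tol or max_iterations is hit.
    y = np.ones_like(z)
    it = 0
    update = np.inf
    while it == 0 or (np.abs(update).sum() > tol and it < max_iterations):
        update = (f(y) - z) / fgrad_y(y)
        y -= update
        it += 1
    if it == max_iterations:
        print("WARNING: maximum number of iterations reached in f_inv")
    return y

z = f(np.linspace(-3, 3, 7))
print(np.allclose(f(f_inv(z)), z))  # True
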
From 4182d60513fc1e50d376f98da641613db9c60aca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Fusi?=
Date: Fri, 10 May 2013 11:44:54 +0200
Subject: [PATCH 3/6] small changes

---
 GPy/inference/SGD.py | 111 ++++++++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 48 deletions(-)

diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py
index 588ced75..778c79d7 100644
--- a/GPy/inference/SGD.py
+++ b/GPy/inference/SGD.py
@@ -97,52 +97,67 @@ class opt_SGD(Optimizer):
         return subset
 
     def shift_constraints(self, j):
-        # back them up
-        bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
-        bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
-        bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
-        for b in range(len(bounded_i)): # for each group of constraints
-            for bc in range(len(bounded_i[b])):
-                pos = np.where(j == bounded_i[b][bc])[0]
+        constrained_indices = copy.deepcopy(self.model.constrained_indices)
+
+        for c, constraint in enumerate(constrained_indices):
+            mask = (np.ones_like(constrained_indices[c]) == 1)
+            for i in range(len(constrained_indices[c])):
+                pos = np.where(j == constrained_indices[c][i])[0]
                 if len(pos) == 1:
-                    pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
-                    self.model.constrained_bounded_indices[b][pos2] = pos[0]
+                    self.model.constrained_indices[c][i] = pos
                 else:
-                    if len(self.model.constrained_bounded_indices[b]) == 1:
-                        # if it's the last index to be removed
-                        # the logic here is just a mess. If we remove the last one, then all the
-                        # b-indices change and we have to iterate through everything to find our
-                        # current index. Can't deal with this right now.
-                        raise NotImplementedError
+                    mask[i] = False
 
-                    else: # just remove it from the indices
-                        mask = self.model.constrained_bounded_indices[b] != bc
-                        self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
+            self.model.constrained_indices[c] = self.model.constrained_indices[c][mask]
+
+        return constrained_indices
+
+        # back them up
+        # bounded_i = copy.deepcopy(self.model.constrained_bounded_indices)
+        # bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers)
+        # bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers)
+
+        # for b in range(len(bounded_i)): # for each group of constraints
+        #     for bc in range(len(bounded_i[b])):
+        #         pos = np.where(j == bounded_i[b][bc])[0]
+        #         if len(pos) == 1:
+        #             pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
+        #             self.model.constrained_bounded_indices[b][pos2] = pos[0]
+        #         else:
+        #             if len(self.model.constrained_bounded_indices[b]) == 1:
+        #                 # if it's the last index to be removed
+        #                 # the logic here is just a mess. If we remove the last one, then all the
+        #                 # b-indices change and we have to iterate through everything to find our
+        #                 # current index. Can't deal with this right now.
+        #                 raise NotImplementedError
 
+        #             else: # just remove it from the indices
+        #                 mask = self.model.constrained_bounded_indices[b] != bc
+        #                 self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
 
-        # here we shift the positive constraints. We cycle through each positive
-        # constraint
-        positive = self.model.constrained_positive_indices.copy()
-        mask = (np.ones_like(positive) == 1)
-        for p in range(len(positive)):
-            # we now check whether the constrained index appears in the j vector
-            # (the vector of the "active" indices)
-            pos = np.where(j == self.model.constrained_positive_indices[p])[0]
-            if len(pos) == 1:
-                self.model.constrained_positive_indices[p] = pos
-            else:
-                mask[p] = False
-        self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
+        # # here we shift the positive constraints. We cycle through each positive
+        # # constraint
+        # positive = self.model.constrained_positive_indices.copy()
+        # mask = (np.ones_like(positive) == 1)
+        # for p in range(len(positive)):
+        #     # we now check whether the constrained index appears in the j vector
+        #     # (the vector of the "active" indices)
+        #     pos = np.where(j == self.model.constrained_positive_indices[p])[0]
+        #     if len(pos) == 1:
+        #         self.model.constrained_positive_indices[p] = pos
+        #     else:
+        #         mask[p] = False
+        # self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
 
-        return (bounded_i, bounded_l, bounded_u), positive
+        # return (bounded_i, bounded_l, bounded_u), positive
 
-    def restore_constraints(self, b, p):
-        self.model.constrained_bounded_indices = b[0]
-        self.model.constrained_bounded_lowers = b[1]
-        self.model.constrained_bounded_uppers = b[2]
-        self.model.constrained_positive_indices = p
+    def restore_constraints(self, c):#b, p):
+        # self.model.constrained_bounded_indices = b[0]
+        # self.model.constrained_bounded_lowers = b[1]
+        # self.model.constrained_bounded_uppers = b[2]
+        # self.model.constrained_positive_indices = p
+        self.model.constrained_indices = c
 
     def get_param_shapes(self, N = None, Q = None):
         model_name = self.model.__class__.__name__
         if model_name == 'GPLVM':
@@ -168,8 +183,8 @@ class opt_SGD(Optimizer):
         if self.model.N == 0 or Y.std() == 0.0:
             return 0, step, self.model.N
 
-        self.model.likelihood._mean = Y.mean()
-        self.model.likelihood._std = Y.std()
+        self.model.likelihood._bias = Y.mean()
+        self.model.likelihood._scale = Y.std()
         self.model.likelihood.set_data(Y)
 
         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
@@ -181,16 +196,16 @@ class opt_SGD(Optimizer):
         self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T)
         self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT)
 
-        b, p = self.shift_constraints(j)
+        ci = self.shift_constraints(j)
         f, fp = f_fp(self.x_opt[j])
         step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
         self.x_opt[j] -= step[j]
-        self.restore_constraints(b, p)
+        self.restore_constraints(ci)
 
-        # restore likelihood _mean and _std, otherwise when we call set_data(y) on
+        # restore likelihood _bias and _scale, otherwise when we call set_data(y) on
         # the next feature, it will get normalized with the mean and std of this one.
-        self.model.likelihood._mean = 0
-        self.model.likelihood._std = 1
+        self.model.likelihood._bias = 0
+        self.model.likelihood._scale = 1
 
         return f, step, self.model.N
 
@@ -200,8 +215,8 @@ class opt_SGD(Optimizer):
 
         self.model.likelihood.YYT = None
         self.model.likelihood.trYYT = None
-        self.model.likelihood._mean = 0.0
-        self.model.likelihood._std = 1.0
+        self.model.likelihood._bias = 0.0
+        self.model.likelihood._scale = 1.0
 
         N, Q = self.model.X.shape
         D = self.model.likelihood.Y.shape[1]
@@ -217,6 +232,7 @@ class opt_SGD(Optimizer):
         else:
             features = np.argsort(NLL)
 
+        import pdb; pdb.set_trace()
         b = len(features)/self.batch_size
         features = [features[i::b] for i in range(b)]
         NLL = []
@@ -251,7 +267,6 @@ class opt_SGD(Optimizer):
         #     plt.clf()
         #     plt.plot(self.param_traces['noise'])
 
-        # import pdb; pdb.set_trace()
         # for k in self.param_traces.keys():
         #     self.param_traces[k].append(self.model.get(k)[0])
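
The index bookkeeping in PATCH 3's shift_constraints is easier to see outside the diff: each global parameter index under a constraint is remapped to its position inside the active subvector j, and indices not present in j are dropped for this step. A minimal sketch of that remapping on a toy example — the function and variable names here are hypothetical, only the numpy logic mirrors the patch:

import numpy as np

def shift_constraints(constrained_indices, j):
    # constrained_indices: list of arrays of global parameter indices,
    # one array per constraint group (as in opt_SGD.shift_constraints).
    # j: global indices of the parameters active in this SGD step.
    backup = [c.copy() for c in constrained_indices]
    shifted = []
    for c in constrained_indices:
        mask = np.ones_like(c, dtype=bool)
        for i in range(len(c)):
            pos = np.where(j == c[i])[0]
            if len(pos) == 1:
                c[i] = pos[0]      # remap to position within j
            else:
                mask[i] = False    # constrained parameter inactive this step
        shifted.append(c[mask])
    return shifted, backup         # backup plays the role of the returned copy

j = np.array([2, 5, 7, 11])
groups = [np.array([5, 11, 3])]    # one constraint group
shifted, backup = shift_constraints(groups, j)
print(shifted)  # [array([1, 3])] : 5 -> position 1 in j, 11 -> position 3
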
From 16757a298c82417d2ea1a75267e8adb2c102d9f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Fusi?=
Date: Fri, 10 May 2013 16:21:12 +0200
Subject: [PATCH 4/6] trying to follow changes in likelihood

---
 GPy/inference/SGD.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py
index 778c79d7..177af58e 100644
--- a/GPy/inference/SGD.py
+++ b/GPy/inference/SGD.py
@@ -157,7 +157,7 @@ class opt_SGD(Optimizer):
         # self.model.constrained_positive_indices = p
         self.model.constrained_indices = c
-
+
     def get_param_shapes(self, N = None, Q = None):
         model_name = self.model.__class__.__name__
         if model_name == 'GPLVM':
@@ -186,6 +186,12 @@ class opt_SGD(Optimizer):
         self.model.likelihood._bias = Y.mean()
         self.model.likelihood._scale = Y.std()
         self.model.likelihood.set_data(Y)
+        # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
+
+        sigma = self.model.likelihood._variance
+        self.model.likelihood._variance = None # invalidate cache
+        self.model.likelihood._set_params(sigma)
+
         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
         self.model.X = X[samples]
@@ -232,7 +238,6 @@ class opt_SGD(Optimizer):
         else:
             features = np.argsort(NLL)
 
-        import pdb; pdb.set_trace()
         b = len(features)/self.batch_size
         features = [features[i::b] for i in range(b)]
         NLL = []
@@ -244,6 +249,11 @@ class opt_SGD(Optimizer):
             self.model.D = len(j)
             self.model.likelihood.D = len(j)
             self.model.likelihood.set_data(Y[:, j])
+            # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision
+
+            sigma = self.model.likelihood._variance
+            self.model.likelihood._variance = None # invalidate cache
+            self.model.likelihood._set_params(sigma)
 
             if missing_data:
                 shapes = self.get_param_shapes(N, Q)
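
PATCH 4's repeated sigma dance is a cache-invalidation idiom: a Gaussian likelihood caches quantities derived from the noise variance (and from Y), and after the data is swapped the cache must be rebuilt against the new Y. Setting _variance to None defeats the "nothing changed" check so _set_params recomputes everything. A schematic of the pattern, with a hypothetical stand-in class rather than GPy's actual likelihood:

import numpy as np

class CachedGaussianLikelihood(object):
    # hypothetical stand-in for a likelihood caching variance-derived terms
    def __init__(self, Y, variance):
        self.Y = Y
        self._variance = None
        self._set_params(variance)

    def _set_params(self, variance):
        if variance != self._variance:     # recompute only when it changes
            self._variance = variance
            self.precision = 1.0 / variance
            self.V = self.Y * self.precision   # cached, depends on Y too!

    def set_data(self, Y):
        self.Y = Y                         # cached V is now stale

lik = CachedGaussianLikelihood(np.ones(3), variance=0.1)
lik.set_data(np.arange(3.0))

# the pattern from PATCH 4: force the cache to rebuild against the new data
sigma = lik._variance
lik._variance = None      # invalidate cache
lik._set_params(sigma)
print(lik.V)              # [ 0. 10. 20.] - consistent with the new Y
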
From 80c25c9aafb759273d68bca001f35949f134ceac Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Mon, 13 May 2013 17:04:59 +0100
Subject: [PATCH 5/6] moved linalg function to GPy.linalg

---
 GPy/inference/SGD.py    | 10 ++++++++--
 GPy/models/sparse_GP.py |  7 +------
 GPy/util/linalg.py      |  5 +++++
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py
index 177af58e..bfc6ee15 100644
--- a/GPy/inference/SGD.py
+++ b/GPy/inference/SGD.py
@@ -206,8 +206,9 @@ class opt_SGD(Optimizer):
         f, fp = f_fp(self.x_opt[j])
         step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
         self.x_opt[j] -= step[j]
-        self.restore_constraints(ci)
 
+        self.restore_constraints(ci)
+        self.model.grads[j] = fp
         # restore likelihood _bias and _scale, otherwise when we call set_data(y) on
         # the next feature, it will get normalized with the mean and std of this one.
         self.model.likelihood._bias = 0
@@ -217,6 +218,8 @@ class opt_SGD(Optimizer):
 
     def opt(self, f_fp=None, f=None, fp=None):
         self.x_opt = self.model._get_params_transformed()
+        self.model.grads = np.zeros_like(self.x_opt)
+
         X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
         self.model.likelihood.YYT = None
@@ -287,7 +290,10 @@ class opt_SGD(Optimizer):
             self.model.likelihood.N = N
             self.model.likelihood.D = D
             self.model.likelihood.Y = Y
-
+            sigma = self.model.likelihood._variance
+            self.model.likelihood._variance = None # invalidate cache
+            self.model.likelihood._set_params(sigma)
+
             self.trace.append(self.f_opt)
             if self.iteration_file is not None:
                 f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 3a45488b..ed5a5004 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -3,17 +3,12 @@
 
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, tdot, symmetrify
+from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
 from scipy import linalg
 
-def backsub_both_sides(L, X):
-    """ Return L^-T * X * L^-1, assuming X is symmetrical and L is lower cholesky"""
-    tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
-    return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
-
 class sparse_GP(GP):
     """
     Variational sparse GP model

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index fb60a8ee..a8225a30 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -303,3 +303,8 @@ def cholupdate(L,x):
     x = x.copy()
     N = x.size
     weave.inline(code, support_code=support_code, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
+
+def backsub_both_sides(L, X):
+    """ Return L^-T * X * L^-1, assuming X is symmetrical and L is lower cholesky"""
+    tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
+    return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
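
What the relocated backsub_both_sides computes can be checked against a dense equivalent. A sketch using scipy.linalg.solve_triangular in place of the raw LAPACK dtrtrs wrapper — same contract, assuming X is symmetric and L is a lower Cholesky factor:

import numpy as np
from scipy import linalg

def backsub_both_sides(L, X):
    # Return L^-T * X * L^-1 via two triangular solves, as in the
    # function moved into GPy.util.linalg above.
    tmp = linalg.solve_triangular(L, X, lower=True, trans='T')
    return linalg.solve_triangular(L, tmp.T, lower=True, trans='T').T

A = np.random.randn(5, 5)
X = A + A.T                                            # symmetric
L = np.linalg.cholesky(np.dot(A, A.T) + 5*np.eye(5))   # lower Cholesky

Li = np.linalg.inv(L)
dense = np.dot(np.dot(Li.T, X), Li)                    # explicit L^-T X L^-1
print(np.allclose(backsub_both_sides(L, X), dense))    # True
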
From 542759353b63a5ca3a04fe9f0c96c2b7972c1f55 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Mon, 13 May 2013 17:23:49 +0100
Subject: [PATCH 6/6] minor changes to the symmetrify function

---
 GPy/kern/linear.py | 2 +-
 GPy/util/linalg.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 16ef2499..af3e60ea 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -203,7 +203,7 @@ class linear(kernpart):
                     target_mu(n,q) += factor*tmp;
                     target_S(n,q) += factor*AZZA_2(q,m,mm,q);
                 }
-            }
+            }
         }
     }
     """

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 64155dca..555345d9 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -257,6 +257,7 @@ def symmetrify(A,upper=False):
     N,M = A.shape
     assert N==M
     c_contig_code = """
+    int iN;
     for (int i=1; i
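
For reference, a pure-numpy sketch of what symmetrify in PATCH 6 computes — reflecting one triangle of a square matrix onto the other, in place. This mirrors the weave kernel's intent under that assumption; it is not the code from the patch:

import numpy as np

def symmetrify(A, upper=False):
    # in-place: reflect the lower triangle into the upper (or vice versa)
    N, M = A.shape
    assert N == M
    triu = np.triu_indices(N, k=1)
    if upper:
        A[(triu[1], triu[0])] = A[triu]   # copy upper into lower
    else:
        A[triu] = A[(triu[1], triu[0])]   # copy lower into upper

A = np.tril(np.arange(16.).reshape(4, 4))
symmetrify(A)
print(np.allclose(A, A.T))   # True
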