From 338f3295b15025e4abd9332042e53f80e9600c06 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Fri, 12 Apr 2013 13:31:15 +0100
Subject: [PATCH 1/4] now returning the ax for plot_latent in BGPLVM

---
 GPy/models/Bayesian_GPLVM.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index ba9603bb..aaaefa7f 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -95,3 +95,5 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
             input_1, input_2 = which_indices
         ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
+
+        return ax

From c7e8345c9614300fbfe20102026864df0756af5b Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Fri, 12 Apr 2013 13:31:45 +0100
Subject: [PATCH 2/4] -march=native was causing problems with the
 compiler on macOS

---
 GPy/kern/rbf.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index a26bb79c..ff5d6ff3 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -228,9 +228,8 @@ class rbf(kernpart):
 
     def weave_psi2(self,mu,Zhat):
         weave_options = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -march=native'],
-                         'extra_link_args'   : ['-lgomp'],
-                         'compiler'          : 'gcc'}
+                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
+                         'extra_link_args'   : ['-lgomp']}
 
         N,Q = mu.shape
         M = Zhat.shape[0]

From ffa1879cfc7cb93b7921e33ce11029bd5efa7f8c Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Fri, 12 Apr 2013 13:32:18 +0100
Subject: [PATCH 3/4] added automatic scale_factor to sparse GPs

---
 GPy/models/sparse_GP.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 88abf77d..cebcba0b 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -36,7 +36,7 @@ class sparse_GP(GP):
 
     def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
         self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
-
+        self.auto_scale_factor = False
         self.Z = Z
         self.Zslices = Zslices
         self.Xslices = Xslices
@@ -184,6 +184,8 @@ class sparse_GP(GP):
         self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
         self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
         self._compute_kernel_matrices()
+        if self.auto_scale_factor:
+            self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
         self._computations()
 
     def _get_params(self):

From c8d64a4a69bd193833ac61c3fad721b6cdb3eecc Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Fri, 12 Apr 2013 13:32:27 +0100
Subject: [PATCH 4/4] SGD: detect sparse Y in check_for_missing; tidy up

---
 GPy/examples/dimensionality_reduction.py |  2 +-
 GPy/inference/SGD.py                     | 53 ++++++++----------------
 2 files changed, 19 insertions(+), 36 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 61a4abd8..b8c60a09 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -120,7 +120,7 @@ def brendan_faces():
 def stick():
     data = GPy.util.datasets.stick()
     m = GPy.models.GPLVM(data['Y'], 2)
-    
+
     # optimize
     m.ensure_default_constraints()
     m.optimize(messages=1, max_f_eval=10000)
diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py
index 13a325b0..5d1b673d 100644
--- a/GPy/inference/SGD.py
+++ b/GPy/inference/SGD.py
@@ -75,7 +75,10 @@ class opt_SGD(Optimizer):
         return (np.isnan(data).sum(axis=1) == 0)
 
     def check_for_missing(self, data):
-        return np.isnan(data).sum() > 0
+        if sp.sparse.issparse(self.model.likelihood.Y):
+            return True
+        else:
+            return np.isnan(data).sum() > 0
 
     def subset_parameter_vector(self, x, samples, param_shapes):
         subset = np.array([], dtype = int)
@@ -149,10 +152,10 @@ class opt_SGD(Optimizer):
         else:
             raise NotImplementedError
 
-    def step_with_missing_data(self, f_fp, X, step, shapes, sparse_matrix):
+    def step_with_missing_data(self, f_fp, X, step, shapes):
         N, Q = X.shape
 
-        if not sparse_matrix:
+        if not sp.sparse.issparse(self.model.likelihood.Y):
             Y = self.model.likelihood.Y
             samples = self.non_null_samples(self.model.likelihood.Y)
             self.model.N = samples.sum()
@@ -165,7 +168,6 @@ class opt_SGD(Optimizer):
         if self.model.N == 0 or Y.std() == 0.0:
             return 0, step, self.model.N
 
-        # FIXME: get rid of self.center, everything should be centered by default
         self.model.likelihood._mean = Y.mean()
         self.model.likelihood._std = Y.std()
         self.model.likelihood.set_data(Y)
@@ -173,10 +175,6 @@ class opt_SGD(Optimizer):
         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
         self.model.X = X[samples]
 
-        # if self.center:
-        #     self.model.likelihood.Y -= self.model.likelihood.Y.mean()
-        #     self.model.likelihood.Y /= self.model.likelihood.Y.std()
-
         model_name = self.model.__class__.__name__
 
         if model_name == 'Bayesian_GPLVM':
@@ -185,33 +183,31 @@ class opt_SGD(Optimizer):
 
         b, p = self.shift_constraints(j)
         f, fp = f_fp(self.x_opt[j])
-        # momentum_term = self.momentum * step[j]
-        # step[j] = self.learning_rate[j] * fp
-        # self.x_opt[j] -= step[j] + momentum_term
-
         step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
         self.x_opt[j] -= step[j]
 
         self.restore_constraints(b, p)
+        # restore likelihood _mean and _std, otherwise when we call set_data(y) on
+        # the next feature, it will get normalized with the mean and std of this one.
+        self.model.likelihood._mean = 0
+        self.model.likelihood._std = 1
 
         return f, step, self.model.N
 
     def opt(self, f_fp=None, f=None, fp=None):
         self.x_opt = self.model._get_params_transformed()
         X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
-        N, Q = self.model.X.shape
-        D = self.model.likelihood.Y.shape[1]
-        self.trace = []
-        sparse_matrix = sp.sparse.issparse(self.model.likelihood.Y)
-        missing_data = True
-        if not sparse_matrix:
-            missing_data = self.check_for_missing(self.model.likelihood.Y)
 
         self.model.likelihood.YYT = None
         self.model.likelihood.trYYT = None
         self.model.likelihood._mean = 0.0
         self.model.likelihood._std = 1.0
+
+        N, Q = self.model.X.shape
+        D = self.model.likelihood.Y.shape[1]
         num_params = self.model._get_params()
+        self.trace = []
+        missing_data = self.check_for_missing(self.model.likelihood.Y)
 
         step = np.zeros_like(num_params)
         for it in range(self.iterations):
@@ -224,34 +220,26 @@ class opt_SGD(Optimizer):
             b = len(features)/self.batch_size
             features = [features[i::b] for i in range(b)]
             NLL = []
-            count = 0
-            last_printed_count = -1
 
-            for j in features:
-                count += 1
+            for count, j in enumerate(features):
                 self.model.D = len(j)
                 self.model.likelihood.D = len(j)
                 self.model.likelihood.set_data(Y[:, j])
 
-                if missing_data or sparse_matrix:
+                if missing_data:
                     shapes = self.get_param_shapes(N, Q)
-                    f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes, sparse_matrix)
+                    f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes)
                 else:
                     Nj = N
                     f, fp = f_fp(self.x_opt)
-                    # momentum_term = self.momentum * step # compute momentum using update(t-1)
-                    # step = self.learning_rate * fp # compute update(t)
-                    # self.x_opt -= step + momentum_term
                     step = self.momentum * step + self.learning_rate * fp
                     self.x_opt -= step
 
-
                 if self.messages == 2:
                     noise = self.model.likelihood._variance
                     status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
                     sys.stdout.write(status)
                     sys.stdout.flush()
-                    last_printed_count = count
                     self.param_traces['noise'].append(noise)
                 NLL.append(f)
 
@@ -269,7 +257,6 @@ class opt_SGD(Optimizer):
             self.model.likelihood.D = D
             self.model.likelihood.Y = Y
 
-            # self.model.Youter = np.dot(Y, Y.T)
             self.trace.append(self.f_opt)
             if self.iteration_file is not None:
                 f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')
@@ -282,7 +269,3 @@ class opt_SGD(Optimizer):
                 status = "SGD Iteration: {0: 3d}/{1: 3d}  f: {2: 2.3f}\n".format(it+1, self.iterations, self.f_opt)
                 sys.stdout.write(status)
                 sys.stdout.flush()
-
-
-
-