diff --git a/GPy/inference/optimization/stochastics.py b/GPy/inference/optimization/stochastics.py
index f1532bc5..2151c39c 100644
--- a/GPy/inference/optimization/stochastics.py
+++ b/GPy/inference/optimization/stochastics.py
@@ -5,6 +5,10 @@ class StochasticStorage(object):
     '''
     This is a container for holding the stochastic parameters,
    such as subset indices or step length and so on.
+
+    self.d has to be a list of lists:
+    [dimension indices, boolean mask of the non-NaN rows those dimensions share]
+    so that the minibatches can be processed as efficiently as possible.
     '''
     def __init__(self, model):
         """
@@ -28,9 +32,25 @@ class SparseGPMissing(StochasticStorage):
         """
         Here we want to loop over all dimensions everytime.
         Thus, we can just make sure the loop goes over self.d every
-        time.
+        time. We group together dimensions whose missing-data patterns
+        match, which speeds up the calculations significantly.
         """
-        self.d = range(model.Y_normalized.shape[1])
+        import numpy as np
+        self.Y = model.Y_normalized
+        bdict = {}
+        for d in range(self.Y.shape[1]):
+            inan = np.isnan(self.Y[:, d])
+            # Use the column's NaN pattern as a hashable key, so that
+            # dimensions with identical patterns land in the same batch.
+            arr_str = np.array2string(inan, max_line_width=np.inf,
+                                      precision=0, suppress_small=True,
+                                      separator='',
+                                      formatter={'bool': lambda x: '1' if x else '0'})
+            try:
+                bdict[arr_str][0].append(d)
+            except KeyError:
+                bdict[arr_str] = [[d], ~inan]
+        self.d = list(bdict.values())

 class SparseGPStochastics(StochasticStorage):
     """
@@ -40,16 +60,31 @@ class SparseGPStochastics(StochasticStorage):
     def __init__(self, model, batchsize=1):
         self.batchsize = batchsize
         self.output_dim = model.Y.shape[1]
+        self.Y = model.Y_normalized
         self.reset()
         self.do_stochastics()

     def do_stochastics(self):
+        import numpy as np
         if self.batchsize == 1:
             self.current_dim = (self.current_dim+1)%self.output_dim
-            self.d = [self.current_dim]
+            self.d = [[[self.current_dim], ~np.isnan(self.Y[:, self.current_dim])]]
         else:
-            import numpy as np
             self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
+            # Group the sampled dimensions by their NaN pattern, exactly as
+            # SparseGPMissing does above.
+            bdict = {}
+            for d in self.d:
+                inan = np.isnan(self.Y[:, d])
+                arr_str = np.array2string(inan, max_line_width=np.inf,
+                                          precision=0, suppress_small=True,
+                                          separator='',
+                                          formatter={'bool': lambda x: '1' if x else '0'})
+                try:
+                    bdict[arr_str][0].append(d)
+                except KeyError:
+                    bdict[arr_str] = [[d], ~inan]
+            self.d = list(bdict.values())

     def reset(self):
         self.current_dim = -1
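
# --- Editor's note (not part of the patch) --------------------------------
# The grouping trick in SparseGPMissing hashes each output column's pattern
# of missing values so that columns sharing a pattern are handled in one
# batch. A minimal standalone sketch of the idea, assuming a small toy
# array Y; here tobytes() stands in for the patch's np.array2string hack as
# an exact, hashable encoding of the NaN pattern:
#
#     import numpy as np
#
#     Y = np.array([[1.0,    2.0, 3.0],
#                   [np.nan, 4.0, np.nan],
#                   [5.0,    6.0, 7.0]])
#
#     batches = {}
#     for d in range(Y.shape[1]):
#         inan = np.isnan(Y[:, d])
#         key = inan.tobytes()
#         dims, _ = batches.setdefault(key, ([], ~inan))
#         dims.append(d)
#
#     for dims, ninan in batches.values():
#         print(dims, ninan)
#     # -> [0, 2] [ True False  True]
#     #    [1] [ True  True  True]
# ---------------------------------------------------------------------------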
diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py
index ad62043a..07295255 100644
--- a/GPy/models/sparse_gp_minibatch.py
+++ b/GPy/models/sparse_gp_minibatch.py
@@ -63,33 +63,18 @@ class SparseGPMiniBatch(SparseGP):
         if stochastic and missing_data:
             self.missing_data = True
-            self.ninan = ~np.isnan(Y)
             self.stochastics = SparseGPStochastics(self, batchsize)
         elif stochastic and not missing_data:
             self.missing_data = False
             self.stochastics = SparseGPStochastics(self, batchsize)
         elif missing_data:
             self.missing_data = True
-            self.ninan = ~np.isnan(Y)
             self.stochastics = SparseGPMissing(self)
         else:
             self.stochastics = False

         logger.info("Adding Z as parameter")
         self.link_parameter(self.Z, index=0)

-        if self.missing_data:
-            self.Ylist = []
-            overall = self.Y_normalized.shape[1]
-            m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
-            message = m_f(-1)
-            print(message, end=' ')
-            for d in range(overall):
-                self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
-                print(' '*(len(message)+1) + '\r', end=' ')
-                message = m_f(d)
-                print(message, end=' ')
-            print('')
-
         self.posterior = None

     def has_uncertain_inputs(self):
@@ -245,8 +230,7 @@ class SparseGPMiniBatch(SparseGP):
             message = m_f(-1)
             print(message, end=' ')

-        for d in self.stochastics.d:
-            ninan = self.ninan[:, d]
+        for d, ninan in self.stochastics.d:
             if not self.stochastics:
                 print(' '*(len(message)) + '\r', end=' ')
@@ -257,7 +241,7 @@ class SparseGPMiniBatch(SparseGP):
             grad_dict, current_values, value_indices = self._inner_parameters_changed(
                 self.kern, self.X[ninan], self.Z, self.likelihood,
-                self.Ylist[d], self.Y_metadata,
+                self.Y_normalized[ninan][:, d], self.Y_metadata,
                 Lm, dL_dKmm,
                 subset_indices=dict(outputs=d, samples=ninan))
@@ -266,8 +250,8 @@ class SparseGPMiniBatch(SparseGP):
             Lm = posterior.K_chol
             dL_dKmm = grad_dict['dL_dKmm']
-            woodbury_inv[:, :, d] = posterior.woodbury_inv
-            woodbury_vector[:, d:d+1] = posterior.woodbury_vector
+            woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
+            woodbury_vector[:, d] = posterior.woodbury_vector
             self._log_marginal_likelihood += log_marginal_likelihood

         if not self.stochastics:
             print('')
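
# --- Editor's note (not part of the patch) --------------------------------
# After this change each entry of stochastics.d carries a whole group of
# output dimensions plus the non-NaN row mask they share, so the inner loop
# above can slice X and Y once per group instead of once per column. A
# hedged sketch of that consumer side; d_groups is hand-built and the
# commented inner_update call is a hypothetical stand-in for
# _inner_parameters_changed:
#
#     import numpy as np
#
#     X = np.arange(8.0).reshape(4, 2)
#     Y = np.array([[1.0,    2.0, 3.0],
#                   [np.nan, 4.0, np.nan],
#                   [5.0,    6.0, 7.0],
#                   [8.0,    9.0, 0.5]])
#
#     # Grouped as SparseGPMissing would: dims 0 and 2 share a NaN pattern.
#     d_groups = [([0, 2], ~np.isnan(Y[:, 0])),
#                 ([1],    ~np.isnan(Y[:, 1]))]
#
#     for d, ninan in d_groups:
#         X_batch = X[ninan]        # rows valid for every dim in the group
#         Y_batch = Y[ninan][:, d]  # (n_valid, len(d)) block with no NaNs
#         # inner_update(X_batch, Y_batch, outputs=d, samples=ninan)
#         print(d, X_batch.shape, Y_batch.shape)
#     # -> [0, 2] (3, 2) (3, 2)
#     #    [1] (4, 2) (4, 1)
# ---------------------------------------------------------------------------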