diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a640e360..f79f4e6f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -23,9 +23,6 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan X = _np.random.rand(num_inputs, input_dim) lengthscales = _np.random.rand(input_dim) k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) - ##+ GPy.kern.white(input_dim, 0.01) - #) - #k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T @@ -159,7 +156,6 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy from matplotlib import pyplot as plt - from ..util.misc import param_to_array import numpy as np _np.random.seed(0) @@ -177,7 +173,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes, labels=m.data_labels) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) raw_input('Press enter to finish') plt.close(fig) @@ -186,8 +182,6 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy from matplotlib import pyplot as plt - from ..util.misc import param_to_array - import numpy as np _np.random.seed(0) data = GPy.util.datasets.oil() @@ -204,7 +198,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40 fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes, labels=m.data_labels) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:])) - lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels) raw_input('Press enter to finish') plt.close(fig) @@ -228,10 +222,10 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False): Ylist = [Y1, Y2, Y3] if plot_sim: - import pylab + from matplotlib import pyplot as plt import matplotlib.cm as cm import itertools - fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) + fig = plt.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() ax = fig.add_subplot(2, 1, 1) labls = slist_names @@ -242,29 +236,11 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) - pylab.draw() - pylab.tight_layout() + plt.draw() + plt.tight_layout() return slist, [S1, S2, S3], Ylist -def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS): - S1 = _np.hstack([s1, sS]) - S2 = _np.hstack([s2, s3, sS]) - S3 
= _np.hstack([s3, sS]) - Y1 = S1.dot(_np.random.randn(S1.shape[1], D1)) - Y2 = S2.dot(_np.random.randn(S2.shape[1], D2)) - Y3 = S3.dot(_np.random.randn(S3.shape[1], D3)) - Y1 += .3 * _np.random.randn(*Y1.shape) - Y2 += .2 * _np.random.randn(*Y2.shape) - Y3 += .25 * _np.random.randn(*Y3.shape) - Y1 -= Y1.mean(0) - Y2 -= Y2.mean(0) - Y3 -= Y3.mean(0) - Y1 /= Y1.std(0) - Y2 /= Y2.std(0) - Y3 /= Y3.std(0) - return Y1, Y2, Y3, S1, S2, S3 - def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): _np.random.seed(1234) @@ -291,10 +267,10 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): Ylist = [Y1, Y2, Y3] if plot_sim: - import pylab + from matplotlib import pyplot as plt import matplotlib.cm as cm import itertools - fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) + fig = plt.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() ax = fig.add_subplot(2, 1, 1) labls = slist_names @@ -305,28 +281,28 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) - pylab.draw() - pylab.tight_layout() + plt.draw() + plt.tight_layout() return slist, [S1, S2, S3], Ylist -# def bgplvm_simulation_matlab_compare(): -# from GPy.util.datasets import simulation_BGPLVM -# from GPy import kern -# from GPy.models import BayesianGPLVM -# -# sim_data = simulation_BGPLVM() -# Y = sim_data['Y'] -# mu = sim_data['mu'] -# num_inducing, [_, Q] = 3, mu.shape -# -# k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) -# m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, -# _debug=False) -# m.auto_scale_factor = True -# m['noise'] = Y.var() / 100. -# m['linear_variance'] = .01 -# return m +def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS): + S1 = _np.hstack([s1, sS]) + S2 = _np.hstack([s2, s3, sS]) + S3 = _np.hstack([s3, sS]) + Y1 = S1.dot(_np.random.randn(S1.shape[1], D1)) + Y2 = S2.dot(_np.random.randn(S2.shape[1], D2)) + Y3 = S3.dot(_np.random.randn(S3.shape[1], D3)) + Y1 += .3 * _np.random.randn(*Y1.shape) + Y2 += .2 * _np.random.randn(*Y2.shape) + Y3 += .25 * _np.random.randn(*Y3.shape) + Y1 -= Y1.mean(0) + Y2 -= Y2.mean(0) + Y3 -= Y3.mean(0) + Y1 /= Y1.std(0) + Y2 /= Y2.std(0) + Y3 /= Y3.std(0) + return Y1, Y2, Y3, S1, S2, S3 def bgplvm_simulation(optimize=True, verbose=1, plot=True, plot_sim=False, diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 3aeb4fbb..7c8041ce 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -1,7 +1,6 @@ import numpy as np from ...util import diag from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR -from ...util.misc import param_to_array from ...core.parameterization.variational import VariationalPosterior from . import LatentFunctionInference from posterior import Posterior @@ -23,7 +22,7 @@ class EPDTC(LatentFunctionInference): self.get_YYTfactor.limit = limit def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) + return np.sum(np.square(Y)) def __getstate__(self): # has to be overridden, as Cacher objects cannot be pickled. 
@@ -44,7 +43,7 @@ class EPDTC(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y else: return jitchol(tdot(Y)) diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 1c153518..a815d433 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -12,7 +12,6 @@ import numpy as np from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv -from ...util.misc import param_to_array from posterior import Posterior import warnings from scipy import optimize @@ -39,9 +38,6 @@ class Laplace(LatentFunctionInference): Returns a Posterior class containing essential quantities of the posterior """ - #make Y a normal array! - Y = param_to_array(Y) - # Compute K K = kern.K(X) @@ -153,7 +149,7 @@ class Laplace(LatentFunctionInference): #compute vital matrices C = np.dot(LiW12, K) - Ki_W_i = K - C.T.dot(C) + Ki_W_i = K - C.T.dot(C) #compute the log marginal log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 4f21bc29..64ee30c4 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -6,7 +6,6 @@ from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrt from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np -from ...util.misc import param_to_array from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) import logging, itertools @@ -35,7 +34,7 @@ class VarDTC(LatentFunctionInference): self.get_YYTfactor.limit = limit def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) + return np.sum(np.square(Y)) def __getstate__(self): # has to be overridden, as Cacher objects cannot be pickled. @@ -56,7 +55,7 @@ class VarDTC(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y.view(np.ndarray) else: return jitchol(tdot(Y)) diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py index 3bd5c347..f7da9080 100644 --- a/GPy/inference/latent_function_inference/var_dtc_gpu.py +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np -from ...util.misc import param_to_array from . 
import LatentFunctionInference log_2_pi = np.log(2*np.pi) @@ -32,18 +31,18 @@ class VarDTC_GPU(LatentFunctionInference): """ const_jitter = np.float64(1e-6) def __init__(self, batchsize=None, gpu_memory=4., limit=1): - + self.batchsize = batchsize self.gpu_memory = gpu_memory - + self.midRes = {} self.batch_pos = 0 # the starting position of the current mini-batch - + self.cublas_handle = gpu_init.cublas_handle - + # Initialize GPU caches self.gpuCache = None - + def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y): ndata = Y.shape[0] if self.batchsize==None: @@ -75,10 +74,10 @@ class VarDTC_GPU(LatentFunctionInference): 'psi2p_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), } self.gpuCache['ones_gpu'].fill(1.0) - + YT_gpu = self.gpuCache['YT_gpu'] self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1) - + def _estimateMemoryOccupation(self, N, M, D): """ Estimate the best batch size. @@ -89,7 +88,7 @@ class VarDTC_GPU(LatentFunctionInference): unit: GB """ return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024. - + def _estimateBatchSize(self, kern, N, M, Q, D): """ Estimate the best batch size. @@ -104,11 +103,11 @@ class VarDTC_GPU(LatentFunctionInference): else: x0, x1 = 0.,0. y0, y1 = self._estimateMemoryOccupation(N, M, D) - + opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N) - + return opt_batchsize - + def _get_YYTfactor(self, Y): """ find a matrix L which satisfies LLT = YYT. @@ -117,10 +116,10 @@ class VarDTC_GPU(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y.view(np.ndarray) else: return jitchol(tdot(Y)) - + def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise): num_inducing, input_dim = Z.shape[0], Z.shape[1] num_data, output_dim = Y.shape @@ -130,7 +129,7 @@ class VarDTC_GPU(LatentFunctionInference): beta_gpu = self.gpuCache['beta_gpu'] YT_gpu = self.gpuCache['YT_gpu'] betaYT_gpu = self.gpuCache['betaYT_gpu'] - + beta_gpu.fill(beta) betaYT_gpu.fill(0.) cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1) @@ -140,7 +139,7 @@ class VarDTC_GPU(LatentFunctionInference): psi1Y_gpu.fill(0.) psi2_gpu.fill(0.) psi0_full = 0 - + for n_start in xrange(0,num_data,self.batchsize): n_end = min(self.batchsize+n_start, num_data) ndata = n_end - n_start @@ -156,35 +155,35 @@ class VarDTC_GPU(LatentFunctionInference): psi1p_gpu = kern.K(X_slice, Z) cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing) - + psi0_full += psi0.sum() - + if uncertain_inputs: sum_axis(psi2_gpu,psi2p_gpu,1,1) else: cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing) - + psi0_full *= beta if uncertain_inputs: cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1) - - else: + + else: psi2_full = np.zeros((num_inducing,num_inducing)) psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi0_full = 0. YRY_full = 0. 
- - for n_start in xrange(0,num_data,self.batchsize): + + for n_start in xrange(0,num_data,self.batchsize): n_end = min(self.batchsize+n_start, num_data) Y_slice = Y[n_start:n_end] X_slice = X[n_start:n_end] - + if het_noise: b = beta[n_start] YRY_full += np.inner(Y_slice, Y_slice)*b else: b = beta - + if uncertain_inputs: psi0 = kern.psi0(Z, X_slice) psi1 = kern.psi1(Z, X_slice) @@ -193,50 +192,50 @@ class VarDTC_GPU(LatentFunctionInference): psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) psi2_full += np.dot(psi1.T,psi1)*b - + psi0_full += psi0.sum()*b - psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM - + psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM + if not het_noise: YRY_full = trYYT*beta psi1Y_gpu.set(psi1Y_full) psi2_gpu.set(psi2_full) - + return psi0_full, YRY_full - + def inference_likelihood(self, kern, X, Z, likelihood, Y): """ The first phase of inference: Compute: log-likelihood, dL_dKmm - + Cached intermediate results: Kmm, KmmInv, """ - + num_inducing, input_dim = Z.shape[0], Z.shape[1] num_data, output_dim = Y.shape - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 if het_noise: self.batchsize=0 - + self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y) if isinstance(X, VariationalPosterior): uncertain_inputs = True else: uncertain_inputs = False - + psi1Y_gpu = self.gpuCache['psi1Y_gpu'] psi2_gpu = self.gpuCache['psi2_gpu'] - + psi0_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise) - + #====================================================================== # Compute Common Components #====================================================================== - + Kmm = kern.K(Z).copy() Kmm_gpu = self.gpuCache['Kmm_gpu'] Kmm_gpu.set(np.asfortranarray(Kmm)) @@ -244,14 +243,14 @@ class VarDTC_GPU(LatentFunctionInference): ones_gpu = self.gpuCache['ones_gpu'] cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1) # assert np.allclose(Kmm, Kmm_gpu.get()) - + # Lm = jitchol(Kmm) # Lm_gpu = self.gpuCache['Lm_gpu'] cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1) culinalg.cho_factor(Lm_gpu,'L') # print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max() - + # Lambda = Kmm+psi2_full # LL = jitchol(Lambda) # @@ -261,7 +260,7 @@ class VarDTC_GPU(LatentFunctionInference): LL_gpu = Lambda_gpu culinalg.cho_factor(LL_gpu,'L') # print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max() - + # b,_ = dtrtrs(LL, psi1Y_full) # bbt_cpu = np.square(b).sum() # @@ -270,7 +269,7 @@ class VarDTC_GPU(LatentFunctionInference): cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing) bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1) # print np.abs(bbt-bbt_cpu) - + # v,_ = dtrtrs(LL.T,b,lower=False) # vvt = np.einsum('md,od->mo',v,v) # LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') @@ -288,7 +287,7 @@ class VarDTC_GPU(LatentFunctionInference): tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get()) # print np.abs(vvt-vvt_gpu.get()).max() # print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT) - + # Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T # LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] # KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] @@ -303,7 +302,7 @@ class 
VarDTC_GPU(LatentFunctionInference): cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1) cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing) # print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max() - + # dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 # dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] @@ -311,7 +310,7 @@ class VarDTC_GPU(LatentFunctionInference): cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1) # print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max() - + #====================================================================== # Compute log-likelihood #====================================================================== @@ -320,7 +319,7 @@ class VarDTC_GPU(LatentFunctionInference): else: logL_R = -num_data*np.log(beta) # logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) - + logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get()) logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get()) logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda) @@ -329,7 +328,7 @@ class VarDTC_GPU(LatentFunctionInference): #====================================================================== # Compute dL_dKmm #====================================================================== - + # dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. # dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu'] @@ -341,24 +340,24 @@ class VarDTC_GPU(LatentFunctionInference): #====================================================================== # Compute the Posterior distribution of inducing points p(u|Y) #====================================================================== - + post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get()) #====================================================================== # Compute dL_dthetaL for uncertian input and non-heter noise - #====================================================================== - + #====================================================================== + if not het_noise: dL_dthetaL = (YRY_full + output_dim*psi0_full - num_data*output_dim)/-2. 
dL_dthetaL += cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1) dL_dthetaL += cublas.cublasDdot(self.cublas_handle,v_gpu.size, v_gpu.gpudata,1,psi1Y_gpu.gpudata,1) self.midRes['dL_dthetaL'] = -beta*dL_dthetaL - + return logL, dL_dKmm_gpu.get(), post def inference_minibatch(self, kern, X, Z, likelihood, Y): """ - The second phase of inference: Computing the derivatives over a minibatch of Y + The second phase of inference: Computing the derivatives over a minibatch of Y Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL return a flag showing whether it reached the end of Y (isEnd) """ @@ -370,10 +369,10 @@ class VarDTC_GPU(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 - + n_start = self.batch_pos n_end = min(self.batchsize+n_start, num_data) if n_end==num_data: @@ -382,12 +381,12 @@ class VarDTC_GPU(LatentFunctionInference): else: isEnd = False self.batch_pos = n_end - + nSlice = n_end-n_start X_slice = X[n_start:n_end] if het_noise: beta = beta[n_start] # nSlice==1 - + if kern.useGPU: if not uncertain_inputs: psi0p_gpu = kern.Kdiag(X_slice) @@ -416,28 +415,28 @@ class VarDTC_GPU(LatentFunctionInference): psi1p_gpu.set(np.asfortranarray(psi1)) if uncertain_inputs: psi2p_gpu.set(np.asfortranarray(psi2)) - + #====================================================================== # Compute dL_dpsi #====================================================================== dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] - v_gpu = self.gpuCache['v_gpu'] + v_gpu = self.gpuCache['v_gpu'] dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu'] dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu'] dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu'] betaYT_gpu = self.gpuCache['betaYT_gpu'] betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end] - + # Adjust to the batch size if dL_dpsi0_gpu.shape[0] > nSlice: dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice] dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) - + dL_dpsi0_gpu.fill(-output_dim *beta/2.) - + cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice) - + if uncertain_inputs: cublas.cublasDcopy(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, dL_dpsi2_gpu.gpudata, 1) cublas.cublasDscal(self.cublas_handle, dL_dpsi2_gpu.size, beta, dL_dpsi2_gpu.gpudata, 1) @@ -458,7 +457,7 @@ class VarDTC_GPU(LatentFunctionInference): dL_dpsi1 = dL_dpsi1_gpu else: dL_dpsi0 = dL_dpsi0_gpu.get() - dL_dpsi1 = dL_dpsi1_gpu.get() + dL_dpsi1 = dL_dpsi1_gpu.get() if uncertain_inputs: if kern.useGPU: dL_dpsi2 = dL_dpsi2_gpu @@ -480,4 +479,4 @@ class VarDTC_GPU(LatentFunctionInference): 'dL_dthetaL':dL_dthetaL} return isEnd, (n_start,n_end), grad_dict - + diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index b9ecbb5c..ae25f3e3 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdi from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np -from ...util.misc import param_to_array from . 
import LatentFunctionInference log_2_pi = np.log(2*np.pi) @@ -27,26 +26,26 @@ class VarDTC_minibatch(LatentFunctionInference): """ const_jitter = 1e-6 def __init__(self, batchsize=None, limit=1, mpi_comm=None): - + self.batchsize = batchsize self.mpi_comm = mpi_comm self.limit = limit - + # Cache functions from ...util.caching import Cacher self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) - + self.midRes = {} self.batch_pos = 0 # the starting position of the current mini-batch self.Y_speedup = False # Replace Y with the cholesky factor of YY.T, but the posterior inference will be wrong - + def __getstate__(self): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. return self.batchsize, self.limit, self.Y_speedup def __setstate__(self, state): - # has to be overridden, as Cacher objects cannot be pickled. + # has to be overridden, as Cacher objects cannot be pickled. self.batchsize, self.limit, self.Y_speedup = state self.mpi_comm = None self.midRes = {} @@ -58,9 +57,9 @@ class VarDTC_minibatch(LatentFunctionInference): def set_limit(self, limit): self.get_trYYT.limit = limit self.get_YYTfactor.limit = limit - + def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) + return np.sum(np.square(Y)) def _get_YYTfactor(self, Y): """ @@ -70,19 +69,19 @@ class VarDTC_minibatch(LatentFunctionInference): """ N, D = Y.shape if (N>=D): - return param_to_array(Y) + return Y.view(np.ndarray) else: return jitchol(tdot(Y)) - + def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs): - + het_noise = beta.size > 1 trYYT = self.get_trYYT(Y) if self.Y_speedup and not het_noise: Y = self.get_YYTfactor(Y) - - num_inducing = Z.shape[0] + + num_inducing = Z.shape[0] num_data, output_dim = Y.shape if self.batchsize == None: self.batchsize = num_data @@ -91,8 +90,8 @@ class VarDTC_minibatch(LatentFunctionInference): psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi0_full = 0. YRY_full = 0. 
- - for n_start in xrange(0,num_data,self.batchsize): + + for n_start in xrange(0,num_data,self.batchsize): n_end = min(self.batchsize+n_start, num_data) if (n_end-n_start)==num_data: Y_slice = Y @@ -100,13 +99,13 @@ class VarDTC_minibatch(LatentFunctionInference): else: Y_slice = Y[n_start:n_end] X_slice = X[n_start:n_end] - + if het_noise: b = beta[n_start] YRY_full += np.inner(Y_slice, Y_slice)*b else: b = beta - + if uncertain_inputs: psi0 = kern.psi0(Z, X_slice) psi1 = kern.psi1(Z, X_slice) @@ -115,13 +114,13 @@ class VarDTC_minibatch(LatentFunctionInference): psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) psi2_full += np.dot(psi1.T,psi1)*b - + psi0_full += psi0.sum()*b - psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM + psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM if not het_noise: YRY_full = trYYT*beta - + if self.mpi_comm != None: psi0_all = np.array(psi0_full) psi1Y_all = psi1Y_full.copy() @@ -132,18 +131,18 @@ class VarDTC_minibatch(LatentFunctionInference): self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE]) self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE]) return psi0_all, psi1Y_all, psi2_all, YRY_all - + return psi0_full, psi1Y_full, psi2_full, YRY_full - + def inference_likelihood(self, kern, X, Z, likelihood, Y): """ The first phase of inference: Compute: log-likelihood, dL_dKmm - + Cached intermediate results: Kmm, KmmInv, """ - - num_data, output_dim = Y.shape + + num_data, output_dim = Y.shape input_dim = Z.shape[0] if self.mpi_comm != None: num_data_all = np.array(num_data,dtype=np.int32) @@ -154,7 +153,7 @@ class VarDTC_minibatch(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 @@ -162,28 +161,28 @@ class VarDTC_minibatch(LatentFunctionInference): self.batchsize = 1 psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs) - + #====================================================================== # Compute Common Components #====================================================================== - + Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) KmmInv,Lm,LmInv,_ = pdinv(Kmm) - + LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T) Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT LInv,LL,LLInv,logdet_L = pdinv(Lambda) b = LLInv.dot(LmInv.dot(psi1Y_full.T)) bbt = np.square(b).sum() v = LmInv.T.dot(LLInv.T.dot(b)) - + dL_dpsi2R = LmInv.T.dot(-LLInv.T.dot(tdot(b)+output_dim*np.eye(input_dim)).dot(LLInv)+output_dim*np.eye(input_dim)).dot(LmInv)/2. - + # Cache intermediate results self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['v'] = v - + #====================================================================== # Compute log-likelihood #====================================================================== @@ -196,22 +195,22 @@ class VarDTC_minibatch(LatentFunctionInference): #====================================================================== # Compute dL_dKmm #====================================================================== - + dL_dKmm = dL_dpsi2R - output_dim*KmmInv.dot(psi2_full).dot(KmmInv)/2. 
#====================================================================== # Compute the Posterior distribution of inducing points p(u|Y) #====================================================================== - + if not self.Y_speedup or het_noise: post = Posterior(woodbury_inv=LmInv.T.dot(np.eye(input_dim)-LInv).dot(LmInv), woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) else: post = None - + #====================================================================== # Compute dL_dthetaL for uncertian input and non-heter noise - #====================================================================== - + #====================================================================== + if not het_noise: dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum() self.midRes['dL_dthetaL'] = dL_dthetaL @@ -220,7 +219,7 @@ class VarDTC_minibatch(LatentFunctionInference): def inference_minibatch(self, kern, X, Z, likelihood, Y): """ - The second phase of inference: Computing the derivatives over a minibatch of Y + The second phase of inference: Computing the derivatives over a minibatch of Y Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL return a flag showing whether it reached the end of Y (isEnd) """ @@ -231,7 +230,7 @@ class VarDTC_minibatch(LatentFunctionInference): uncertain_inputs = True else: uncertain_inputs = False - + #see whether we've got a different noise variance for each datum beta = 1./np.fmax(likelihood.variance, 1e-6) het_noise = beta.size > 1 @@ -241,7 +240,7 @@ class VarDTC_minibatch(LatentFunctionInference): YYT_factor = self.get_YYTfactor(Y) else: YYT_factor = Y - + n_start = self.batch_pos n_end = min(self.batchsize+n_start, num_data) if n_end==num_data: @@ -250,10 +249,10 @@ class VarDTC_minibatch(LatentFunctionInference): else: isEnd = False self.batch_pos = n_end - + Y_slice = YYT_factor[n_start:n_end] X_slice = X[n_start:n_end] - + if not uncertain_inputs: psi0 = kern.Kdiag(X_slice) psi1 = kern.K(X_slice, Z) @@ -264,33 +263,33 @@ class VarDTC_minibatch(LatentFunctionInference): psi1 = kern.psi1(Z, X_slice) psi2 = kern.psi2(Z, X_slice) betapsi1 = np.einsum('n,nm->nm',beta,psi1) - + if het_noise: beta = beta[n_start] # assuming batchsize==1 betaY = beta*Y_slice - + #====================================================================== # Load Intermediate Results #====================================================================== - + dL_dpsi2R = self.midRes['dL_dpsi2R'] v = self.midRes['v'] - + #====================================================================== # Compute dL_dpsi #====================================================================== - + dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2. - + dL_dpsi1 = np.dot(betaY,v.T) - + if uncertain_inputs: dL_dpsi2 = beta* dL_dpsi2R else: dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi2 = None - + #====================================================================== # Compute dL_dthetaL #====================================================================== @@ -300,14 +299,14 @@ class VarDTC_minibatch(LatentFunctionInference): psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2) else: psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) - + dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) else: if isEnd: dL_dthetaL = self.midRes['dL_dthetaL'] else: dL_dthetaL = 0. 
- + if uncertain_inputs: grad_dict = {'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, @@ -317,7 +316,7 @@ class VarDTC_minibatch(LatentFunctionInference): grad_dict = {'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1, 'dL_dthetaL':dL_dthetaL} - + return isEnd, (n_start,n_end), grad_dict @@ -330,18 +329,18 @@ def update_gradients(model, mpi_comm=None): X = model.X[model.N_range[0]:model.N_range[1]] model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y) - + het_noise = model.likelihood.variance.size > 1 - + if het_noise: dL_dthetaL = np.empty((model.Y.shape[0],)) else: dL_dthetaL = np.float64(0.) - + kern_grad = model.kern.gradient.copy() kern_grad[:] = 0. model.Z.gradient = 0. - + isEnd = False while not isEnd: isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y) @@ -352,24 +351,24 @@ def update_gradients(model, mpi_comm=None): X_slice = model.X[n_range[0]:n_range[1]] else: X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]] - + #gradients w.r.t. kernel model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) kern_grad += model.kern.gradient - + #gradients w.r.t. Z model.Z.gradient += model.kern.gradients_Z_expectations( dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) - + #gradients w.r.t. posterior parameters of X X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) model.set_X_gradients(X_slice, X_grad) - + if het_noise: dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] else: dL_dthetaL += grad_dict['dL_dthetaL'] - + # Gather the gradients from multiple MPI nodes if mpi_comm != None: if het_noise: @@ -380,14 +379,14 @@ def update_gradients(model, mpi_comm=None): mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE]) kern_grad = kern_grad_all model.Z.gradient = Z_grad_all - + #gradients w.r.t. kernel model.kern.update_gradients_full(dL_dKmm, model.Z, None) model.kern.gradient += kern_grad #gradients w.r.t. Z model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z) - + # Update Log-likelihood KL_div = model.variational_prior.KL_divergence(X) # update for the KL divergence diff --git a/GPy/kern/_src/poly.py b/GPy/kern/_src/poly.py index 4c5f0e93..b90e8f8f 100644 --- a/GPy/kern/_src/poly.py +++ b/GPy/kern/_src/poly.py @@ -3,7 +3,6 @@ import numpy as np from kern import Kern -from ...util.misc import param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp class Poly(Kern): diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 744de6e7..3be2aed2 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -7,7 +7,6 @@ from ..core import SparseGP from .. import likelihoods from .. 
import kern from ..inference.latent_function_inference import VarDTC -from ..util.misc import param_to_array from ..core.parameterization.variational import NormalPosterior class SparseGPRegression(SparseGP): @@ -40,7 +39,7 @@ class SparseGPRegression(SparseGP): # Z defaults to a subset of the data if Z is None: i = np.random.permutation(num_data)[:min(num_inducing, num_data)] - Z = param_to_array(X)[i].copy() + Z = X.view(np.ndarray)[i].copy() else: assert Z.shape[1] == input_dim diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 20e8e962..b4fed8fd 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -1,7 +1,6 @@ import numpy as np from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController -from ...util.misc import param_to_array from ...core.parameterization.variational import VariationalPosterior from .base_plots import x_frame2D import itertools @@ -55,9 +54,9 @@ def plot_latent(model, labels=None, which_indices=None, #fethch the data points X that we'd like to plot X = model.X if isinstance(X, VariationalPosterior): - X = param_to_array(X.mean) + X = X.mean else: - X = param_to_array(X) + X = X if X.shape[0] > 1000: @@ -175,7 +174,7 @@ def plot_latent(model, labels=None, which_indices=None, ax.set_aspect('auto') # set a nice aspect ratio if plot_inducing: - Z = param_to_array(model.Z) + Z = model.Z ax.plot(Z[:, input_1], Z[:, input_2], '^w') ax.set_xlim((xmin, xmax)) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index c0bd1599..c2bd7d38 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -35,8 +35,7 @@ def add_bar_labels(fig, ax, bars, bottom=0): def plot_bars(fig, ax, x, ard_params, color, name, bottom=0): - from ...util.misc import param_to_array - return ax.bar(left=x, height=param_to_array(ard_params), width=.8, + return ax.bar(left=x, height=ard_params.view(np.ndarray), width=.8, bottom=bottom, align='center', color=color, edgecolor='k', linewidth=1.2, label=name.replace("_"," ")) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 509c9485..ed024c0a 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -8,7 +8,6 @@ except: pass import numpy as np from base_plots import gpplot, x_frame1D, x_frame2D -from ...util.misc import param_to_array from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from scipy import sparse @@ -67,7 +66,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', X_variance = model.X.variance else: X = model.X - #X, Y = param_to_array(X, model.Y) Y = model.Y if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) diff --git a/GPy/plotting/matplot_dep/variational_plots.py b/GPy/plotting/matplot_dep/variational_plots.py index e97f001b..5cced10d 100644 --- a/GPy/plotting/matplot_dep/variational_plots.py +++ b/GPy/plotting/matplot_dep/variational_plots.py @@ -1,5 +1,4 @@ import pylab as pb, numpy as np -from ...util.misc import param_to_array def plot(parameterized, fignum=None, ax=None, colors=None): """ @@ -21,7 +20,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None): else: colors = iter(colors) plots = [] - means, variances = 
param_to_array(parameterized.mean, parameterized.variance) + means, variances = parameterized.mean, parameterized.variance x = np.arange(means.shape[0]) for i in range(means.shape[1]): if ax is None: @@ -68,7 +67,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid else: colors = iter(colors) plots = [] - means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob) + means, variances, gamma = parameterized.mean, parameterized.variance, parameterized.binary_prob x = np.arange(means.shape[0]) for i in range(means.shape[1]): if side_by_side: @@ -77,7 +76,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid else: sub1 = (means.shape[1]*2,1,2*i+1) sub2 = (means.shape[1]*2,1,2*i+2) - + # mean and variance plot a = fig.add_subplot(*sub1) a.plot(means, c='k', alpha=.3) diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index 957d5a78..9ff41730 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -4,7 +4,6 @@ import GPy import numpy as np import matplotlib as mpl import time -from ...util.misc import param_to_array from GPy.core.parameterization.variational import VariationalPosterior try: import visual @@ -127,7 +126,7 @@ class lvm(matplotlib_show): self.latent_index = latent_index self.latent_dim = model.input_dim self.disable_drag = disable_drag - + # The red cross which shows current latent point. self.latent_values = vals self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0] @@ -474,7 +473,7 @@ class mocap_data_show(matplotlib_show): self.axes.set_ylim(self.y_lim) self.axes.set_zlim(self.z_lim) self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.]) - + # self.axes.set_aspect('equal') # self.axes.autoscale(enable=False) @@ -500,7 +499,7 @@ class skeleton_show(mocap_data_show): :param vals: set of modeled angles to use for printing in the axis when it's first created. :type vals: np.array :param skel: skeleton object that has the parameters of the motion capture skeleton associated with it. - :type skel: mocap.skeleton object + :type skel: mocap.skeleton object :param padding: :type int """ @@ -512,7 +511,7 @@ class skeleton_show(mocap_data_show): """Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting. :param vals: the values that are being modelled.""" - + if self.padding>0: channels = np.zeros((self.vals.shape[0], self.vals.shape[1]+self.padding)) channels[:, 0:self.vals.shape[0]] = self.vals @@ -524,7 +523,7 @@ class skeleton_show(mocap_data_show): self.vals[:, 0] = vals_mat[:, 0].copy() self.vals[:, 1] = vals_mat[:, 2].copy() self.vals[:, 2] = vals_mat[:, 1].copy() - + def wrap_around(self, lim, connect): quot = lim[1] - lim[0] self.vals = rem(self.vals, quot)+lim[0] @@ -546,7 +545,7 @@ def data_play(Y, visualizer, frame_rate=30): Example usage: This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object. - + .. 
code-block:: python data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01']) @@ -556,7 +555,7 @@ def data_play(Y, visualizer, frame_rate=30): GPy.util.visualize.data_play(Y, visualize) """ - + for y in Y: visualizer.modify(y[None, :]) diff --git a/GPy/util/data_resources.json b/GPy/util/data_resources.json index f8b00ce8..1ed735c3 100644 --- a/GPy/util/data_resources.json +++ b/GPy/util/data_resources.json @@ -228,7 +228,7 @@ "fruitfly_tomancak_cel_files": { "citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin", "details": "Gene expression results from blastoderm development in Drosophila Melanogaster.", - "files": [ + "files": [ [ "embryo_tc_4_1.CEL", "embryo_tc_4_2.CEL", @@ -284,7 +284,7 @@ "details": "Google trends results.", "files": [ [ - + ] ], "license": null, @@ -293,7 +293,7 @@ "http://www.google.com/trends/" ] }, - + "hapmap3": { "citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.", "details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosemes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Positon (to Chromosome) [base pairs]\n\n ", @@ -523,6 +523,23 @@ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/" ] }, + "singlecell_islam": { + "citation": "Single-Cell RNA-Seq Reveals Dynamic, Random Monoallelic Gene Expression in Mammalian Cells Qiaolin Deng, Daniel Ramskoeld, Bjoern Reinius, and Rickard Sandberg Science 10 January 2014: 343 (6167), 193-196. [DOI:10.1126/science.1245316]", + "details" : "92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)", + "files" : [["GSE29087_L139_expression_tab.txt.gz"], ["GSE29087_family.soft.gz"]], + "license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html", + "size" : 1159449, + "urls" : ["ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/suppl/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/soft/"] + }, + "singlecell_deng": { + "citation": "Deng Q, Ramsköld D, Reinius B, Sandberg R. Single-cell RNA-seq reveals dynamic, random monoallelic gene expression in mammalian cells. 
Science 2014 Jan 10;343(6167):193-6. PMID: 24408435", + "details" : "First generation mouse strain crosses were used to study monoallelic expression on the single cell level", + "files" : [["?acc=GSE45719&format=file"], ["GSE45719_series_matrix.txt.gz"]], + "license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html", + "size" : 1159449, + "save_names": [["GSE45719_Raw.tar"], [null]], + "urls" : ["http://www.ncbi.nlm.nih.gov/geo/download/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE45nnn/GSE45719/matrix/"] + }, "sod1_mouse": { "citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.", "details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.", diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 93a5dceb..d1250ae4 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -82,20 +82,32 @@ def prompt_user(prompt): def data_available(dataset_name=None): """Check if the data set is available on the local machine already.""" - for file_list in data_resources[dataset_name]['files']: - for file in file_list: - if not os.path.exists(os.path.join(data_path, dataset_name, file)): + from itertools import izip_longest + dr = data_resources[dataset_name] + zip_urls = (dr['files'], ) + if dr.has_key('save_names'): zip_urls += (dr['save_names'], ) + else: zip_urls += ([],) + + for file_list, save_list in izip_longest(*zip_urls, fillvalue=[]): + for f, s in izip_longest(file_list, save_list, fillvalue=None): + if s is not None: f=s # If there is a save_name given, use that one + if not os.path.exists(os.path.join(data_path, dataset_name, f)): return False return True -def download_url(url, store_directory, save_name = None, messages = True, suffix=''): +def download_url(url, store_directory, save_name=None, messages=True, suffix=''): """Download a file from a url and save it to disk.""" i = url.rfind('/') file = url[i+1:] print file dir_name = os.path.join(data_path, store_directory) - save_name = os.path.join(dir_name, file) - print "Downloading ", url, "->", os.path.join(store_directory, file) + + if save_name is None: save_name = os.path.join(dir_name, file) + else: save_name = os.path.join(dir_name, save_name) + + if suffix is None: suffix='' + + print "Downloading ", url, "->", save_name if not os.path.exists(dir_name): os.makedirs(dir_name) try: @@ -178,19 +190,24 @@ def authorize_download(dataset_name=None): def download_data(dataset_name=None): """Check with the user that the are happy with terms and conditions for the data set, then download it.""" + import itertools dr = data_resources[dataset_name] if not authorize_download(dataset_name): raise Exception("Permission to download data set denied.") - if dr.has_key('suffices'): - for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']): - for file, suffix in zip(files, suffices): - download_url(os.path.join(url,file), dataset_name, dataset_name, suffix=suffix) - else: - for url, files in zip(dr['urls'], dr['files']): - for file in files: - download_url(os.path.join(url,file), dataset_name, dataset_name) + zip_urls = (dr['urls'], dr['files']) + + if dr.has_key('save_names'): zip_urls += (dr['save_names'], ) + else: zip_urls += ([],) + + if dr.has_key('suffices'): zip_urls 
+= (dr['suffices'], ) + else: zip_urls += ([],) + + for url, files, save_names, suffices in itertools.izip_longest(*zip_urls, fillvalue=[]): + for f, save_name, suffix in itertools.izip_longest(files, save_names, suffices, fillvalue=None): + download_url(os.path.join(url,f), dataset_name, save_name, suffix=suffix) + return True def data_details_return(data, data_set): @@ -895,6 +912,128 @@ def singlecell(data_set='singlecell'): 'genes': genes, 'labels':labels, }, data_set) +def singlecell_rna_seq_islam(dataset='singlecell_islam'): + if not data_available(dataset): + download_data(dataset) + + from pandas import read_csv, DataFrame, concat + dir_path = os.path.join(data_path, dataset) + filename = os.path.join(dir_path, 'GSE29087_L139_expression_tab.txt.gz') + data = read_csv(filename, sep='\t', skiprows=6, compression='gzip', header=None) + header1 = read_csv(filename, sep='\t', header=None, skiprows=5, nrows=1, compression='gzip') + header2 = read_csv(filename, sep='\t', header=None, skiprows=3, nrows=1, compression='gzip') + data.columns = np.concatenate((header1.ix[0, :], header2.ix[0, 7:])) + Y = data.set_index("Feature").ix[8:, 6:-4].T.astype(float) + + # read the info .soft + filename = os.path.join(dir_path, 'GSE29087_family.soft.gz') + info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None) + # split at ' = ' + info = DataFrame(info.ix[:,0].str.split(' = ').tolist()) + # only take samples: + info = info[info[0].str.contains("!Sample")] + info[0] = info[0].apply(lambda row: row[len("!Sample_"):]) + + groups = info.groupby(0).groups + # remove 'GGG' from barcodes + barcode = info[1][groups['barcode']].apply(lambda row: row[:-3]) + + title = info[1][groups['title']] + title.index = barcode + title.name = 'title' + geo_accession = info[1][groups['geo_accession']] + geo_accession.index = barcode + geo_accession.name = 'geo_accession' + case_id = info[1][groups['source_name_ch1']] + case_id.index = barcode + case_id.name = 'source_name_ch1' + + info = concat([title, geo_accession, case_id], axis=1) + labels = info.join(Y).source_name_ch1[:-4] + labels[labels=='Embryonic stem cell'] = "ES" + labels[labels=='Embryonic fibroblast'] = "MEF" + + return data_details_return({'Y': Y, + 'info': '92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)', + 'genes': Y.columns, + 'labels': labels, + 'datadf': data, + 'infodf': info}, dataset) + +def singlecell_rna_seq_deng(dataset='singlecell_deng'): + if not data_available(dataset): + download_data(dataset) + + from pandas import read_csv + dir_path = os.path.join(data_path, dataset) + + # read the info .soft + filename = os.path.join(dir_path, 'GSE45719_series_matrix.txt.gz') + info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None, nrows=29, index_col=0) + summary = info.loc['!Series_summary'][1] + design = info.loc['!Series_overall_design'] + + # only take samples: + sample_info = read_csv(filename, sep='\t', skiprows=30, compression='gzip', header=0, index_col=0).T + sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):]) + sample_info.columns.name = sample_info.columns.name[len("!Sample_"):] + sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']] + sample_info = sample_info.ix[:, np.r_[0:3, 5:sample_info.shape[1]]] + c = sample_info.columns.to_series() + c[1:4] = ['strain', 'cross', 'developmental_stage'] + sample_info.columns = 
c + + # Extract the tar file + filename = os.path.join(dir_path, 'GSE45719_Raw.tar') + with tarfile.open(filename, 'r') as files: + data = None + gene_info = None + message = '' + members = files.getmembers() + overall = len(members) + for i, file_info in enumerate(members): + f = files.extractfile(file_info) + inner = read_csv(f, sep='\t', header=0, compression='gzip', index_col=0) + sys.stdout.write(' '*(len(message)+1) + '\r') + sys.stdout.flush() + message = "{: >7.2%}: Extracting: {}".format(float(i+1)/overall, file_info.name[:20]+"...txt.gz") + sys.stdout.write(message) + if data is None: + data = inner.RPKM.to_frame() + data.columns = [file_info.name[:-18]] + gene_info = inner.Refseq_IDs.to_frame() + gene_info.columns = [file_info.name[:-18]] + else: + data[file_info.name[:-18]] = inner.RPKM + gene_info[file_info.name[:-18]] = inner.Refseq_IDs + + # Strip GSM number off data index + rep = re.compile('GSM\d+_') + data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():]) + data = data.T + + # make sure the same index gets used + sample_info.index = data.index + + # get the labels from the description + rep = re.compile('fibroblast|\d+-cell|embryo|liver|blastocyst|blastomere|zygote', re.IGNORECASE) + labels = sample_info.developmental_stage.apply(lambda row: " ".join(rep.findall(row))) + + sys.stdout.write(' '*len(message) + '\r') + sys.stdout.flush() + print "Read Archive {}".format(files.name) + + return data_details_return({'Y': data, + 'series_info': info, + 'sample_info': sample_info, + 'gene_info': gene_info, + 'summary': summary, + 'design': design, + 'genes': data.columns, + 'labels': labels, + }, dataset) + + def swiss_roll_1000(): return swiss_roll(num_samples=1000) diff --git a/GPy/util/misc.py b/GPy/util/misc.py index ac5a2e38..bf37159d 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -90,6 +90,8 @@ Convert an arbitrary number of parameters to :class:ndarray class objects. This converting parameter objects to numpy arrays, when using scipy.weave.inline routine. In scipy.weave.blitz there is no automatic array detection (even when the array inherits from :class:ndarray)""" + import warnings + warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning) assert len(param) > 0, "At least one parameter needed" if len(param) == 1: return param[0].view(np.ndarray)
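Note on the recurring change in this patch: every call to GPy.util.misc.param_to_array is replaced by .values or .view(np.ndarray) on the parameter object itself, and param_to_array now emits a DeprecationWarning (last hunk above). The sketch below is not GPy code; ToyParam is a made-up stand-in. It only illustrates why the two spellings are interchangeable, assuming the parameter classes subclass numpy.ndarray as the diff implies.

# Minimal sketch (assumption: parameter objects are ndarray subclasses).
import numpy as np

class ToyParam(np.ndarray):
    """Hypothetical stand-in for a GPy parameter wrapper."""
    @property
    def values(self):
        # expose the underlying data as a plain array
        return self.view(np.ndarray)

p = np.arange(6, dtype=float).reshape(3, 2).view(ToyParam)

a = p.view(np.ndarray)   # spelling used in e.g. _get_YYTfactor above
b = p.values             # spelling used for m.X.mean in the examples above

assert type(a) is np.ndarray and type(b) is np.ndarray
assert np.allclose(a, b)

The same pattern covers m.X.mean.values[0:1,:] in the oil examples and Y.view(np.ndarray) in the inference code.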
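The _get_YYTfactor helpers kept in var_dtc.py, var_dtc_parallel.py and var_dtc_gpu.py rely on the fact that, when D > N, inference only touches Y through Y Y^T, so an N-by-N factor L with L L^T = Y Y^T can stand in for the N-by-D data matrix. A minimal sketch of that identity, using np.linalg.cholesky with a small jitter in place of GPy's jitchol(tdot(Y)):

# Sketch: the low-rank data replacement behind _get_YYTfactor.
import numpy as np

N, D = 5, 200                      # tall-and-wide case (N < D)
Y = np.random.randn(N, D)
YYT = Y.dot(Y.T)
L = np.linalg.cholesky(YYT + 1e-10 * np.eye(N))  # crude stand-in for jitchol(tdot(Y))

assert np.allclose(L.dot(L.T), YYT, atol=1e-6)
assert abs(np.trace(L.dot(L.T)) - (Y ** 2).sum()) < 1e-6  # trYYT is preserved

The cached _get_trYYT uses the same fact in reverse: np.sum(np.square(Y)) equals trace(Y Y^T), so no parameter unwrapping is needed there either.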
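The GPy/util/datasets.py hunks generalise download_data and data_available so that an optional save_names list (and, where present, suffices) rides along with files; itertools.izip_longest pads the missing lists instead of truncating the zip. A simplified sketch of just the files/save_names pairing, with a made-up resource dict (the real entries live in data_resources.json):

# Sketch of the izip_longest pairing; the dict below is illustrative only.
from itertools import izip_longest  # itertools.zip_longest on Python 3

dr = {
    'urls':  ['http://example.org/a/', 'http://example.org/b/'],
    'files': [['raw.tar?acc=XYZ'], ['matrix.txt.gz']],
    'save_names': [['raw.tar'], [None]],   # None -> keep the original file name
}

zip_urls = (dr['urls'], dr['files'], dr.get('save_names', []))
for url, files, save_names in izip_longest(*zip_urls, fillvalue=[]):
    for f, save_name in izip_longest(files, save_names, fillvalue=None):
        print url + f, '->', (save_name or f)

With the singlecell_deng entry added above, this is what lets the query-style URL '?acc=GSE45719&format=file' be stored locally under the name 'GSE45719_Raw.tar'.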