Merge branch 'devel' of github.com:SheffieldML/GPy into devel

James Hensman 2013-05-17 10:28:21 +01:00
commit dbfcebe2a0
19 changed files with 410 additions and 330 deletions

.gitignore
View file

@@ -39,3 +39,11 @@ nosetests.xml
 #bfgs optimiser leaves this lying around
 iterate.dat
+
+# Nosetests #
+#############
+*.noseids
+
+# git merge files #
+###################
+*.orig

View file

@@ -39,23 +39,29 @@ class logexp(transformation):
         return '(+ve)'

 class logexp_clipped(transformation):
-    def __init__(self):
+    max_bound = 1e300
+    min_bound = 1e-10
+    log_max_bound = np.log(max_bound)
+    log_min_bound = np.log(min_bound)
+
+    def __init__(self, lower=1e-6):
         self.domain = 'positive'
+        self.lower = lower
     def f(self, x):
-        f = np.log(1. + np.exp(x))
+        exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
+        f = np.log(1. + exp)
         return f
     def finv(self, f):
-        return np.log(np.exp(f) - 1.)
+        return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.)
     def gradfactor(self, f):
         ef = np.exp(f)
         gf = (ef - 1.) / ef
-        return np.where(f < 1e-6, 0, gf)
+        return np.where(f < self.lower, 0, gf)
     def initialize(self, f):
         if np.any(f < 0.):
             print "Warning: changing parameters to satisfy constraints"
             return np.abs(f)
     def __str__(self):
-        return '(+ve)'
+        return '(+ve_c)'

 class exponent(transformation):
     def __init__(self):
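Aside: the clipped softplus above can be exercised on its own. A minimal numpy sketch follows (module-level functions rather than the GPy transformation class; the constant names mirror the class attributes, but this is not repository code):

import numpy as np

max_bound, min_bound = 1e300, 1e-10
log_max_bound, log_min_bound = np.log(max_bound), np.log(min_bound)

def f(x):
    # softplus, with the exponent clipped so np.exp cannot overflow
    return np.log(1. + np.exp(np.clip(x, log_min_bound, log_max_bound)))

def finv(fx):
    # inverse softplus, clipping f into the representable range first
    return np.log(np.exp(np.clip(fx, min_bound, max_bound)) - 1.)

x = np.array([-5., 0., 5.])
print f(x)        # strictly positive outputs
print finv(f(x))  # recovers x where no clipping occurred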

View file

@@ -79,7 +79,6 @@ def toy_linear_1d_classification(seed=default_seed):
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
-    Y[Y == -1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(1)
@@ -96,7 +95,7 @@ def toy_linear_1d_classification(seed=default_seed):
     m.update_likelihood_approximation()
     # Parameters optimization:
     m.optimize()
-    #m.EPEM() #FIXME
+    #m.pseudo_EM() #FIXME

     # Plot
     pb.subplot(211)
@@ -109,14 +108,13 @@ def toy_linear_1d_classification(seed=default_seed):
 def sparse_toy_linear_1d_classification(seed=default_seed):
     """
-    Simple 1D classification example
+    Sparse 1D classification example

     :param seed : seed value for data generation (default is 4).
     :type seed: int
     """
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
-    Y[Y == -1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
@@ -168,7 +166,6 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     sample = np.random.randint(0,data['X'].shape[0],inducing)
     Z = data['X'][sample,:]
-    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)

     # create sparse GP EP model
     m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)

View file

@@ -2,13 +2,11 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-import pylab as pb
-from matplotlib import pyplot as plt, pyplot
+from matplotlib import pyplot as plt
 import GPy
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
-from GPy.util.datasets import simulation_BGPLVM
-from GPy.core.transformations import square, logexp_clipped
+from GPy.util.datasets import swiss_roll_generated

 default_seed = np.random.seed(123344)
@@ -47,10 +45,11 @@ def BGPLVM(seed=default_seed):
 def GPLVM_oil_100(optimize=True):
     data = GPy.util.datasets.oil_100()
+    Y = data['X']

     # create simple GP model
     kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
-    m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
+    m = GPy.models.GPLVM(Y, 6, kernel=kernel)
     m.data_labels = data['Y'].argmax(axis=1)

     # optimize
@@ -63,27 +62,88 @@ def GPLVM_oil_100(optimize=True):
     m.plot_latent(labels=m.data_labels)
     return m

-def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
+def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
+    from GPy.util.datasets import swiss_roll
+    from GPy.core.transformations import logexp_clipped
+    data = swiss_roll_generated(N=N, sigma=sigma)
+    Y = data['Y']
+    Y -= Y.mean()
+    Y /= Y.std()
+    t = data['t']
+    c = data['colors']
+    try:
+        from sklearn.manifold.isomap import Isomap
+        iso = Isomap().fit(Y)
+        X = iso.embedding_
+        if Q > 2:
+            X = np.hstack((X, np.random.randn(N, Q - 2)))
+    except ImportError:
+        X = np.random.randn(N, Q)
+    if plot:
+        from mpl_toolkits import mplot3d
+        import pylab
+        fig = pylab.figure("Swiss Roll Data")
+        ax = fig.add_subplot(121, projection='3d')
+        ax.scatter(*Y.T, c=c)
+        ax.set_title("Swiss Roll")
+        ax = fig.add_subplot(122)
+        ax.scatter(*X.T[:2], c=c)
+        ax.set_title("Initialization")
+    var = .5
+    S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
+                                         - (1 - var),
+                                         (1 - var))) + .001
+    Z = np.random.permutation(X)[:M]
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+    m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
+    m.data_colors = c
+    m.data_t = t
+    m.constrain('variance|length', logexp_clipped())
+    m['lengthscale'] = 1. # X.var(0).max() / X.var(0)
+    m['noise'] = Y.var() / 100.
+    m.ensure_default_constraints()
+    if optimize:
+        m.optimize('scg', messages=1)
+    return m

+def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k):
     data = GPy.util.datasets.oil()
+    from GPy.core.transformations import logexp_clipped
+    np.random.seed(0)

     # create simple GP model
     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
     Y = data['X'][:N]
-    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
+    Yn = Y - Y.mean(0)
+    Yn /= Yn.std(0)
+    m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
     m.data_labels = data['Y'][:N].argmax(axis=1)
-    m.constrain('variance', logexp_clipped())
-    m.constrain('length', logexp_clipped())
-    m['lengt'] = 100.
+    # m.constrain('variance', logexp_clipped())
+    # m.constrain('length', logexp_clipped())
+    m['lengt'] = m.X.var(0).max() / m.X.var(0)
+    m['noise'] = Yn.var() / 100.
     m.ensure_default_constraints()

     # optimize
     if optimize:
-        m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
-        m.optimize('scg', messages=1, max_f_eval=150)
-        m.unconstrain('noise')
-        m.constrain('noise', logexp_clipped())
+        # m.unconstrain('noise'); m.constrain_fixed('noise')
+        # m.optimize('scg', messages=1, max_f_eval=200)
+        # m.unconstrain('noise')
+        # m.constrain('noise', logexp_clipped())
         m.optimize('scg', messages=1, max_f_eval=max_f_eval)

     if plot:
@@ -115,6 +175,8 @@ def oil_100():
     # m.plot_latent(labels=data['Y'].argmax(axis=1))
     return m

 def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     x = np.linspace(0, 4 * np.pi, N)[:, None]
     s1 = np.vectorize(lambda x: np.sin(x))
@@ -178,6 +240,7 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     return slist, [S1, S2, S3], Ylist

 def bgplvm_simulation_matlab_compare():
+    from GPy.util.datasets import simulation_BGPLVM
     sim_data = simulation_BGPLVM()
     Y = sim_data['Y']
     S = sim_data['S']
@@ -213,6 +276,8 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
                       max_burnin=100, true_X=False,
                       do_opt=True,
                       max_f_eval=1000):
+    from GPy.core.transformations import logexp_clipped
     D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
@@ -317,6 +382,8 @@ def mrd_simulation(plot_sim=False):
     from GPy.models import mrd
     from GPy import kern
+    from GPy.core.transformations import logexp_clipped
     reload(mrd); reload(kern)

     # k = kern.rbf(2, ARD=True) + kern.bias(2) + kern.white(2)
@@ -365,13 +432,23 @@ def mrd_silhouette():
     pass

 def brendan_faces():
+    from GPy import kern
     data = GPy.util.datasets.brendan_faces()
-    Y = data['Y'][0:-1:10, :]
-    m = GPy.models.GPLVM(data['Y'], 2)
+    Q = 2
+    # Y = data['Y'][0:-1:2, :]
+    Y = data['Y']
+    Yn = Y - Y.mean()
+    Yn /= Yn.std()
+    m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4)

     # optimize
+    # m.constrain_fixed('white', 1e-2)
+    # m.constrain_bounded('noise', 1e-6, 10)
+    m.constrain('rbf', GPy.core.transformations.logexp_clipped())
     m.ensure_default_constraints()
-    m.optimize(messages=1, max_f_eval=10000)
+    m.optimize('scg', messages=1, max_f_eval=10000)

     ax = m.plot_latent()
     y = m.likelihood.Y[0, :]

View file

@@ -111,7 +111,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         iteration += 1
         if display:
             print '\r',
-            print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
+            print 'Iter: {0:>0{mi}g} Obj:{1:> 12e} Scale:{2:> 12e} |g|:{3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len(str(maxiters))),
             # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
             sys.stdout.flush()
@@ -130,7 +130,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
             # If the gradient is zero then we are done.
             if current_grad <= gtol:
                 status = 'converged'
-                return x, flog, function_eval, status
+                break
+                # return x, flog, function_eval, status

         # Adjust beta according to comparison ratio.
         if Delta < 0.25:
@@ -147,9 +148,10 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         elif success:
             gamma = np.dot(gradold - gradnew, gradnew) / (mu)
             d = gamma * d - gradnew
-    # If we get here, then we haven't terminated in the given number of
-    # iterations.
-    status = "maxiter exceeded"
+    else:
+        # If we get here, then we haven't terminated in the given number of
+        # iterations.
+        status = "maxiter exceeded"
+    print ""
     return x, flog, function_eval, status
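Note: the new break/while-else flow changes how SCG exits, not its interface. A quick smoke test of the function as defined above, on a toy quadratic (the objective here is illustrative, not from the repository; SCG is assumed to be in scope):

import numpy as np

def f(x):
    return 0.5 * np.dot(x, x)

def gradf(x):
    return x

x0 = np.random.randn(5)
x_opt, flog, n_evals, status = SCG(f, gradf, x0, display=False)
print status, f(x_opt)  # expect 'converged' with an objective near zero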

View file

@@ -1,146 +0,0 @@
#Copyright I. Nabney, N. Lawrence and James Hensman (1996 - 2012)
#Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to Python by N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x : the optimal value for x
flog : a list of all the objective values
"""
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa)
xplus = x + sigma*d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa
if delta <= 0:
delta = beta*kappa
beta = beta - theta/kappa
alpha = - mu/delta
# Calculate the comparison ratio.
xnew = x + alpha*d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print '\r',
print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
if success:
# Test for termination
if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
status='converged'
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
# If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0:
return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0*beta, betamax)
if Delta > 0.75:
beta = max(0.5*beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu)
d = gamma*d - gradnew
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
return x, flog, function_eval, status

View file

@@ -61,7 +61,7 @@ class kern(parameterised):
         ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
         ax.set_xticks(np.arange(len(ard_params)))
-        ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
+        ax.set_xticklabels([r"${}$".format(i) for i in range(len(ard_params))])
         return ax

     def _transform_gradients(self, g):
@@ -176,8 +176,8 @@ class kern(parameterised):
         prev_constr_ind = [K1.constrained_indices] + [K1.Nparam + i for i in K2.constrained_indices]
         prev_constr = K1.constraints + K2.constraints
-        prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
-        prev_constr_fix_values = K1.fixed_values + K2.fixed_values
+        # prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
+        # prev_constr_fix_values = K1.fixed_values + K2.fixed_values

         # follow the previous ties
         for arr in prev_ties:

View file

@@ -1,6 +1,6 @@
 import numpy as np
 from scipy import stats, linalg
-from ..util.linalg import pdinv,mdot,jitchol,DSYR
+from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot
 from likelihood import likelihood

 class EP(likelihood):
@@ -117,8 +117,6 @@ class EP(likelihood):
                 self.v_tilde[i] += Delta_v
                 #Posterior distribution parameters update
                 DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
-                #si=Sigma[:,i:i+1]
-                #Sigma -= Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)#DSYR
                 mu = np.dot(Sigma,self.v_tilde)
             self.iterations += 1
             #Sigma recomputation with Cholesky decomposition
@@ -135,12 +133,12 @@ class EP(likelihood):
         return self._compute_GP_variables()

-    #def fit_DTC(self, Knn_diag, Kmn, Kmm):
     def fit_DTC(self, Kmm, Kmn):
         """
         The expectation-propagation algorithm with sparse pseudo-input.
         For nomenclature see ... 2013.
         """
+        M = Kmm.shape[0]

         #TODO: this doesn't work with uncertain inputs!
@@ -149,12 +147,20 @@ class EP(likelihood):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = Qnn = Knm*Kmmi*Kmn
         """
-        Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
         KmnKnm = np.dot(Kmn,Kmn.T)
+        Lm = jitchol(Kmm)
+        Lmi = chol_inv(Lm)
+        Kmmi = np.dot(Lmi.T,Lmi)
         KmmiKmn = np.dot(Kmmi,Kmn)
         Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
         LLT0 = Kmm.copy()
+        #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
+        #KmnKnm = np.dot(Kmn, Kmn.T)
+        #KmmiKmn = np.dot(Kmmi,Kmn)
+        #Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
+        #LLT0 = Kmm.copy()

         """
         Posterior approximation: q(f|y) = N(f| mu, Sigma)
         Sigma = Diag + P*R.T*R*P.T + K
@@ -197,19 +203,19 @@ class EP(likelihood):
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
-                self.v_tilde[i] = self.v_tilde[i] + Delta_v
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
                 #Posterior distribution parameters update
-                LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
+                DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                 L = jitchol(LLT)
                 #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
                 V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
                 Sigma_diag = np.sum(V*V,-2)
                 si = np.sum(V.T*V[:,i],-1)
-                mu = mu + (Delta_v-Delta_tau*mu[i])*si
+                mu += (Delta_v-Delta_tau*mu[i])*si
                 self.iterations += 1
             #Sigma recomputation with Cholesky decomposition
-            LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
+            LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
             L = jitchol(LLT)
             V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
             V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
@@ -235,7 +241,9 @@ class EP(likelihood):
         q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
         Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
         """
-        Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
+        Lm = jitchol(Kmm)
+        Lmi = chol_inv(Lm)
+        Kmmi = np.dot(Lmi.T,Lmi)
         P0 = Kmn.T
         KmnKnm = np.dot(P0.T, P0)
         KmmiKmn = np.dot(Kmmi,P0.T)
@@ -290,8 +298,8 @@ class EP(likelihood):
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
-                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
-                self.v_tilde[i] = self.v_tilde[i] + Delta_v
+                self.tau_tilde[i] += Delta_tau
+                self.v_tilde[i] += Delta_v
                 #Posterior distribution parameters update
                 dtd1 = Delta_tau*Diag[i] + 1.
                 dii = Diag[i]
@@ -301,8 +309,8 @@ class EP(likelihood):
                 Rp_i = np.dot(R,pi_.T)
                 RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
                 R = jitchol(RTR).T
-                self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
-                self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
+                self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
+                self.gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
                 RPT = np.dot(R,P.T)
                 Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
                 mu = self.w + np.dot(P,self.gamma)
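For reference, the quantity both sparse fits start from is the diagonal of Qnn = Knm Kmm^{-1} Kmn, now computed through a Cholesky factor instead of pdinv. A self-contained numpy/scipy sketch with random stand-ins for Kmm and Kmn (not the GPy helpers themselves):

import numpy as np
from scipy import linalg

M, N = 5, 20
A = np.random.randn(M, 2 * M)
Kmm = np.dot(A, A.T) + 1e-6 * np.eye(M)  # positive definite M x M
Kmn = np.random.randn(M, N)

Lm = linalg.cholesky(Kmm, lower=True)              # Kmm = Lm Lm.T
V = linalg.solve_triangular(Lm, Kmn, lower=True)   # V = Lm^{-1} Kmn
Qnn_diag = np.sum(V * V, 0)                        # diag(Knm Kmm^{-1} Kmn)

# agrees with the naive computation via an explicit inverse
assert np.allclose(Qnn_diag, np.diag(np.dot(Kmn.T, np.dot(np.linalg.inv(Kmm), Kmn))))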

View file

@@ -53,10 +53,10 @@ class Gaussian(likelihood):
     def _set_params(self, x):
         x = float(x)
         if self._variance != x:
-            self._variance = x
-            self.covariance_matrix = np.eye(self.N) * self._variance
-            self.precision = 1. / self._variance
+            self.precision = 1. / x
+            self.covariance_matrix = np.eye(self.N) * x
             self.V = (self.precision) * self.Y
+            self._variance = x

     def predictive_values(self, mu, var, full_cov):
         """
@@ -69,6 +69,7 @@ class Gaussian(likelihood):
         # Note. for D>1, we need to re-normalise all the outputs independently.
         # This will mess up computations of diag(true_var), below.
         # note that the upper, lower quantiles should be the same shape as mean
+        # Augment the output variance with the likelihood variance and rescale.
         true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
         _5pc = mean - 2.*np.sqrt(np.diag(true_var))
         _95pc = mean + 2.*np.sqrt(np.diag(true_var))

View file

@@ -58,7 +58,7 @@ class probit(likelihood_function):
         norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
         p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
         p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
-        return mean, np.nan*var, p_025, p_975 # TODO: var
+        return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var

 class Poisson(likelihood_function):
     """

View file

@@ -27,7 +27,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
     def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
-                 Z=None, kernel=None, oldpsave=5, _debug=False,
+                 Z=None, kernel=None, oldpsave=10, _debug=False,
                  **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)
@@ -87,19 +87,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         return x

     def _set_params(self, x, save_old=True, save_count=0):
-        try:
-            N, Q = self.N, self.Q
-            self.X = x[:self.X.size].reshape(N, Q).copy()
-            self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
-            sparse_GP._set_params(self, x[(2 * N * Q):])
-            self.oldps = x
-        except (LinAlgError, FloatingPointError, ZeroDivisionError):
-            print "\rWARNING: Caught LinAlgError, continueing without setting "
-            if self._debug:
-                self._savederrors.append(self.f_call)
-            if save_count > 10:
-                raise
-            self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
+        # try:
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N, Q).copy()
+        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
+        sparse_GP._set_params(self, x[(2 * N * Q):])
+        # self.oldps = x
+        # except (LinAlgError, FloatingPointError, ZeroDivisionError):
+        #     print "\rWARNING: Caught LinAlgError, continueing without setting "
+        #     if self._debug:
+        #         self._savederrors.append(self.f_call)
+        #     if save_count > 10:
+        #         raise
+        #     self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)

     def dKL_dmuS(self):
         dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@@ -167,8 +167,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         # d_dmu = (dL_dmu).flatten()
         # d_dS = (dL_dS).flatten()
         # ========================
-        dbound_dmuS = np.hstack((d_dmu, d_dS))
-        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
+        self.dbound_dmuS = np.hstack((d_dmu, d_dS))
+        self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
+        return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
+
+    def _log_likelihood_normal_gradients(self):
+        Si, _, _, _ = pdinv(self.X_variance)

     def plot_latent(self, which_indices=None, *args, **kwargs):
@@ -263,7 +267,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         param_dict = dict(self._savedparams)
         gradient_dict = dict(self._savedgradients)
-        kmm_dict = dict(self._savedpsiKmm)
+        # kmm_dict = dict(self._savedpsiKmm)
         iters = np.array(param_dict.keys())
         ABCD_dict = np.array(self._savedABCD)
         self.showing = 0

View file

@@ -3,8 +3,7 @@
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, tdot, symmetrify,pdinv
-#from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
+from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
 from ..util.plot import gpplot
 from .. import kern
 from scipy import stats, linalg
@@ -33,7 +32,6 @@ class FITC(sparse_GP):
         self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
         self._set_params(self._get_params()) # update the GP
-    #@profile
     def _computations(self):
         #factor Kmm
@@ -58,18 +56,15 @@ class FITC(sparse_GP):
         # factor B
         self.B = np.eye(self.M) + self.A
         self.LB = jitchol(self.B)
-        self.LBi,info = linalg.lapack.flapack.dtrtrs(self.LB,np.eye(self.M),lower=1)
+        self.LBi = chol_inv(self.LB)
         self.psi1V = np.dot(self.psi1, self.V_star)
-        # back substutue C into psi1V
-        Lmi_psi1V, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
+        Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
         self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
         Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
         b_psi1_Ki = self.beta_star * Kmmipsi1.T
         Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
         Kmmi = np.dot(self.Lmi.T,self.Lmi)
         LBiLmi = np.dot(self.LBi,self.Lmi)
         LBL_inv = np.dot(LBiLmi.T,LBiLmi)
@@ -78,13 +73,15 @@ class FITC(sparse_GP):
         Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
         psi1beta = self.psi1*self.beta_star.T
         H = self.Kmm + mdot(self.psi1,psi1beta.T)
-        Hi, LH, LHi, logdetH = pdinv(H)
+        LH = jitchol(H)
+        LHi = chol_inv(LH)
+        Hi = np.dot(LHi.T,LHi)
         betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
         alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
         gamma_1 = mdot(VVT,self.psi1.T,Hi)
         pHip = mdot(self.psi1.T,Hi,self.psi1)
         gamma_2 = mdot(self.beta_star*pHip,self.V_star)
-        #gamma_3 = self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T
         gamma_3 = self.V_star * gamma_2

         self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
@@ -97,31 +94,31 @@ class FITC(sparse_GP):
         self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
         self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
-        self._dL_dKmm += -.5*Kmmi + .5*LBL_inv + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
+        self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
         self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm

         self._dpsi1_dtheta = 0
         self._dpsi1_dX = 0
         self._dKmm_dtheta = 0
         self._dKmm_dX = 0
-        for psi1_n,V_n,X_n,alpha_n,gamma_n,gamma_k in zip(self.psi1.T,self.V_star,self.X,alpha,gamma_2,gamma_3):
-            psin_K = np.dot(psi1_n[None,:],Kmmi)
-            _dpsi1 = -V_n**2 * psin_K #dA_dpsi1: yT*beta_star*y
-            _dpsi1 += - alpha_n * psin_K #Diag_dC_dpsi1
-            _dpsi1 += - gamma_n**2 * psin_K + 2. * gamma_k * psin_K #Diag_dD_dpsi1
-            _dKmm = .5*V_n**2 * np.dot(psin_K.T,psin_K) #dA_dKmm: yT*beta_star*y
-            _dKmm += .5 * alpha_n * np.dot(psin_K.T,psin_K) #Diag_dC_dKmm
-            _dKmm += .5*gamma_n**2 * np.dot(psin_K.T,psin_K) - gamma_k * np.dot(psin_K.T,psin_K) #Diag_dD_dKmm
-            self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,X_n[None,:],self.Z)
+        self._dpsi1_dX_jkj = 0
+        self._dpsi1_dtheta_jkj = 0
+        for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
+            K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
+            #Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 + Diag_dD_dpsi1
+            _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
+            #Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y + Diag_dC_dKmm + Diag_dD_dKmm
+            _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K
+            self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
             self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
             self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
-            self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,X_n[None,:])
+            self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])

         # the partial derivative vector for the likelihood
         if self.likelihood.Nparams == 0:
@@ -235,8 +232,6 @@ class FITC(sparse_GP):
             var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
         else:
             Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
-            Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
-            var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
             var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
         return mu_star[:,None],var
     else:
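A pattern this commit standardises on across EP and FITC is inverting a positive-definite matrix through its Cholesky factor (GPy's jitchol/chol_inv). A pure numpy/scipy equivalent of the three-line idiom, on a toy matrix:

import numpy as np
from scipy import linalg

A = np.random.randn(4, 8)
K = np.dot(A, A.T) + 1e-6 * np.eye(4)  # positive definite toy matrix

L = linalg.cholesky(K, lower=True)                      # K = L L.T
Li = linalg.solve_triangular(L, np.eye(4), lower=True)  # L^{-1}
Ki = np.dot(Li.T, Li)                                   # K^{-1} = L^{-T} L^{-1}

assert np.allclose(np.dot(Ki, K), np.eye(4), atol=1e-8)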

View file

@@ -28,7 +28,7 @@ class GPLVM(GP):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
         if kernel is None:
-            kernel = kern.rbf(Q) + kern.bias(Q)
+            kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
         likelihood = Gaussian(Y, normalize=normalize_Y)
         GP.__init__(self, X, likelihood, kernel, **kwargs)

View file

@@ -76,7 +76,7 @@ class sparse_GP(GP):
         # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
         psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
         evals, evecs = linalg.eigh(psi2_beta_scaled)
-        clipped_evals = np.clip(evals, 0., 1e15) # TODO: make clipping configurable
+        clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
         if not np.allclose(evals, clipped_evals):
             print "Warning: clipping posterior eigenvalues"
         tmp = evecs * np.sqrt(clipped_evals)
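What the tightened clip does, in isolation: project the spectrum of a symmetric matrix onto [0, 1e6] before reassembling it. The 1e6 cap mirrors the new value in the diff; the matrix below is a toy stand-in, not GPy's psi2 statistic:

import numpy as np

S = np.random.randn(4, 4)
S = 0.5 * (S + S.T)                       # symmetric, possibly indefinite
evals, evecs = np.linalg.eigh(S)
clipped_evals = np.clip(evals, 0., 1e6)
S_psd = np.dot(evecs * clipped_evals, evecs.T)
assert np.all(np.linalg.eigvalsh(S_psd) >= -1e-10)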

View file

@@ -4,6 +4,7 @@ import numpy as np
 import GPy
 import scipy.sparse
 import scipy.io
+import cPickle as pickle

 data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000
@@ -96,16 +97,29 @@ def stick():
     lbls = 'connect'
     return {'Y': Y, 'connect' : connect, 'info': "Stick man data from Ohio."}

+def swiss_roll_generated(N=1000, sigma=0.0):
+    with open(os.path.join(data_path, 'swiss_roll.pickle')) as f:
+        data = pickle.load(f)
+    Na = data['Y'].shape[0]
+    perm = np.random.permutation(np.r_[:Na])[:N]
+    Y = data['Y'][perm, :]
+    t = data['t'][perm]
+    c = data['colors'][perm, :]
+    so = np.argsort(t)
+    Y = Y[so, :]
+    t = t[so]
+    c = c[so, :]
+    return {'Y':Y, 't':t, 'colors':c}

 def swiss_roll_1000():
     mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data'))
     Y = mat_data['X_data'][:, 0:1000].transpose()
     return {'Y': Y, 'info': "Subsample of the swiss roll data extracting only the first 1000 values."}

-def swiss_roll():
+def swiss_roll(N=3000):
     mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat'))
-    Y = mat_data['X_data'][:, 0:3000].transpose()
-    return {'Y': Y, 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
+    Y = mat_data['X_data'][:, 0:N].transpose()
+    return {'Y': Y, 'X': mat_data['X_data'], 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}

 def toy_rbf_1d(seed=default_seed):
     np.random.seed(seed=seed)
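If the swiss_roll.pickle file is unavailable, a comparable dataset can be generated directly; sklearn's make_swiss_roll is one option. This is an alternative sketch, not what swiss_roll_generated above actually does:

import numpy as np
from sklearn.datasets import make_swiss_roll

Y, t = make_swiss_roll(n_samples=1000, noise=0.0)
so = np.argsort(t)   # sort by the roll parameter, as swiss_roll_generated does
Y, t = Y[so, :], t[so]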

File diff suppressed because one or more lines are too long

View file

@@ -237,6 +237,16 @@ def tdot(*args, **kwargs):
     return tdot_numpy(*args,**kwargs)

 def DSYR(A,x,alpha=1.):
+    """
+    Performs a symmetric rank-1 update operation:
+    A <- A + alpha * np.dot(x,x.T)
+
+    Arguments
+    ---------
+    :param A: Symmetric NxN np.array
+    :param x: Nx1 np.array
+    :param alpha: scalar
+    """
     N = c_int(A.shape[0])
     LDA = c_int(A.shape[0])
     UPLO = c_char('l')
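A small check of the rank-1 update just documented. Note the wrapper passes UPLO='l' to BLAS, so only the lower triangle of A is assumed to be updated in place:

import numpy as np
from GPy.util.linalg import DSYR

A = np.zeros((3, 3))
x = np.arange(3.)
DSYR(A, x, alpha=2.)
assert np.allclose(np.tril(A), np.tril(2. * np.outer(x, x)))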

View file

@@ -44,7 +44,7 @@ class vector_show(data_show):

 class lvm(data_show):
-    def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
         """Visualize a latent variable model

         :param model: the latent variable model to visualize.
@@ -71,7 +71,7 @@ class lvm(data_show):
         self.data_visualize = data_visualize
         self.model = model
         self.latent_axes = latent_axes
+        self.sense_axes = sense_axes
         self.called = False
         self.move_on = False
         self.latent_index = latent_index
@@ -81,10 +81,12 @@ class lvm(data_show):
         self.latent_values = vals
         self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
         self.modify(vals)
+        self.show_sensitivities()

     def modify(self, vals):
         """When latent values are modified update the latent representation and also update the output visualization."""
         y = self.model.predict(vals)[0]
+        print y
         self.data_visualize.modify(y)
         self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
         self.axes.figure.canvas.draw()
@@ -99,6 +101,7 @@ class lvm(data_show):
         if event.inaxes!=self.latent_axes: return
         self.move_on = not self.move_on
         self.called = True
+
     def on_move(self, event):
         if event.inaxes!=self.latent_axes: return
         if self.called and self.move_on:
@@ -107,38 +110,9 @@ class lvm(data_show):
             self.latent_values[self.latent_index[1]]=event.ydata
             self.modify(self.latent_values)

-class lvm_subplots(lvm):
-    """
-    latent_axes is a np array of dimension np.ceil(Q/2) + 1,
-    one for each pair of the axes, and the last one for the sensitivity bar chart
-    """
-    def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
-        lvm.__init__(self, vals, model,data_visualize,latent_axes,[0,1])
-        self.nplots = int(np.ceil(model.Q/2.))+1
-        lvm.__init__(self,model,data_visualize,latent_axes,latent_index)
-        self.latent_values = np.zeros(2*np.ceil(self.model.Q/2.)) # possibly an extra dimension on this
-        assert latent_axes.size == self.nplots
-
-class lvm_dimselect(lvm):
-    """
-    A visualizer for latent variable models which allows selection of the latent dimensions to use by clicking on a bar chart of their length scales.
-    """
-    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
-        if latent_axes==None and sense_axes==None:
-            self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2)
-        elif sense_axes==None:
-            fig=plt.figure()
-            self.sense_axes = fig.add_subplot(111)
-        else:
-            self.sense_axes = sense_axes
-        lvm.__init__(self,vals,model,data_visualize,latent_axes,latent_index)
-        self.show_sensitivities()
-        print "use left and right mouse butons to select dimensions"
-
     def show_sensitivities(self):
         # A click in the bar chart axis for selecting a dimension.
-        if self.sense_axes != None:
         self.sense_axes.cla()
         self.sense_axes.bar(np.arange(self.model.Q),1./self.model.input_sensitivity(),color='b')
@@ -152,6 +126,52 @@ class lvm_dimselect(lvm):
         self.sense_axes.figure.canvas.draw()

+class lvm_subplots(lvm):
+    """
+    latent_axes is a np array of dimension np.ceil(Q/2),
+    one for each pair of the latent dimensions.
+    """
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None):
+        self.nplots = int(np.ceil(model.Q/2.))+1
+        assert len(latent_axes)==self.nplots
+        if vals==None:
+            vals = model.X[0, :]
+        self.latent_values = vals
+
+        for i, axis in enumerate(latent_axes):
+            if i == self.nplots-1:
+                if self.nplots*2!=model.Q:
+                    latent_index = [i*2, i*2]
+                    lvm.__init__(self, self.latent_vals, model, data_visualize, axis, sense_axes, latent_index=latent_index)
+            else:
+                latent_index = [i*2, i*2+1]
+                lvm.__init__(self, self.latent_vals, model, data_visualize, axis, latent_index=latent_index)
+
+class lvm_dimselect(lvm):
+    """
+    A visualizer for latent variable models which allows selection of the latent dimensions to use by clicking on a bar chart of their length scales.
+
+    For an example of the visualizer's use try:
+
+    GPy.examples.dimensionality_reduction.BGPLVM_oil()
+    """
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
+        if latent_axes==None and sense_axes==None:
+            self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2)
+        elif sense_axes==None:
+            fig=plt.figure()
+            self.sense_axes = fig.add_subplot(111)
+        else:
+            self.sense_axes = sense_axes
+        lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
+        print "use left and right mouse buttons to select dimensions"
+
     def on_click(self, event):
         if event.inaxes==self.sense_axes:
@@ -177,12 +197,6 @@ class lvm_dimselect(lvm):
         self.called = True

-    def on_move(self, event):
-        if event.inaxes!=self.latent_axes: return
-        if self.called and self.move_on:
-            self.latent_values[self.latent_index[0]]=event.xdata
-            self.latent_values[self.latent_index[1]]=event.ydata
-            self.modify(self.latent_values)

     def on_leave(self,event):
         latent_values = self.latent_values.copy()
@@ -214,15 +228,30 @@ class image_show(data_show):
     def modify(self, vals):
         self.set_image(vals)
         self.handle.set_array(self.vals)
-        self.axes.figure.canvas.draw() # Teo - original line: plt.show()
+        self.axes.figure.canvas.draw()

     def set_image(self, vals):
         dim = self.dimensions[0] * self.dimensions[1]
+        nImg = np.sqrt(vals[0,].size/dim)
+        if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
+            nImg = np.int(nImg)
+            self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
+            for iR in range(nImg):
+                for iC in range(nImg):
+                    currImgId = iR*nImg + iC
+                    currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
+                    firstRow = iR*self.dimensions[0]
+                    lastRow = (iR+1)*self.dimensions[0]
+                    firstCol = iC*self.dimensions[1]
+                    lastCol = (iC+1)*self.dimensions[1]
+                    self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
+        else:
             self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
         if self.transpose:
             self.vals = self.vals.T.copy()
-        if not self.scale:
-            self.vals = self.vals
+        # if not self.scale:
+        #     self.vals = self.vals
         if self.invert:
             self.vals = -self.vals
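The mosaic branch in set_image, reduced to its index arithmetic: pack n*n column-major ('F') flattened images of shape (h, w) into one (h*n, w*n) canvas. Toy sizes, numpy only:

import numpy as np

h, w, n = 2, 3, 2
dim = h * w
vals = np.arange(n * n * dim, dtype=float)[None, :]  # four flattened images

canvas = np.zeros((h * n, w * n))
for iR in range(n):
    for iC in range(n):
        img_id = iR * n + iC
        img = np.reshape(vals[0, dim * img_id + np.arange(dim)], (h, w), order='F')
        canvas[iR * h:(iR + 1) * h, iC * w:(iC + 1) * w] = img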