Merge branch 'devel' of github.com:SheffieldML/GPy into devel

James Hensman 2013-04-25 12:52:13 +01:00
commit 5dd343e89d
8 changed files with 617 additions and 264 deletions

View file

@@ -7,7 +7,7 @@ from scipy import optimize
import sys, pdb
import multiprocessing as mp
from GPy.util.misc import opt_wrapper
#import numdifftools as ndt
# import numdifftools as ndt
from parameterised import parameterised, truncate_pad
import priors
from ..util.linalg import jitchol
@@ -25,14 +25,14 @@ class model(parameterised):
self.preferred_optimizer = 'tnc'
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _set_params(self,x):
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the model class"
def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def set_prior(self,which,what):
def set_prior(self, which, what):
"""
Sets priors on the model parameters.
@@ -53,84 +53,59 @@ class model(parameterised):
which = self.grep_param_names(which)
#check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
# check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
if len(tie_partial_matches):
raise ValueError, "cannot place prior across partial ties"
tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
if len(tie_matches)>1:
tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
if len(tie_matches) > 1:
raise ValueError, "cannot place prior across multiple ties"
elif len(tie_matches)==1:
which = which[:1]# just place a prior object on the first parameter
elif len(tie_matches) == 1:
which = which[:1] # just place a prior object on the first parameter
#check constraints are okay
# check constraints are okay
if isinstance(what, (priors.gamma, priors.log_Gaussian)):
assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
unconst = np.setdiff1d(which, self.constrained_positive_indices)
if len(unconst):
print "Warning: constraining parameters to be positive:"
print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
print '\n'
self.constrain_positive(unconst)
elif isinstance(what,priors.Gaussian):
assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
elif isinstance(what, priors.Gaussian):
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
else:
raise ValueError, "prior not recognised"
#store the prior in a local list
# store the prior in a local list
for w in which:
self.priors[w] = what
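
A usage sketch (the model instance and the gamma prior's shape/rate arguments are assumptions; only set_prior's regexp matching and the constraint checks are from this file):

    m = ...                                            # some GPy model instance (hypothetical)
    m.set_prior('rbf_variance', priors.gamma(1., 1.))  # regexp selects matching parameters
    # an unconstrained match triggers the positivity warning and constraint above
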
def get(self,name, return_names=False):
"""
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._get_params()[matches]
else:
raise AttributeError, "no parameter matches %s"%name
def set(self,name,val):
"""
Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to the given value.
"""
matches = self.grep_param_names(name)
if len(matches):
x = self._get_params()
x[matches] = val
self._set_params(x)
else:
raise AttributeError, "no parameter matches %s"%name
def get_gradient(self,name, return_names=False):
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s"%name
raise AttributeError, "no parameter matches %s" % name
def log_prior(self):
"""evaluate the prior"""
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
x = self._get_params()
ret = np.zeros(x.size)
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
[np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
return ret
def _transform_gradients(self, g):
@@ -139,13 +114,13 @@ class model(parameterised):
"""
x = self._get_params()
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
[np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
[np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.constrained_fixed_indices):
to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
return np.delete(g,to_remove)
to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
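
These transforms are just the chain rule for the reparameterisations defined later in this commit: a positive parameter is optimised as t = log(x), so dL/dt = (dL/dx) * x (the g * x lines above), and a bounded parameter uses x = l + sigmoid(t) * (h - l), whose derivative sigmoid(t) * (1 - sigmoid(t)) * (h - l) simplifies to (x - l) * (h - x) / (h - l), matching the bounded case.
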
@@ -155,15 +130,15 @@ class model(parameterised):
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
#first take care of all parameters (from N(0,1))
# first take care of all parameters (from N(0,1))
x = self._get_params_transformed()
x = np.random.randn(x.size)
self._set_params_transformed(x)
#now draw from prior where possible
# now draw from prior where possible
x = self._get_params()
[np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@@ -197,10 +172,10 @@ class model(parameterised):
pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts):
self.randomize()
job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
except KeyboardInterrupt:
print "Ctrl+c received, terminating and joining pool."
@@ -216,10 +191,10 @@ class model(parameterised):
self.optimization_runs.append(jobs[i].get())
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
except Exception as e:
if robust:
print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
else:
raise e
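
A usage sketch of the restart loop (m is a hypothetical model; the keywords are those of the signature above):

    m.optimize_restarts(Nrestarts=5, robust=True, parallel=True, num_processes=4)
    # robust=True downgrades a failed restart to a warning instead of re-raising;
    # parallel=True farms the restarts out to a multiprocessing.Pool
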
@@ -229,11 +204,11 @@ class model(parameterised):
else:
self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self,warn=False):
def ensure_default_constraints(self, warn=False):
"""
Ensure that any variables which should clearly be positive have been constrained somehow.
"""
positive_strings = ['variance','lengthscale', 'precision']
positive_strings = ['variance', 'lengthscale', 'precision']
param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
@@ -242,9 +217,9 @@ class model(parameterised):
if not (i in currently_constrained):
to_make_positive.append(re.escape(param_names[i]))
if warn:
print "Warning! constraining %s postive"%name
print "Warning! constraining %s postive" % name
if len(to_make_positive):
self.constrain_positive('('+'|'.join(to_make_positive)+')')
self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
@@ -262,14 +237,14 @@ class model(parameterised):
self._set_params_transformed(x)
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
return - LL_gradients - prior_gradients
return -LL_gradients - prior_gradients
def objective_and_gradients(self, x):
self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior()
obj_f = -self.log_likelihood() - self.log_prior()
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
obj_grads = - LL_gradients - prior_gradients
obj_grads = -LL_gradients - prior_gradients
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):
@@ -289,13 +264,13 @@ class model(parameterised):
start = self._get_params_transformed()
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model = self, **kwargs)
opt = optimizer(start, model=self, **kwargs)
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt)
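
For comparison, the single-run entry point as the examples later in this commit drive it (m is hypothetical; 'scg' and the messages/max_f_eval keywords appear in those examples):

    m.ensure_default_constraints()
    m.optimize('scg', messages=1, max_f_eval=500)
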
def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
sgd.run()
@@ -303,8 +278,8 @@ class model(parameterised):
def Laplace_covariance(self):
"""return the covariance matrix of a Laplace approximation at the current (stationary) point"""
#TODO add in the prior contributions for MAP estimation
#TODO fix the hessian for tied, constrained and fixed components
# TODO add in the prior contributions for MAP estimation
# TODO fix the hessian for tied, constrained and fixed components
if hasattr(self, 'log_likelihood_hessian'):
A = -self.log_likelihood_hessian()
@@ -318,8 +293,8 @@ class model(parameterised):
A = -h(x)
self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky
aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
A[aa,aa] = 0.
aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
A[aa, aa] = 0.
return A
def Laplace_evidence(self):
@@ -330,11 +305,11 @@ class model(parameterised):
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
except:
return np.nan
return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
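
That is, the Laplace approximation to the log evidence, log Z ~= L(theta*) + (d/2) * log(2 * pi) - 0.5 * log|A|, where L is the log likelihood at the mode, d is the number of parameters, and hld = sum(log(diag(chol(A)))) = 0.5 * log|A|.
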
def __str__(self):
s = parameterised.__str__(self).split('\n')
#add priors to the string
# add priors to the string
strs = [str(p) if p is not None else '' for p in self.priors]
width = np.array(max([len(p) for p in strs] + [5])) + 4
@@ -345,16 +320,16 @@ class model(parameterised):
obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
obj_funct += '\n\n'
s[0] = obj_funct + s[0]
s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
s[1] += '-'*(width + 1)
s[0] += "|{h:^{col}}".format(h='Prior', col=width)
s[1] += '-' * (width + 1)
for p in range(2, len(strs)+2):
s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
for p in range(2, len(strs) + 2):
s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
return '\n'.join(s)
def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical estimate.
If the verbose flag is passed, individual components are tested (and printed)
@@ -374,27 +349,27 @@ class model(parameterised):
x = self._get_params_transformed().copy()
if not verbose:
#just check the global ratio
dx = step*np.sign(np.random.uniform(-1,1,x.size))
# just check the global ratio
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
#evaluate around the point x
f1, g1 = self.objective_and_gradients(x+dx)
f2, g2 = self.objective_and_gradients(x-dx)
# evaluate around the point x
f1, g1 = self.objective_and_gradients(x + dx)
f2, g2 = self.objective_and_gradients(x - dx)
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1-f2)/(2*dx)
global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
return True
else:
return False
else:
#check the gradient of each parameter individually, and do some pretty printing
# check the gradient of each parameter individually, and do some pretty printing
try:
names = self._get_param_names_transformed()
except NotImplementedError:
names = ['Variable %i'%i for i in range(len(x))]
names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
@@ -403,9 +378,9 @@ class model(parameterised):
cols = [max_names]
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
cols = np.array(cols) + 5
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-'*len(header_string[0])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
if target_param is None:
@@ -421,11 +396,11 @@ class model(parameterised):
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1-f2)/(2*step)
ratio = (f1-f2)/(2*step*gradient)
difference = np.abs((f1-f2)/2/step - gradient)
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient)
difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(ratio-1)<tolerance):
if (np.abs(ratio - 1) < tolerance):
formatted_name = "\033[92m {0} \033[0m".format(names[i])
else:
formatted_name = "\033[91m {0} \033[0m".format(names[i])
@@ -433,7 +408,7 @@ class model(parameterised):
d = '%.6f' % float(difference)
g = '%.6f' % gradient
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
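
Typical use, per the signature above (m is a hypothetical model):

    assert m.checkgrad()                   # silent global ratio test, returns True/False
    m.checkgrad(verbose=True, step=1e-6)   # per-parameter table, coloured green/red by tolerance
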
def input_sensitivity(self):
@@ -444,21 +419,21 @@ class model(parameterised):
TODO: proper sensitivity analysis
"""
if not hasattr(self,'kern'):
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
if (not len(k)==1) or (not k[0].ARD):
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
if (not len(k) == 1) or (not k[0].ARD):
raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0]
if k.name=='rbf':
if k.name == 'rbf':
return k.lengthscale
elif k.name=='linear':
return 1./k.variances
elif k.name == 'linear':
return 1. / k.variances
def pseudo_EM(self,epsilon=.1,**kwargs):
def pseudo_EM(self, epsilon=.1, **kwargs):
"""
TODO: Should this not be in the GP class?
EM-like algorithm for Expectation Propagation and Laplace approximation
@@ -472,7 +447,7 @@ class model(parameterised):
:type optimizer: string TODO: valid strings?
"""
assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
ll_change = epsilon + 1.
iteration = 0
last_ll = -np.exp(1000)
@@ -492,9 +467,9 @@ class model(parameterised):
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation #restore previous likelihood approximation
self._set_params(last_params) #restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
@@ -503,5 +478,5 @@ class model(parameterised):
stop = True
iteration += 1
if stop:
print "%s iterations." %iteration
print "%s iterations." % iteration

View file

@@ -8,24 +8,25 @@ import copy
import cPickle
import os
from ..util.squashers import sigmoid
import warnings
def truncate_pad(string,width,align='m'):
def truncate_pad(string, width, align='m'):
"""
A helper function to make aligned strings for parameterised.__str__
"""
width=max(width,4)
if len(string)>width:
return string[:width-3]+'...'
elif len(string)==width:
width = max(width, 4)
if len(string) > width:
return string[:width - 3] + '...'
elif len(string) == width:
return string
elif len(string)<width:
diff = width-len(string)
if align=='m':
return ' '*np.floor(diff/2.) + string + ' '*np.ceil(diff/2.)
elif align=='l':
return string + ' '*diff
elif align=='r':
return ' '*diff + string
elif len(string) < width:
diff = width - len(string)
if align == 'm':
return ' ' * np.floor(diff / 2.) + string + ' ' * np.ceil(diff / 2.)
elif align == 'l':
return string + ' ' * diff
elif align == 'r':
return ' ' * diff + string
else:
raise ValueError
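
Worked examples of the three alignments (widths below 4 are clamped to 4):

    truncate_pad('lengthscale', 8)         # -> 'lengt...'  (truncated with ellipsis)
    truncate_pad('noise', 9, align='r')    # -> '    noise' (right aligned)
    truncate_pad('noise', 9, align='l')    # -> 'noise    ' (left aligned)
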
@@ -37,15 +38,15 @@ class parameterised(object):
self.tied_indices = []
self.constrained_fixed_indices = []
self.constrained_fixed_values = []
self.constrained_positive_indices = np.empty(shape=(0,),dtype=np.int64)
self.constrained_negative_indices = np.empty(shape=(0,),dtype=np.int64)
self.constrained_positive_indices = np.empty(shape=(0,), dtype=np.int64)
self.constrained_negative_indices = np.empty(shape=(0,), dtype=np.int64)
self.constrained_bounded_indices = []
self.constrained_bounded_uppers = []
self.constrained_bounded_lowers = []
def pickle(self,filename,protocol=-1):
f = file(filename,'w')
cPickle.dump(self,f,protocol)
def pickle(self, filename, protocol= -1):
f = file(filename, 'w')
cPickle.dump(self, f, protocol)
f.close()
def copy(self):
@@ -55,18 +56,85 @@ class parameterised(object):
return copy.deepcopy(self)
@property
def params(self):
"""
Returns a **copy** of parameters in non transformed space
:see_also: :py:func:`GPy.core.parameterised.params_transformed`
"""
return self._get_params()
@params.setter
def params(self, params):
self._set_params(params)
@property
def params_transformed(self):
"""
Returns a **copy** of parameters in transformed space
:see_also: :py:func:`GPy.core.parameterised.params`
"""
return self._get_params_transformed()
@params_transformed.setter
def params_transformed(self, params):
self._set_params_transformed(params)
_get_set_deprecation = """get and set methods won't be available at the next minor release
in the next releases you will get and set with the following syntax:
Assume m is a model class:
print m['var'] # > prints all parameters matching 'var'
m['var'] = 2. # > sets all parameters matching 'var' to 2.
m['var'] = <array-like> # > sets parameters matching 'var' to <array-like>
"""
def get(self, name):
warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
return self[name]
def set(self, name, val):
warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
self[name] = val
def __getitem__(self, name, return_names=False):
"""
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._get_params()[matches]
else:
raise AttributeError, "no parameter matches %s" % name
def __setitem__(self, name, val):
"""
Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to the given value.
"""
matches = self.grep_param_names(name)
if len(matches):
val = np.array(val)
assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
x = self.params
x[matches] = val
self.params = x
# import ipdb;ipdb.set_trace()
# self.params[matches] = val
else:
raise AttributeError, "no parameter matches %s" % name
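
The indexing syntax promised by the deprecation message then behaves as follows (parameter names are illustrative):

    print m['var']                  # prints every parameter whose name matches the regexp 'var'
    m['noise'] = 0.1                # a scalar is broadcast to every match
    m['lengthscale'] = np.ones(3)   # an array must supply exactly one value per match
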
def tie_params(self, which):
matches = self.grep_param_names(which)
assert matches.size > 0, "need at least something to tie together"
if len(self.tied_indices):
assert not np.any(matches[:,None]==np.hstack(self.tied_indices)), "Some indices are already tied!"
assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"
self.tied_indices.append(matches)
#TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
if hasattr(self,'prior'):
# TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
if hasattr(self, 'prior'):
pass
self._set_params_transformed(self._get_params_transformed())# sets tied parameters to single value
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
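
Usage sketch (regexp matching, as in the constraint methods):

    m.tie_params('lengthscale')   # all matching parameters now share a single value
    m.untie_everything()          # drops all ties again
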
def untie_everything(self):
"""Unties all parameters by setting tied_indices to an empty list."""
@@ -74,7 +142,7 @@ class parameterised(object):
def all_constrained_indices(self):
"""Return a np array of all the constrained indices"""
ret = [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
ret = [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
if len(ret):
return np.hstack(ret)
else:
@@ -117,44 +185,44 @@ class parameterised(object):
which -- np.array(dtype=int), or regular expression object or string
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
self.constrained_positive_indices = np.hstack((self.constrained_positive_indices, matches))
#check to ensure constraint is in place
# check to ensure constraint is in place
x = self._get_params()
for i,xx in enumerate(x):
if (xx<0) & (i in matches):
for i, xx in enumerate(x):
if (xx < 0) & (i in matches):
x[i] = -xx
self._set_params(x)
def unconstrain(self,which):
def unconstrain(self, which):
"""Unconstrain matching parameters. does not untie parameters"""
matches = self.grep_param_names(which)
#positive/negative
self.constrained_positive_indices = np.delete(self.constrained_positive_indices,np.nonzero(np.sum(self.constrained_positive_indices[:,None]==matches[None,:],1))[0])
self.constrained_negative_indices = np.delete(self.constrained_negative_indices,np.nonzero(np.sum(self.constrained_negative_indices[:,None]==matches[None,:],1))[0])
#bounded
# positive/negative
self.constrained_positive_indices = np.delete(self.constrained_positive_indices, np.nonzero(np.sum(self.constrained_positive_indices[:, None] == matches[None, :], 1))[0])
self.constrained_negative_indices = np.delete(self.constrained_negative_indices, np.nonzero(np.sum(self.constrained_negative_indices[:, None] == matches[None, :], 1))[0])
# bounded
if len(self.constrained_bounded_indices):
self.constrained_bounded_indices = [np.delete(a,np.nonzero(np.sum(a[:,None]==matches[None,:],1))[0]) for a in self.constrained_bounded_indices]
self.constrained_bounded_indices = [np.delete(a, np.nonzero(np.sum(a[:, None] == matches[None, :], 1))[0]) for a in self.constrained_bounded_indices]
if np.hstack(self.constrained_bounded_indices).size:
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u,l,i) for u,l,i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u, l, i) for u, l, i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = list(self.constrained_bounded_uppers), list(self.constrained_bounded_lowers), list(self.constrained_bounded_indices)
else:
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [],[],[]
#fixed:
for i,indices in enumerate(self.constrained_fixed_indices):
self.constrained_fixed_indices[i] = np.delete(indices,np.nonzero(np.sum(indices[:,None]==matches[None,:],1))[0])
#remove empty elements
tmp = [(i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [], [], []
# fixed:
for i, indices in enumerate(self.constrained_fixed_indices):
self.constrained_fixed_indices[i] = np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0])
# remove empty elements
tmp = [(i, v) for i, v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
if tmp:
self.constrained_fixed_indices, self.constrained_fixed_values = zip(*tmp)
self.constrained_fixed_indices, self.constrained_fixed_values = list(self.constrained_fixed_indices), list(self.constrained_fixed_values)
else:
self.constrained_fixed_indices, self.constrained_fixed_values = [],[]
self.constrained_fixed_indices, self.constrained_fixed_values = [], []
def constrain_negative(self,which):
def constrain_negative(self, which):
"""
Set negative constraints.
@@ -163,12 +231,12 @@ class parameterised(object):
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
self.constrained_negative_indices = np.hstack((self.constrained_negative_indices, matches))
#check to ensure constraint is in place
# check to ensure constraint is in place
x = self._get_params()
for i,xx in enumerate(x):
if (xx>0.) and (i in matches):
for i, xx in enumerate(x):
if (xx > 0.) and (i in matches):
x[i] = -xx
self._set_params(x)
@@ -184,20 +252,20 @@ class parameterised(object):
lower -- (float) the lower bound on the constraint
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
assert lower < upper, "lower bound must be smaller than upper bound!"
self.constrained_bounded_indices.append(matches)
self.constrained_bounded_uppers.append(upper)
self.constrained_bounded_lowers.append(lower)
#check to ensure constraint is in place
# check to ensure constraint is in place
x = self._get_params()
for i,xx in enumerate(x):
if ((xx<=lower)|(xx>=upper)) & (i in matches):
x[i] = sigmoid(xx)*(upper-lower) + lower
for i, xx in enumerate(x):
if ((xx <= lower) | (xx >= upper)) & (i in matches):
x[i] = sigmoid(xx) * (upper - lower) + lower
self._set_params(x)
def constrain_fixed(self, which, value = None):
def constrain_fixed(self, which, value=None):
"""
Arguments
---------
@@ -211,14 +279,14 @@ class parameterised(object):
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
self.constrained_fixed_indices.append(matches)
if value != None:
self.constrained_fixed_values.append(value)
else:
self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]])
#self.constrained_fixed_values.append(value)
# self.constrained_fixed_values.append(value)
self._set_params_transformed(self._get_params_transformed())
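
The burn-in pattern used by the examples later in this commit fixes a parameter and later releases it:

    m.constrain_fixed('noise', Y.var() / 100.)              # pin the noise during burn-in
    m.unconstrain('noise'); m.constrain_positive('noise')   # release it afterwards
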
def _get_params_transformed(self):
@@ -226,40 +294,40 @@ class parameterised(object):
x = self._get_params()
x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices])
x[self.constrained_negative_indices] = np.log(-x[self.constrained_negative_indices])
[np.put(x,i,np.log(np.clip(x[i]-l,1e-10,np.inf)/np.clip(h-x[i],1e-10,np.inf))) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(x, i, np.log(np.clip(x[i] - l, 1e-10, np.inf) / np.clip(h - x[i], 1e-10, np.inf))) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
to_remove = self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]
to_remove = self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]
if len(to_remove):
return np.delete(x,np.hstack(to_remove))
return np.delete(x, np.hstack(to_remove))
else:
return x
def _set_params_transformed(self,x):
def _set_params_transformed(self, x):
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
#work out how many places are fixed, and where they are. tricky logic!
# work out how many places are fixed, and where they are. tricky logic!
Nfix_places = 0.
if len(self.tied_indices):
Nfix_places += np.hstack(self.tied_indices).size-len(self.tied_indices)
Nfix_places += np.hstack(self.tied_indices).size - len(self.tied_indices)
if len(self.constrained_fixed_indices):
Nfix_places += np.hstack(self.constrained_fixed_indices).size
if Nfix_places:
fix_places = np.hstack(self.constrained_fixed_indices+[t[1:] for t in self.tied_indices])
fix_places = np.hstack(self.constrained_fixed_indices + [t[1:] for t in self.tied_indices])
else:
fix_places = []
free_places = np.setdiff1d(np.arange(Nfix_places+x.size,dtype=np.int),fix_places)
free_places = np.setdiff1d(np.arange(Nfix_places + x.size, dtype=np.int), fix_places)
#put the models values in the vector xx
xx = np.zeros(Nfix_places+free_places.size,dtype=np.float64)
# put the models values in the vector xx
xx = np.zeros(Nfix_places + free_places.size, dtype=np.float64)
xx[free_places] = x
[np.put(xx,i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
[np.put(xx,i,v) for i,v in [(t[1:],xx[t[0]]) for t in self.tied_indices] ]
[np.put(xx, i, v) for i, v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
xx[self.constrained_positive_indices] = np.exp(xx[self.constrained_positive_indices])
xx[self.constrained_negative_indices] = -np.exp(xx[self.constrained_negative_indices])
[np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(xx, i, low + sigmoid(xx[i]) * (high - low)) for i, low, high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
self._set_params(xx)
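
The two methods are inverses on the free parameters: positively constrained values travel through log/exp, bounded values through the logit/sigmoid pair, and fixed or tied entries are removed and reinserted, so the round trip

    m._set_params_transformed(m._get_params_transformed())

leaves the model unchanged, which is exactly how tie_params forces tied parameters onto one value.
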
def _get_param_names_transformed(self):
@@ -267,33 +335,33 @@ class parameterised(object):
Returns the parameter names as propagated after constraining,
tying or fixing, i.e. a list of the same length as _get_params_transformed()
"""
n = self._get_param_names()
n = self._get_param_names()
#remove/concatenate the tied parameter names
# remove/concatenate the tied parameter names
if len(self.tied_indices):
for t in self.tied_indices:
n[t[0]] = "<tie>".join([n[tt] for tt in t])
remove = np.hstack([t[1:] for t in self.tied_indices])
else:
remove=np.empty(shape=(0,),dtype=np.int)
remove = np.empty(shape=(0,), dtype=np.int)
#also remove the fixed params
# also remove the fixed params
if len(self.constrained_fixed_indices):
remove = np.hstack((remove, np.hstack(self.constrained_fixed_indices)))
#add markers to show that some variables are constrained
# add markers to show that some variables are constrained
for i in self.constrained_positive_indices:
n[i] = n[i]+'(+ve)'
n[i] = n[i] + '(+ve)'
for i in self.constrained_negative_indices:
n[i] = n[i]+'(-ve)'
for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
n[i] = n[i] + '(-ve)'
for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
for ii in i:
n[ii] = n[ii]+'(bounded)'
n[ii] = n[ii] + '(bounded)'
n = [nn for i,nn in enumerate(n) if not i in remove]
n = [nn for i, nn in enumerate(n) if not i in remove]
return n
def __str__(self,nw=30):
def __str__(self, nw=30):
"""
Return a string describing the parameter names and their ties and constraints
"""
@@ -302,10 +370,10 @@ class parameterised(object):
if not N:
return "This object has no free parameters."
header = ['Name','Value','Constraints','Ties']
values = self._get_params() #map(str,self._get_params())
#sort out the constraints
constraints = ['']*len(names)
header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params() # map(str,self._get_params())
# sort out the constraints
constraints = [''] * len(names)
for i in self.constrained_positive_indices:
constraints[i] = '(+ve)'
for i in self.constrained_negative_indices:
@@ -313,14 +381,14 @@ class parameterised(object):
for i in self.constrained_fixed_indices:
for ii in i:
constraints[ii] = 'Fixed'
for i,u,l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
for i, u, l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
for ii in i:
constraints[ii] = '('+str(l)+', '+str(u)+')'
#sort out the ties
ties = ['']*len(names)
for i,tie in enumerate(self.tied_indices):
constraints[ii] = '(' + str(l) + ', ' + str(u) + ')'
# sort out the ties
ties = [''] * len(names)
for i, tie in enumerate(self.tied_indices):
for j in tie:
ties[j] = '('+str(i)+')'
ties[j] = '(' + str(i) + ')'
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
@@ -330,10 +398,10 @@ class parameterised(object):
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
columns = cols.sum()
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-'*len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n = names[i], v = values[i], c = constraints[i], t = ties[i], c0 = cols[0], c1 = cols[1], c2 = cols[2], c3 = cols[3]) for i in range(len(values))]
separator = '-' * len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
return ('\n'.join([header_string[0], separator]+param_string)) + '\n'
return ('\n'.join([header_string[0], separator] + param_string)) + '\n'

View file

@@ -112,14 +112,14 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
s3 = s3(x)
sS = sS(x)
s1 -= s1.mean()
s2 -= s2.mean()
s3 -= s3.mean()
sS -= sS.mean()
s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
# s1 -= s1.mean()
# s2 -= s2.mean()
# s3 -= s3.mean()
# sS -= sS.mean()
# s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
# s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
# s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
# sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
S1 = np.hstack([s1, sS])
S2 = np.hstack([s2, sS])
@@ -129,9 +129,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
Y1 += .5 * np.random.randn(*Y1.shape)
Y2 += .5 * np.random.randn(*Y2.shape)
Y3 += .5 * np.random.randn(*Y3.shape)
Y1 += .3 * np.random.randn(*Y1.shape)
Y2 += .3 * np.random.randn(*Y2.shape)
Y3 += .3 * np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
@@ -162,8 +162,11 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
return slist, [S1, S2, S3], Ylist
def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
D1, D2, D3, N, M, Q = 2000, 8, 8, 500, 2, 6
def bgplvm_simulation(burnin='scg', plot_sim=False,
max_burnin=100, true_X=False,
do_opt=True,
max_f_eval=1000):
D1, D2, D3, N, M, Q = 10, 8, 8, 50, 30, 5
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd
@@ -171,53 +174,73 @@ def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
reload(mrd); reload(kern)
Y = Ylist[1]
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.white(Q, .00001) # + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
# k = kern.white(Q, .00001) + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.set('noise',)
m.ensure_default_constraints()
# m.auto_scale_factor = True
# m.scale_factor = 1.
m.ensure_default_constraints()
if burnin:
print "initializing beta"
cstr = "noise"
m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 100.)
m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 70.)
m.optimize(burnin, messages=1, max_f_eval=max_burnin)
print "releasing beta"
cstr = "noise"
m.unconstrain(cstr); m.constrain_positive(cstr)
true_X = np.hstack((slist[1], slist[3], 0. * np.ones((N, Q - 2))))
m.set('X_\d', true_X)
m.constrain_fixed("X_\d")
if true_X:
true_X = np.hstack((slist[0], slist[3], 0. * np.ones((N, Q - 2))))
m.set('X_\d', true_X)
m.constrain_fixed("X_\d")
# # cstr = 'variance'
# # m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
cstr = 'X_variance'
# m.unconstrain(cstr), m.constrain_fixed(cstr, .0001)
m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-7, .1)
# cstr = 'X_variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
# cstr = "iip"
# m.unconstrain(cstr); m.constrain_fixed(cstr)
# cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
# cstr = 'X_\d'
# m.unconstrain(cstr), m.constrain_bounded(cstr, -100., 100.)
# m.unconstrain(cstr), m.constrain_bounded(cstr, -10., 10.)
#
# cstr = 'noise'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-5, 1.)
#
# cstr = 'white'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-6, 1.)
#
# cstr = 'linear_variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.) # m.constrain_positive(cstr)
#
# cstr = 'X_variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.) # m.constrain_positive(cstr)
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
# cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
# np.seterr(all='call')
# def ipdbonerr(errtype, flags):
# import ipdb; ipdb.set_trace()
# np.seterrcall(ipdbonerr)
if do_opt and burnin:
try:
m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
except:
pass
finally:
return m
return m
def mrd_simulation(plot_sim=False):
@@ -261,6 +284,7 @@ def mrd_simulation(plot_sim=False):
m.set('{}_noise'.format(i + 1), Y.var() / 100.)
m.ensure_default_constraints()
m.auto_scale_factor = True
# cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)

View file

@@ -0,0 +1,146 @@
# Copyright I. Nabney, N. Lawrence and James Hensman (1996 - 2012)
# Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to Python by N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x : the optimal value for x
flog : a list of all the objective values
function_eval : the number of objective evaluations used
status : a string describing the reason for termination
"""
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa)
xplus = x + sigma*d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa
if delta <= 0:
delta = beta*kappa
beta = beta - theta/kappa
alpha = - mu/delta
# Calculate the comparison ratio.
xnew = x + alpha*d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print '\r',
print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
if success:
# Test for termination
if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
status='converged'
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
# If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0:
return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0*beta, betamax)
if Delta > 0.75:
beta = max(0.5*beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu)
d = gamma*d - gradnew
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
return x, flog, function_eval, status
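
A self-contained sketch of driving SCG on a toy quadratic (the objective is made up; the call signature and 4-tuple return value are from the code above):

    import numpy as np
    f = lambda x: 0.5 * np.sum(x ** 2)    # toy objective
    gradf = lambda x: x                   # its gradient
    x_opt, flog, n_eval, status = SCG(f, gradf, np.ones(3), display=False)
    print status, x_opt                   # 'converged', with x_opt near zero
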

View file

@@ -70,8 +70,8 @@ class kern(parameterised):
ard_params = 1. / p.lengthscale
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
ax.set_xticks(np.arange(len(ard_params)),
["${}$".format(i + 1) for i in range(len(ard_params))])
ax.set_xticks(np.arange(len(ard_params)))
ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
return ax
def _transform_gradients(self, g):

View file

@@ -10,6 +10,7 @@ from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
@@ -23,7 +24,9 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
:type init: 'PCA'|'random'
"""
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, oldpsave=5, **kwargs):
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=5, _debug=False,
**kwargs):
if X == None:
X = self.initialise_latent(init, Q, Y)
@@ -39,6 +42,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self.oldpsave = oldpsave
self._oldps = []
self._debug = _debug
if self._debug:
self._count = itertools.count()
self._savedklll = []
self._savedparams = []
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
@@ -70,16 +79,18 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
return x
def _set_params(self, x, save_old=True):
def _set_params(self, x, save_old=True, save_count=0):
try:
N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N, Q).copy()
self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
sparse_GP._set_params(self, x[(2 * N * Q):])
self.oldps = x
except (LinAlgError, FloatingPointError):
print "\rWARNING: Caught LinAlgError, reconstructing old state "
self._set_params(self.oldps[-1], save_old=False)
except (LinAlgError, FloatingPointError, ZeroDivisionError):
print "\rWARNING: Caught LinAlgError, continueing without setting "
# if save_count > 10:
# raise
# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
@@ -103,15 +114,29 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def log_likelihood(self):
ll = sparse_GP.log_likelihood(self)
kl = self.KL_divergence()
return ll + kl
# if ll < -2E4:
# ll = -2E4 + np.random.randn()
# if kl > 5E4:
# kl = 5E4 + np.random.randn()
if self._debug:
f_call = self._count.next()
self._savedklll.append([f_call, ll, kl])
if f_call % 1 == 0:
self._savedparams.append([f_call, self._get_params()])
# print "\nkl:", kl, "ll:", ll
return ll - kl
def _log_likelihood_gradients(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
# TODO: find way to make faster
d_dmu = (dL_dmu + dKL_dmu).flatten()
d_dS = (dL_dS + dKL_dS).flatten()
d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten()
# TEST KL: ====================
# d_dmu = (dKL_dmu).flatten()
# d_dS = (dKL_dS).flatten()
@@ -135,3 +160,140 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
return ax
def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
import pylab
fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
plots = []
for i in range(self.X.shape[1]):
if axes is None:
ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
else:
ax = axes[i]
ax.plot(self.X, c='k', alpha=.3)
plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
ax.fill_between(np.arange(self.X.shape[0]),
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
ax.legend(borderaxespad=0.)
if i < self.X.shape[1] - 1:
ax.set_xticklabels('')
pylab.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def _debug_filter_params(self, x):
start, end = 0, self.X.size,
X = x[start:end].reshape(self.N, self.Q)
start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.Q)
start, end = end, end + (self.M * self.Q)
Z = x[start:end].reshape(self.M, self.Q)
start, end = end, end + self.Q
theta = x[start:]
return X, X_v, Z, theta
def _debug_plot(self):
assert self._debug, "must enable _debug, to debug-plot"
import pylab
from mpl_toolkits.mplot3d import Axes3D
fig = pylab.figure('BGPLVM DEBUG', figsize=(12, 10))
fig.clf()
# log like
splotshape = (6, 4)
ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
ha='center', va='center')
kllls = np.array(self._savedklll)
LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], label=r'$\log p(\mathbf{Y})$', mew=1.5)
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
drawn = dict(self._savedparams)
iters = np.array(drawn.keys())
self.showing = 0
ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
ha='center', va='center')
ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
ha='center', va='center')
ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
ha='center', va='center')
ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
ha='center', va='center')
X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
Xlatentplts = ax2.plot(X, ls="-", marker="x")
Slatentplts = ax3.plot(S, ls="-", marker="x")
Zplts = ax4.plot(Z, ls="-", marker="x")
thetaplts = ax5.bar(np.arange(len(theta)) - .4, theta)
ax5.set_xticks(np.arange(len(theta)))
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
borderaxespad=0, mode="expand")
Lleg = ax1.legend()
Lleg.draggable()
ax1.add_artist(Qleg)
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
try:
pylab.draw()
pylab.tight_layout(box=(0, .1, 1, .9))
except:
pass
# parameter changes
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
def onclick(event):
if event.inaxes is ax1 and event.button == 1:
# event.button, event.x, event.y, event.xdata, event.ydata)
tmp = np.abs(iters - event.xdata)
closest_hit = iters[tmp == tmp.min()][0]
if closest_hit != self.showing:
self.showing = closest_hit
# print closest_hit, iters, event.xdata
indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
indicatorL.set_data(self.showing, kllls[self.showing, 1])
X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
for i, Xlatent in enumerate(Xlatentplts):
Xlatent.set_ydata(X[:, i])
for i, Slatent in enumerate(Slatentplts):
Slatent.set_ydata(S[:, i])
for i, Zlatent in enumerate(Zplts):
Zlatent.set_ydata(Z[:, i])
for p, t in zip(thetaplts, theta):
p.set_height(t)
ax2.relim()
ax3.relim()
ax4.relim()
ax5.relim()
ax2.autoscale()
ax3.autoscale()
ax4.autoscale()
ax5.autoscale()
fig.canvas.draw()
cid = fig.canvas.mpl_connect('button_press_event', onclick)
return ax1, ax2, ax3, ax4, ax5

View file

@@ -287,29 +287,6 @@ class MRD(model):
else:
return pylab.gcf()
def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
plots = []
for i in range(self.X.shape[1]):
if axes is None:
ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
ax.plot(self.X, c='k', alpha=.3)
plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
ax.fill_between(numpy.arange(self.X.shape[0]),
self.X.T[i] - 2 * numpy.sqrt(self.gref.X_variance.T[i]),
self.X.T[i] + 2 * numpy.sqrt(self.gref.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
ax.legend(borderaxespad=0.)
if i < self.X.shape[1] - 1:
ax.set_xticklabels('')
pylab.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def plot_X(self, fig_num="MRD Predictions", axes=None):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
return fig

View file

@@ -57,6 +57,7 @@ class Test(unittest.TestCase):
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(Q, D))
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q)]
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
GPy.kern.linear(Q) + GPy.kern.bias(Q),