[optimizer] one copy for the optimizer in optimizer_array, use this instead of _set|get_params_transformed

Author: mzwiessele
Date: 2014-05-22 11:39:04 +01:00
Parent: 43ee8ce614
Commit: 5a2bc4863b
7 changed files with 158 additions and 79 deletions
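In short, the commit replaces the _get_params_transformed / _set_params_transformed pair with a single cached optimizer_array that the optimizer reads and writes directly. A minimal sketch of how calling code migrates; the stub class below exists only so the snippet runs without GPy, and the real property additionally applies the constraints' transformations (see the parameter_core.py hunks further down):

    # Call-site migration implied by this commit. 'm' stands for any model;
    # the stub below only exists so the snippet runs without GPy installed.
    import numpy as np

    class _ModelStub(object):
        def __init__(self):
            self.optimizer_array = np.zeros(2)  # attribute introduced by this commit

    m = _ModelStub()

    # before: x = m._get_params_transformed(); ...; m._set_params_transformed(x)
    # after:
    x = m.optimizer_array.copy()   # the optimizer's (untransformed) view of the parameters
    x += 0.1
    m.optimizer_array = x          # in GPy the setter re-applies the constraint transformations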

View file

@@ -61,7 +61,7 @@ class Model(Parameterized):
         on the current machine.
         """
-        initial_parameters = self._get_params_transformed()
+        initial_parameters = self.optimizer_array

         if parallel:
             try:
@@ -124,13 +124,15 @@ class Model(Parameterized):
         For probabilistic models this is the negative log_likelihood
         (including the MAP prior), so we return it here. If your model is not
-        probabilistic, just return your objective here!
+        probabilistic, just return your objective to minimize here!
         """
         return -float(self.log_likelihood()) - self.log_prior()

     def objective_function_gradients(self):
         """
         The gradients for the objective function for the given algorithm.
+        The gradients are w.r.t. the *negative* objective function, as
+        this framework works with *negative* log-likelihoods as a default.

         You can find the gradient for the parameters in self.gradient at all times.
         This is the place, where gradients get stored for parameters.
@@ -141,7 +143,7 @@ class Model(Parameterized):
         For probabilistic models this is the gradient of the negative log_likelihood
         (including the MAP prior), so we return it here. If your model is not
-        probabilistic, just return your gradient here!
+        probabilistic, just return your *negative* gradient here!
         """
         return -(self._log_likelihood_gradients() + self._log_prior_gradients())
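The two docstrings above pin down a sign convention: the optimizer minimizes, so objective_function returns the negative log-likelihood and objective_function_gradients returns the gradients of that negative objective. A standalone toy illustration of the convention (not GPy code; the Gaussian-mean model is invented for this sketch):

    import numpy as np

    class ToyGaussianMean(object):
        """Estimate the mean of N(mu, 1) data; mu is the only parameter."""
        def __init__(self, data):
            self.data = np.asarray(data, dtype=float)
            self.mu = 0.0

        def log_likelihood(self):
            # up to an additive constant
            return -0.5 * np.sum((self.data - self.mu) ** 2)

        def _log_likelihood_gradients(self):
            return np.array([np.sum(self.data - self.mu)])  # d log L / d mu

        def objective_function(self):
            # what a minimizer sees: the *negative* log-likelihood
            return -float(self.log_likelihood())

        def objective_function_gradients(self):
            # gradients of the negative objective, matching the docstring above
            return -self._log_likelihood_gradients()

    m = ToyGaussianMean([0.5, 1.5, 2.0])
    print(m.objective_function())
    print(m.objective_function_gradients())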
@@ -157,7 +159,8 @@ class Model(Parameterized):
         :type x: np.array
         """
         try:
-            self._set_params_transformed(x)
+            # self._set_params_transformed(x)
+            self.optimizer_array = x
             obj_grads = self._transform_gradients(self.objective_function_gradients())
             self._fail_count = 0
         except (LinAlgError, ZeroDivisionError, ValueError):
@@ -180,7 +183,7 @@ class Model(Parameterized):
         :parameter type: np.array
         """
         try:
-            self._set_params_transformed(x)
+            self.optimizer_array = x
             obj = self.objective_function()
             self._fail_count = 0
         except (LinAlgError, ZeroDivisionError, ValueError):
@@ -192,7 +195,7 @@ class Model(Parameterized):
     def _objective_grads(self, x):
         try:
-            self._set_params_transformed(x)
+            self.optimizer_array = x
             obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
             self._fail_count = 0
         except (LinAlgError, ZeroDivisionError, ValueError):
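All three wrappers above follow one pattern: write the optimizer's proposal into optimizer_array, evaluate, and count numerical failures rather than letting them abort the run. A hedged, standalone sketch of that pattern; the np.inf fallback and the max_failures cap are illustrative choices, not taken from this diff:

    import numpy as np
    from numpy.linalg import LinAlgError

    class SafeObjective(object):
        """Wraps a model so an optimizer can call it as a plain function of x."""
        def __init__(self, model, max_failures=10):
            self.model = model
            self.max_failures = max_failures
            self._fail_count = 0

        def __call__(self, x):
            try:
                self.model.optimizer_array = x        # pushes x through the transformations
                value = self.model.objective_function()
                self._fail_count = 0
                return value
            except (LinAlgError, ZeroDivisionError, ValueError):
                self._fail_count += 1
                if self._fail_count > self.max_failures:
                    raise
                return np.inf                         # let the optimizer back off this point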
@@ -226,7 +229,7 @@ class Model(Parameterized):
         optimizer = self.preferred_optimizer
         if start == None:
-            start = self._get_params_transformed()
+            start = self.optimizer_array

         optimizer = optimization.get_optimizer(optimizer)
         opt = optimizer(start, model=self, **kwargs)
@@ -235,7 +238,7 @@ class Model(Parameterized):
         self.optimization_runs.append(opt)

-        self._set_params_transformed(opt.x_opt)
+        self.optimizer_array = opt.x_opt

     def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
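Together with the hunk above this is the whole optimize round trip: seed the optimizer with optimizer_array, run it, and assign the optimum back so the model maps it into its own space. A toy version of the same loop, using SciPy purely to make the sketch runnable; GPy itself goes through its own optimizer objects via optimization.get_optimizer:

    # Toy round trip: read start point, minimize, write the optimum back.
    # 'TinyModel' is a stand-in; only the read/assign pattern mirrors the diff.
    import numpy as np
    from scipy.optimize import minimize

    class TinyModel(object):
        def __init__(self):
            self.optimizer_array = np.array([3.0, -2.0])
        def objective(self, x):
            return float(np.sum((x - 1.0) ** 2))   # minimum at [1, 1]
        def gradients(self, x):
            return 2.0 * (x - 1.0)

    m = TinyModel()
    start = m.optimizer_array                      # same role as 'start' in optimize()
    opt = minimize(m.objective, start, jac=m.gradients, method='L-BFGS-B')
    m.optimizer_array = opt.x                      # same role as 'self.optimizer_array = opt.x_opt'
    print(m.optimizer_array)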
@@ -260,7 +263,7 @@ class Model(Parameterized):
         The gradient is considered correct if the ratio of the analytical
         and numerical gradients is within <tolerance> of unity.
         """
-        x = self._get_params_transformed().copy()
+        x = self.optimizer_array.copy()

         if not verbose:
             # make sure only to test the selected parameters
@@ -270,8 +273,8 @@ class Model(Parameterized):
             transformed_index = self._raveled_index_for(target_param)
             if self._has_fixes():
                 indices = np.r_[:self.size]
-                which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
-                transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
+                which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
+                transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]

             if transformed_index.size == 0:
                 print "No free parameters to check"
@@ -290,7 +293,7 @@ class Model(Parameterized):
             gradient = gradient[transformed_index]

             denominator = (2 * np.dot(dx, gradient))
-            global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
+            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
             global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
             if global_ratio is np.nan:
                 global_ratio = 0
@@ -319,10 +322,10 @@ class Model(Parameterized):
             param_index = self._raveled_index_for(target_param)
             if self._has_fixes():
                 indices = np.r_[:self.size]
-                which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
+                which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
                 param_index = param_index[which[0]]
-                transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
-                #print param_index, transformed_index
+                transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
+                # print param_index, transformed_index
             else:
                 transformed_index = param_index
@@ -340,7 +343,7 @@ class Model(Parameterized):
                 xx[xind] -= 2.*step
                 f2 = self._objective(xx)
                 numerical_gradient = (f1 - f2) / (2 * step)
-                if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
+                if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
                 else: ratio = (f1 - f2) / (2 * step * gradient[xind])
                 difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
@@ -358,7 +361,7 @@ class Model(Parameterized):
                 grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])

                 print grad_string

-        self._set_params_transformed(x)
+        self.optimizer_array = x
         return ret
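The ratio checkgrad prints is the central finite difference divided by the analytical gradient, so a correct gradient gives values close to 1. A standalone sketch of that check on a toy function (not GPy's checkgrad, which additionally handles fixed and transformed parameters as the hunks above show):

    import numpy as np

    def numerical_gradient(f, x, step=1e-6):
        """Central differences: (f(x+h) - f(x-h)) / (2h), one coordinate at a time."""
        x = np.asarray(x, dtype=float)
        grad = np.empty_like(x)
        for i in range(x.size):
            xp, xm = x.copy(), x.copy()
            xp[i] += step
            xm[i] -= step
            grad[i] = (f(xp) - f(xm)) / (2.0 * step)
        return grad

    f = lambda x: np.sum(x ** 3)          # analytical gradient: 3 * x**2
    x = np.array([0.5, -1.2, 2.0])
    analytic = 3.0 * x ** 2
    numeric = numerical_gradient(f, x)
    ratio = numeric / analytic            # all entries should be close to 1.0
    print(ratio)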

View file

@@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
         s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
         memo[id(self)] = s
         import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
         return s

     def __reduce__(self):

View file

@@ -4,7 +4,7 @@
 import itertools
 import numpy
 np = numpy
-from parameter_core import Parameterizable, adjust_name_for_printing
+from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
 from observable_array import ObsAr

 ###### printing
@@ -221,10 +221,9 @@ class Param(Parameterizable, ObsAr):
         s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
         memo[id(self)] = s
         import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
         return s

     #===========================================================================
     # Printing -> done
     #===========================================================================
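Both __deepcopy__ changes (ObsAr above and Param here) route the copied attributes through __getstate__/__setstate__ instead of copying __dict__ wholesale, so whatever pickling chooses to drop (caches, loggers, derived arrays) is also dropped on copy. A minimal standalone sketch of the same idea for an ndarray subclass; the class and attribute names are invented for illustration, and real pickling would still need a __reduce__ like the one ObsAr defines:

    import copy
    import numpy as np

    class TrackedArray(np.ndarray):
        """ndarray subclass carrying extra attributes, some of which should never be copied."""
        def __new__(cls, input_array):
            obj = np.asarray(input_array).view(cls)
            obj.name = 'tracked'
            obj._cache = {'expensive': None}   # volatile: rebuilt on demand, not copied
            return obj

        def __getstate__(self):
            # keep everything except volatile attributes
            state = self.__dict__.copy()
            state.pop('_cache', None)
            return state

        def __setstate__(self, state):
            self.__dict__.update(state)
            self._cache = {}                   # rebuild volatile attributes fresh

        def __deepcopy__(self, memo):
            s = self.__class__(self.view(np.ndarray).copy())
            memo[id(self)] = s
            s.__setstate__(copy.deepcopy(self.__getstate__(), memo))
            return s

    a = TrackedArray([1.0, 2.0, 3.0])
    a._cache['expensive'] = 'stale'
    b = copy.deepcopy(a)
    print(b.name, b._cache)                    # attributes copied, cache reset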

View file

@@ -16,6 +16,7 @@ Observable Pattern for patameterization
 from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
 import re
+import logging

 __updated__ = '2014-05-21'
@@ -49,7 +50,6 @@ class Observable(object):
     as an observer. Every time the observable changes, it sends a notification with
     self as only argument to all its observers.
     """
-    _updated = True
     _updates = True
     def __init__(self, *args, **kwargs):
         super(Observable, self).__init__()
@@ -58,13 +58,19 @@ class Observable(object):
     @property
     def updates(self):
-        self._updates = self._highest_parent_._updates
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            self._updates = p._updates
         return self._updates

     @updates.setter
     def updates(self, ups):
         assert isinstance(ups, bool), "updates are either on (True) or off (False)"
-        self._highest_parent_._updates = ups
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            p._updates = ups
+        else:
+            self._updates = ups
         if ups:
             self._trigger_params_changed()
@@ -172,6 +178,7 @@ class Pickleable(object):
     """
     def __init__(self, *a, **kw):
         super(Pickleable, self).__init__()

     #===========================================================================
     # Pickling operations
     #===========================================================================
@@ -208,21 +215,25 @@ class Pickleable(object):
         memo[id(p)] = None # set all parents to be None, so they will not be copied
         memo[id(self.gradient)] = None # reset the gradient
         memo[id(self.param_array)] = None # and param_array
+        memo[id(self.optimizer_array)] = None # and param_array
         memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
         c = copy.deepcopy(self, memo) # and start the copy
         c._parent_index_ = None
+        c._trigger_params_changed()
         return c

     def __deepcopy__(self, memo):
         s = self.__new__(self.__class__) # fresh instance
         memo[id(self)] = s # be sure to break all cycles --> self is already done
         import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
+        s.__setstate__(copy.deepcopy(self.__getstate__(), memo)) # standard copy
         return s

     def __getstate__(self):
         ignore_list = ['_param_array_', # parameters get set from bottom to top
                        '_gradient_array_', # as well as gradients
+                       '_optimizer_copy_',
+                       'logger',
                        '_fixes_', # and fixes
                        '_Cacher_wrap__cachers', # never pickle cachers
                        ]
@@ -234,7 +245,8 @@ class Pickleable(object):
     def __setstate__(self, state):
         self.__dict__.update(state)
-        return self
+        self._transformed = True

 class Gradcheckable(Pickleable, Parentable):
     """
@@ -324,7 +336,6 @@ class Indexable(Nameable, Observable):
         self._default_constraint_ = default_constraint
         from index_operations import ParameterIndexOperations
         self.constraints = ParameterIndexOperations()
-        self._old_constraints = ParameterIndexOperations()
         self.priors = ParameterIndexOperations()
         if self._default_constraint_ is not None:
             self.constrain(self._default_constraint_)
@@ -617,36 +628,103 @@ class OptimizationHandlable(Indexable):
     """
     def __init__(self, name, default_constraint=None, *a, **kw):
         super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
+        self._optimizer_copy_ = None
+        self._transformed = True

-    def _get_params_transformed(self):
-        # transformed parameters (apply un-transformation rules)
-        p = self.param_array.copy()
-        [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-        if self.has_parent() and self.constraints[__fixed__].size != 0:
-            fixes = np.ones(self.size).astype(bool)
-            fixes[self.constraints[__fixed__]] = FIXED
-            return p[fixes]
-        elif self._has_fixes():
-            return p[self._fixes_]
-        return p
-
-    def _set_params_transformed(self, p):
-        """
-        Set parameters p, but make sure they get transformed before setting.
-        This means, the optimizer sees p, whereas the model sees transformed(p),
-        such that, the parameters the model sees are in the right domain.
-        """
-        if not(p is self.param_array):
-            if self.has_parent() and self.constraints[__fixed__].size != 0:
-                fixes = np.ones(self.size).astype(bool)
-                fixes[self.constraints[__fixed__]] = FIXED
-                self.param_array.flat[fixes] = p
-            elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
-            else: self.param_array.flat = p
-        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
-            for c, ind in self.constraints.iteritems() if c != __fixed__]
-        self._trigger_params_changed()
+    #===========================================================================
+    # Optimizer copy
+    #===========================================================================
+    @property
+    def optimizer_array(self):
+        """
+        Array for the optimizer to work on.
+        This array always lives in the space for the optimizer.
+        Thus, it is untransformed, going from Transformations.
+
+        Setting this array, will make sure the transformed parameters for this model
+        will be set accordingly. It has to be set with an array, retrieved from
+        this method, as e.g. fixing will resize the array.
+
+        The optimizer should only interfere with this array, such that transofrmations
+        are secured.
+        """
+        if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
+            self._optimizer_copy_ = np.empty(self.size)
+
+        if self._transformed:
+            self._optimizer_copy_.flat = self.param_array.flat
+            [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+            if self.has_parent() and self.constraints[__fixed__].size != 0:
+                fixes = np.ones(self.size).astype(bool)
+                fixes[self.constraints[__fixed__]] = FIXED
+                return self._optimizer_copy_[fixes]
+            elif self._has_fixes():
+                return self._optimizer_copy_[self._fixes_]
+
+        self._transformed = False
+        return self._optimizer_copy_
+
+    @optimizer_array.setter
+    def optimizer_array(self, p):
+        """
+        Make sure the optimizer copy does not get touched, thus, we only want to
+        set the values *inside* not the array itself.
+
+        Also we want to update param_array in here.
+        """
+        if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
+            self._optimizer_copy_ = np.empty(self.size)
+            self._optimizer_copy_.flat = self.param_array.flat
+
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            self._optimizer_copy_.flat[fixes] = p
+        elif self._has_fixes(): self._optimizer_copy_.flat[self._fixes_] = p
+        else: self._optimizer_copy_.flat = p
+
+        self.param_array.flat = self._optimizer_copy_.flat
+        [np.put(self.param_array, ind, c.f(self._optimizer_copy_.flat[ind]))
+            for c, ind in self.constraints.iteritems() if c != __fixed__]
+
+        self._transformed = True
+        self._trigger_params_changed()
+
+    def _get_params_transformed(self):
+        raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer array insetad!"
+        # # transformed parameters (apply un-transformation rules)
+        # p = self.param_array.copy()
+        # [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+        # if self.has_parent() and self.constraints[__fixed__].size != 0:
+        #     fixes = np.ones(self.size).astype(bool)
+        #     fixes[self.constraints[__fixed__]] = FIXED
+        #     return p[fixes]
+        # elif self._has_fixes():
+        #     return p[self._fixes_]
+        # return p
+        #
+    def _set_params_transformed(self, p):
+        raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer array insetad!"
+        # """
+        # Set parameters p, but make sure they get transformed before setting.
+        # This means, the optimizer sees p, whereas the model sees transformed(p),
+        # such that, the parameters the model sees are in the right domain.
+        # """
+        # if not(p is self.param_array):
+        #     if self.has_parent() and self.constraints[__fixed__].size != 0:
+        #         fixes = np.ones(self.size).astype(bool)
+        #         fixes[self.constraints[__fixed__]] = FIXED
+        #         self.param_array.flat[fixes] = p
+        #     elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
+        #     else: self.param_array.flat = p
+        # [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
+        #     for c, ind in self.constraints.iteritems() if c != __fixed__]
+        # self._trigger_params_changed()

     def _trigger_params_changed(self, trigger_parent=True):
         """
         First tell all children to update,
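A condensed, standalone model of what the property and its setter above do: keep one flat copy for the optimizer, map into the optimizer's space with the constraint's finv on the way out and back with f on the way in, and hide fixed entries from the optimizer entirely. The exp/log pair stands in for a real Transformation; everything here is illustrative, not GPy code.

    import numpy as np

    class TinyHandlable(object):
        """One positively constrained parameter vector with an optional free/fixed mask."""
        def __init__(self, values, free_mask=None):
            self.param_array = np.asarray(values, dtype=float)   # model space (positive values)
            self._free = free_mask                               # True = free, False = fixed
            self._optimizer_copy = self._finv(self.param_array)  # cached optimizer-space copy

        # stand-ins for a Transformation's finv/f (model space <-> optimizer space)
        @staticmethod
        def _finv(x):
            return np.log(x)

        @staticmethod
        def _f(x):
            return np.exp(x)

        @property
        def optimizer_array(self):
            self._optimizer_copy[:] = self._finv(self.param_array)
            if self._free is not None:
                return self._optimizer_copy[self._free]          # the optimizer never sees fixed entries
            return self._optimizer_copy

        @optimizer_array.setter
        def optimizer_array(self, p):
            if self._free is not None:
                self._optimizer_copy[self._free] = p
            else:
                self._optimizer_copy[:] = p
            self.param_array[:] = self._f(self._optimizer_copy)  # map back into the model's space

    m = TinyHandlable([1.0, 2.0, 3.0], free_mask=np.array([True, False, True]))
    x = m.optimizer_array                  # length 2: the fixed middle entry is hidden
    m.optimizer_array = x + 0.5
    print(m.param_array)                   # middle value stays 2.0; free entries scale by exp(0.5)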
@@ -725,7 +803,7 @@ class OptimizationHandlable(Indexable):
         x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
         # now draw from prior where possible
         [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
-        self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)

     #===========================================================================
     # For shared memory arrays. This does nothing in Param, but sets the memory
@@ -784,6 +862,7 @@ class Parameterizable(OptimizationHandlable):
         self.parameters = ArrayList()
         self._param_array_ = None
         self._added_names_ = set()
+        self.logger = logging.getLogger(self.__class__.__name__)
         self.__visited = False # for traversing in reverse order we need to know if we were here already

     @property
@@ -894,6 +973,11 @@ class Parameterizable(OptimizationHandlable):
         self._remove_parameter_name(None, old_name)
         self._add_parameter_name(param)

+    def __setstate__(self, state):
+        super(Parameterizable, self).__setstate__(state)
+        self.logger = logging.getLogger(self.__class__.__name__)
+        return self
+
     #===========================================================================
     # notification system
     #===========================================================================

View file

@@ -26,7 +26,7 @@ class BayesianGPLVM(SparseGP):
     """
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
-        self.logger = logging.getLogger("Bayesian GPLVM <{}>".format(hex(id(self))))
+        self.logger = logging.getLogger(self.__class__.__name__)
         if X == None:
             from ..util.initialization import initialize_latent
             self.logger.info("initializing latent space X with method {}".format(init))

View file

@@ -59,7 +59,7 @@ class MRD(SparseGP):
                  inference_method=None, likelihoods=None, name='mrd', Ynames=None):
         super(GP, self).__init__(name)
-        self.logger = logging.getLogger("MRD <{}>".format(hex(id(self))))
+        self.logger = logging.getLogger(self.__class__.__name__)
         self.input_dim = input_dim
         self.num_inducing = num_inducing
@@ -107,16 +107,16 @@ class MRD(SparseGP):
         self.logger.info("building kernels")
         if kernel is None:
             from ..kern import RBF
-            self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
+            kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
         elif isinstance(kernel, Kern):
-            self.kernels = []
+            kernels = []
             for i in range(len(Ylist)):
                 k = kernel.copy()
-                self.kernels.append(k)
+                kernels.append(k)
         else:
             assert len(kernel) == len(Ylist), "need one kernel per output"
             assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
-            self.kernels = kernel
+            kernels = kernel

         if X_variance is None:
             X_variance = np.random.uniform(0.1, 0.2, X.shape)
@@ -125,8 +125,8 @@ class MRD(SparseGP):
         self.X = NormalPosterior(X, X_variance)

         if likelihoods is None:
-            self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
-        else: self.likelihoods = likelihoods
+            likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
+        else: likelihoods = likelihoods

         self.logger.info("adding X and Z")
         self.add_parameters(self.X, self.Z)
@@ -134,9 +134,8 @@ class MRD(SparseGP):
         self.bgplvms = []
         self.num_data = Ylist[0].shape[0]
-        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
+        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
             assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
             p = Parameterized(name=n)
             p.add_parameter(k)
             p.kern = k
@@ -154,19 +153,18 @@ class MRD(SparseGP):
         self.posteriors = []
         self.Z.gradient[:] = 0.
         self.X.gradient[:] = 0.
-        for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
+        for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
             self.logger.info('working on im <{}>'.format(hex(id(i))))
+            k, l = b.kern, b.likelihood
             posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
             self.posteriors.append(posterior)
             self._log_marginal_likelihood += lml

             # likelihoods gradients
-            self.logger.info("likelihood gradients")
             l.update_gradients(grad_dict.pop('dL_dthetaL'))

             #gradients wrt kernel
-            self.logger.info("kernel gradients")
             dL_dKmm = grad_dict.pop('dL_dKmm')
             k.update_gradients_full(dL_dKmm, self.Z, None)
             target = k.gradient.copy()
@@ -174,7 +172,6 @@ class MRD(SparseGP):
             k.gradient += target

             #gradients wrt Z
-            self.logger.info("Z gradients")
             self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
             self.Z.gradient += k.gradients_Z_expectations(
                                grad_dict['dL_dpsi0'],
@@ -182,16 +179,15 @@ class MRD(SparseGP):
                                grad_dict['dL_dpsi2'],
                                Z=self.Z, variational_posterior=self.X)

-            self.logger.info("X gradients")
             dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
             self.X.mean.gradient += dL_dmean
             self.X.variance.gradient += dL_dS

-        # update for the KL divergence
         self.posterior = self.posteriors[0]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
+        # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@@ -268,8 +264,8 @@ class MRD(SparseGP):
         This predicts the output mean and variance for the dataset given in Ylist[Yindex]
         """
         self.posterior = self.posteriors[Yindex]
-        self.kern = self.kernels[Yindex]
-        self.likelihood = self.likelihoods[Yindex]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
         return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)

 #===============================================================================
@@ -311,7 +307,7 @@ class MRD(SparseGP):
         """
         import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        import matplotlib.pyplot as plt
+        from matplotlib import pyplot as plt
         from ..plotting.matplot_dep import dim_reduction_plots

         if "Yindex" not in predict_kwargs:
             predict_kwargs['Yindex'] = 0
@@ -333,10 +329,7 @@ class MRD(SparseGP):
         return plot

     def __getstate__(self):
-        # TODO:
-        import copy
-        state = copy.copy(self.__dict__)
-        del state['kernels']
+        state = super(MRD, self).__getstate__()
         del state['kern']
         del state['likelihood']
         return state
@@ -344,7 +337,6 @@ class MRD(SparseGP):
     def __setstate__(self, state):
         # TODO:
         super(MRD, self).__setstate__(state)
-        self.kernels = [p.kern for p in self.bgplvms]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
         self.parameters_changed()
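The MRD changes all push in one direction: the per-view kernels and likelihoods live only inside the per-view containers (bgplvms), and the top-level kern/likelihood are throwaway references that are re-derived in __setstate__ instead of pickled. A standalone sketch of that ownership pattern (all names are invented for illustration):

    import pickle

    class View(object):
        """One output view: owns its kernel and likelihood."""
        def __init__(self, name):
            self.name = name
            self.kern = 'kern-for-%s' % name          # stand-ins for real kernel/likelihood objects
            self.likelihood = 'lik-for-%s' % name

    class MultiView(object):
        def __init__(self, names):
            self.views = [View(n) for n in names]     # the only owners, like MRD.bgplvms
            self._point_at(0)

        def _point_at(self, index):
            # derived references: convenient to have, cheap to rebuild
            self.kern = self.views[index].kern
            self.likelihood = self.views[index].likelihood

        def __getstate__(self):
            state = self.__dict__.copy()
            del state['kern']                         # never pickle the derived references
            del state['likelihood']
            return state

        def __setstate__(self, state):
            self.__dict__.update(state)
            self._point_at(0)                         # rebuild them from the owning views

    m = MultiView(['a', 'b'])
    m2 = pickle.loads(pickle.dumps(m))
    print(m2.kern, m2.likelihood)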

View file

@@ -16,8 +16,7 @@ from GPy.core.parameterization.priors import Gaussian
 from GPy.kern._src.rbf import RBF
 from GPy.kern._src.linear import Linear
 from GPy.kern._src.static import Bias, White
-from GPy.examples.dimensionality_reduction import mrd_simulation,\
-    bgplvm_simulation
+from GPy.examples.dimensionality_reduction import mrd_simulation
 from GPy.examples.regression import toy_rbf_1d_50
 from GPy.core.parameterization.variational import NormalPosterior
 from GPy.models.gp_regression import GPRegression
@@ -90,6 +89,7 @@ class Test(ListDictTestCase):
         self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
         self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
         self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        pcopy.gradient = 10 # gradient does not get copied anymore
         self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
         self.assertSequenceEqual(str(par), str(pcopy))
         self.assertIsNot(par.param_array, pcopy.param_array)
@@ -151,6 +151,7 @@ class Test(ListDictTestCase):
         par = NormalPosterior(X,Xv)
         par.gradient = 10
         pcopy = par.copy()
+        pcopy.gradient = 10
         self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
         self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
         self.assertSequenceEqual(str(par), str(pcopy))