merge the current devel into psi2

2026-06-11 15:15:15 +02:00 · 2014-08-11 18:01:23 +01:00 · 2014-08-11 18:01:23 +01:00 · 785c580032
commit 785c580032
parent 9f1bd3ef25 3651374617
49 changed files with 1839 additions and 581 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -12,6 +12,10 @@ from .. import likelihoods
 from ..likelihoods.gaussian import Gaussian
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
 from parameterization.variational import VariationalPosterior
+from scipy.sparse.base import issparse
+
+import logging
+logger = logging.getLogger("GP")

 class GP(Model):
    """
@ -33,14 +37,16 @@ class GP(Model):

        assert X.ndim == 2
        if isinstance(X, (ObsAr, VariationalPosterior)):
-            self.X = X
+            self.X = X.copy()
        else: self.X = ObsAr(X)

        self.num_data, self.input_dim = self.X.shape

        assert Y.ndim == 2
-        self.Y = ObsAr(Y)
-#         assert Y.shape[0] == self.num_data
+        logger.info("initializing Y")
+        if issparse(Y): self.Y = Y
+        else: self.Y = ObsAr(Y)
+        assert Y.shape[0] == self.num_data
        _, self.output_dim = self.Y.shape

        #TODO: check the type of this is okay?
@ -54,6 +60,7 @@ class GP(Model):
        self.likelihood = likelihood

        #find a sensible inference method
+        logger.info("initializing inference method")
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
@ -62,6 +69,7 @@ class GP(Model):
                print "defaulting to ", inference_method, "for latent function inference"
        self.inference_method = inference_method

+        logger.info("adding kernel and likelihood as parameters")
        self.add_parameter(self.kern)
        self.add_parameter(self.likelihood)

@ -199,9 +207,9 @@ class GP(Model):
        if fillcol is not None:
            kw['fillcol'] = fillcol
        return models_plots.plot_fit(self, plot_limits, which_data_rows,
-                                     which_data_ycols, fixed_inputs, 
-                                     levels, samples, fignum, ax, resolution, 
-                                     plot_raw=plot_raw, Y_metadata=Y_metadata, 
+                                     which_data_ycols, fixed_inputs,
+                                     levels, samples, fignum, ax, resolution,
+                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
                                     data_symbol=data_symbol, **kw)

    def plot(self, plot_limits=None, which_data_rows='all',
@ -250,9 +258,9 @@ class GP(Model):
        if fillcol is not None:
            kw['fillcol'] = fillcol
        return models_plots.plot_fit(self, plot_limits, which_data_rows,
-                                     which_data_ycols, fixed_inputs, 
-                                     levels, samples, fignum, ax, resolution, 
-                                     plot_raw=plot_raw, Y_metadata=Y_metadata, 
+                                     which_data_ycols, fixed_inputs,
+                                     levels, samples, fignum, ax, resolution,
+                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
                                     data_symbol=data_symbol, **kw)

    def input_sensitivity(self):
@ -276,5 +284,9 @@ class GP(Model):
        TODO: valid args
        """
        self.inference_method.on_optimization_start()
-        super(GP, self).optimize(optimizer, start, **kwargs)
-        self.inference_method.on_optimization_end()
+        try:
+            super(GP, self).optimize(optimizer, start, **kwargs)
+        except KeyboardInterrupt:
+            print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
+            self.inference_method.on_optimization_end()
+            raise
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -20,7 +20,7 @@ class Model(Parameterized):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)
        self.optimization_runs = []
        self.sampling_runs = []
-        self.preferred_optimizer = 'scg'
+        self.preferred_optimizer = 'bfgs'

    def log_likelihood(self):
        raise NotImplementedError, "this needs to be implemented to use the model class"
@ -61,7 +61,7 @@ class Model(Parameterized):
        on the current machine.

        """
-        initial_parameters = self._get_params_transformed()
+        initial_parameters = self.optimizer_array.copy()

        if parallel:
            try:
@ -97,9 +97,9 @@ class Model(Parameterized):

        if len(self.optimization_runs):
            i = np.argmin([o.f_opt for o in self.optimization_runs])
-            self._set_params_transformed(self.optimization_runs[i].x_opt)
+            self.optimizer_array = self.optimization_runs[i].x_opt
        else:
-            self._set_params_transformed(initial_parameters)
+            self.optimizer_array = initial_parameters

    def ensure_default_constraints(self, warning=True):
        """
@ -118,30 +118,32 @@ class Model(Parameterized):
        """
        The objective function for the given algorithm.

-        This function is the true objective, which wants to be minimized. 
-        Note that all parameters are already set and in place, so you just need 
+        This function is the true objective, which wants to be minimized.
+        Note that all parameters are already set and in place, so you just need
        to return the objective function here.

        For probabilistic models this is the negative log_likelihood
-        (including the MAP prior), so we return it here. If your model is not 
-        probabilistic, just return your objective here!
+        (including the MAP prior), so we return it here. If your model is not
+        probabilistic, just return your objective to minimize here!
        """
        return -float(self.log_likelihood()) - self.log_prior()

    def objective_function_gradients(self):
        """
        The gradients for the objective function for the given algorithm.
+        The gradients are w.r.t. the *negative* objective function, as
+        this framework works with *negative* log-likelihoods as a default.

        You can find the gradient for the parameters in self.gradient at all times.
        This is the place, where gradients get stored for parameters.

-        This function is the true objective, which wants to be minimized. 
-        Note that all parameters are already set and in place, so you just need 
+        This function is the true objective, which wants to be minimized.
+        Note that all parameters are already set and in place, so you just need
        to return the gradient here.

        For probabilistic models this is the gradient of the negative log_likelihood
-        (including the MAP prior), so we return it here. If your model is not 
-        probabilistic, just return your gradient here!
+        (including the MAP prior), so we return it here. If your model is not
+        probabilistic, just return your *negative* gradient here!
        """
        return -(self._log_likelihood_gradients() + self._log_prior_gradients())

@ -157,7 +159,8 @@ class Model(Parameterized):
        :type x: np.array
        """
        try:
-            self._set_params_transformed(x)
+            # self._set_params_transformed(x)
+            self.optimizer_array = x
            obj_grads = self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
@ -180,7 +183,7 @@ class Model(Parameterized):
        :parameter type: np.array
        """
        try:
-            self._set_params_transformed(x)
+            self.optimizer_array = x
            obj = self.objective_function()
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
@ -192,7 +195,7 @@ class Model(Parameterized):

    def _objective_grads(self, x):
        try:
-            self._set_params_transformed(x)
+            self.optimizer_array = x
            obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
@ -222,20 +225,24 @@ class Model(Parameterized):
        if self.size == 0:
            raise RuntimeError, "Model without parameters cannot be optimized"

+        if start == None:
+            start = self.optimizer_array
+
        if optimizer is None:
            optimizer = self.preferred_optimizer

-        if start == None:
-            start = self._get_params_transformed()
-
-        optimizer = optimization.get_optimizer(optimizer)
-        opt = optimizer(start, model=self, **kwargs)
+        if isinstance(optimizer, optimization.Optimizer):
+            opt = optimizer
+            opt.model = self
+        else:
+            optimizer = optimization.get_optimizer(optimizer)
+            opt = optimizer(start, model=self, **kwargs)

        opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)

        self.optimization_runs.append(opt)

-        self._set_params_transformed(opt.x_opt)
+        self.optimizer_array = opt.x_opt

    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
        # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
@ -246,7 +253,7 @@ class Model(Parameterized):
    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
        """
        Check the gradient of the model by comparing to a numerical
-        estimate.  If the verbose flag is passed, invividual
+        estimate.  If the verbose flag is passed, individual
        components are tested (and printed)

        :param verbose: If True, print a "full" checking of each parameter
@ -260,7 +267,7 @@ class Model(Parameterized):
           The gradient is considered correct if the ratio of the analytical
           and numerical gradients is within <tolerance> of unity.
        """
-        x = self._get_params_transformed().copy()
+        x = self.optimizer_array.copy()

        if not verbose:
            # make sure only to test the selected parameters
@ -270,8 +277,8 @@ class Model(Parameterized):
                transformed_index = self._raveled_index_for(target_param)
                if self._has_fixes():
                    indices = np.r_[:self.size]
-                    which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
-                    transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
+                    which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
+                    transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]

                if transformed_index.size == 0:
                    print "No free parameters to check"
@ -290,7 +297,7 @@ class Model(Parameterized):
            gradient = gradient[transformed_index]

            denominator = (2 * np.dot(dx, gradient))
-            global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
+            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
            if global_ratio is np.nan:
                global_ratio = 0
@ -319,10 +326,10 @@ class Model(Parameterized):
                param_index = self._raveled_index_for(target_param)
                if self._has_fixes():
                    indices = np.r_[:self.size]
-                    which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
+                    which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
                    param_index = param_index[which[0]]
-                    transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
-                    #print param_index, transformed_index
+                    transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
+                    # print param_index, transformed_index
                else:
                    transformed_index = param_index

@ -340,9 +347,9 @@ class Model(Parameterized):
                xx[xind] -= 2.*step
                f2 = self._objective(xx)
                numerical_gradient = (f1 - f2) / (2 * step)
-                if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
+                if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
                else: ratio = (f1 - f2) / (2 * step * gradient[xind])
-                difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
+                difference = np.abs(numerical_gradient - gradient[xind])

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[nind])
@ -358,7 +365,7 @@ class Model(Parameterized):
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                print grad_string

-            self._set_params_transformed(x)
+            self.optimizer_array = x
            return ret


--- a/GPy/core/parameterization/lists_and_dicts.py
+++ b/GPy/core/parameterization/lists_and_dicts.py
@ -77,8 +77,18 @@ class ObserverList(object):
            self._poc.insert(ins, (priority, weakref.ref(observer), callble))

    def __str__(self):
+        from . import ObsAr, Param
+        from parameter_core import Parameterizable
        ret = []
        curr_p = None
+        
+        def frmt(o):
+            if isinstance(o, ObsAr):
+                return 'ObsArr <{}>'.format(hex(id(o)))
+            elif isinstance(o, (Param,Parameterizable)):
+                return '{}'.format(o.hierarchy_name())
+            else:
+                return repr(o)                
        for p, o, c in self:
            curr = ''
            if curr_p != p:
@ -87,8 +97,9 @@ class ObserverList(object):
            else: curr_pre = " "*len(pre)
            curr_p = p
            curr += curr_pre
-            ret.append(curr + ", ".join(map(repr, [o,c])))
-        return '\n'.join(ret)
+            
+            ret.append(curr + ", ".join([frmt(o), str(c)]))
+            return '\n'.join(ret)

    def flush(self):
        """
--- a/GPy/core/parameterization/observable_array.py
+++ b/GPy/core/parameterization/observable_array.py
@ -30,16 +30,22 @@ class ObsAr(np.ndarray, Pickleable, Observable):
    def __array_wrap__(self, out_arr, context=None):
        return out_arr.view(np.ndarray)

+    def _setup_observers(self):
+        # do not setup anything, as observable arrays do not have default observers
+        pass
+
    def copy(self):
+        from lists_and_dicts import ObserverList
        memo = {}
        memo[id(self)] = self
+        memo[id(self.observers)] = ObserverList()
        return self.__deepcopy__(memo)

    def __deepcopy__(self, memo):
        s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
        memo[id(self)] = s
        import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
        return s

    def __reduce__(self):
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@ -4,7 +4,7 @@
 import itertools
 import numpy
 np = numpy
-from parameter_core import Parameterizable, adjust_name_for_printing
+from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
 from observable_array import ObsAr

 ###### printing
@ -173,36 +173,6 @@ class Param(Parameterizable, ObsAr):
    def _ensure_fixes(self):
        if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)

-    #===========================================================================
-    # parameterizable
-    #===========================================================================
-    def traverse(self, visit, *args, **kwargs):
-        """
-        Traverse the hierarchy performing visit(self, *args, **kwargs) at every node passed by.
-        See "visitor pattern" in literature. This is implemented in pre-order fashion.
-
-        This will function will just call visit on self, as Param are leaf nodes.
-        """
-        self.__visited = True
-        visit(self, *args, **kwargs)
-        self.__visited = False
-
-    def traverse_parents(self, visit, *args, **kwargs):
-        """
-        Traverse the hierarchy upwards, visiting all parents and their children, except self.
-        See "visitor pattern" in literature. This is implemented in pre-order fashion.
-
-        Example:
-
-        parents = []
-        self.traverse_parents(parents.append)
-        print parents
-        """
-        if self.has_parent():
-            self.__visited = True
-            self._parent_._traverse_parents(visit, *args, **kwargs)
-            self.__visited = False
-
    #===========================================================================
    # Convenience
    #===========================================================================
@ -217,14 +187,24 @@ class Param(Parameterizable, ObsAr):
    #===========================================================================
    # Pickling and copying
    #===========================================================================
+    def copy(self):
+        return Parameterizable.copy(self, which=self)
+    
    def __deepcopy__(self, memo):
        s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
-        memo[id(self)] = s
+        memo[id(self)] = s        
        import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
        return s
-
-
+    def _setup_observers(self):
+        """
+        Setup the default observers
+        
+        1: pass through to parent, if present
+        """
+        if self.has_parent():
+            self.add_observer(self._parent_, self._parent_._pass_through_notify_observers, -np.inf)
+    
    #===========================================================================
    # Printing -> done
    #===========================================================================
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -16,8 +16,9 @@ Observable Pattern for patameterization
 from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
 import re
+import logging

-__updated__ = '2014-05-20'
+__updated__ = '2014-05-21'

 class HierarchyError(Exception):
    """
@ -49,7 +50,6 @@ class Observable(object):
    as an observer. Every time the observable changes, it sends a notification with
    self as only argument to all its observers.
    """
-    _updated = True
    _updates = True
    def __init__(self, *args, **kwargs):
        super(Observable, self).__init__()
@ -58,26 +58,32 @@ class Observable(object):

    @property
    def updates(self):
-        self._updates = self._highest_parent_._updates
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            self._updates = p._updates
        return self._updates

    @updates.setter
    def updates(self, ups):
        assert isinstance(ups, bool), "updates are either on (True) or off (False)"
-        self._highest_parent_._updates = ups
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            p._updates = ups
+        else:
+            self._updates = ups
        if ups:
            self._trigger_params_changed()

    def add_observer(self, observer, callble, priority=0):
        """
-        Add an observer `observer` with the callback `callble` 
+        Add an observer `observer` with the callback `callble`
        and priority `priority` to this observers list.
        """
        self.observers.add(priority, observer, callble)

    def remove_observer(self, observer, callble=None):
        """
-        Either (if callble is None) remove all callables, 
+        Either (if callble is None) remove all callables,
        which were added alongside observer,
        or remove callable `callble` which was added alongside
        the observer `observer`.
@ -86,7 +92,7 @@ class Observable(object):
        for poc in self.observers:
            _, obs, clble = poc
            if callble is not None:
-                if (obs == observer) and (callble == clble):
+                if (obs is observer) and (callble == clble):
                    to_remove.append(poc)
            else:
                if obs is observer:
@ -172,6 +178,7 @@ class Pickleable(object):
    """
    def __init__(self, *a, **kw):
        super(Pickleable, self).__init__()
+
    #===========================================================================
    # Pickling operations
    #===========================================================================
@ -192,37 +199,46 @@ class Pickleable(object):
    #===========================================================================
    # copy and pickling
    #===========================================================================
-    def copy(self):
+    def copy(self, memo=None, which=None):
        """
-        Returns a (deep) copy of the current parameter handle. 
+        Returns a (deep) copy of the current parameter handle.

        All connections to parents of the copy will be cut.
+
+        :param dict memo: memo for deepcopy
+        :param Parameterized which: parameterized object which started the copy process [default: self]
        """
        #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
+        if memo is None:
+            memo = {}
        import copy
-        memo = {}
        # the next part makes sure that we do not include parents in any form:
        parents = []
-        self.traverse_parents(parents.append) # collect parents
+        if which is None:
+            which = self
+        which.traverse_parents(parents.append) # collect parents
        for p in parents:
-            memo[id(p)] = None # set all parents to be None, so they will not be copied
-        memo[id(self.gradient)] = None # reset the gradient
-        memo[id(self.param_array)] = None # and param_array
-        memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
-        c = copy.deepcopy(self, memo) # and start the copy
-        c._parent_index_ = None
-        return c
+            if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied
+        if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient
+        if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
+        copy = copy.deepcopy(self, memo) # and start the copy
+        copy._parent_index_ = None
+        copy._trigger_params_changed()
+        return copy

    def __deepcopy__(self, memo):
        s = self.__new__(self.__class__) # fresh instance
        memo[id(self)] = s # be sure to break all cycles --> self is already done
        import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
+        s.__setstate__(copy.deepcopy(self.__getstate__(), memo)) # standard copy
        return s

    def __getstate__(self):
        ignore_list = ['_param_array_', # parameters get set from bottom to top
                       '_gradient_array_', # as well as gradients
+                       '_optimizer_copy_',
+                       'logger',
+                       'observers',
                       '_fixes_', # and fixes
                       '_Cacher_wrap__cachers', # never pickle cachers
                       ]
@ -231,10 +247,14 @@ class Pickleable(object):
            if k not in ignore_list:
                dc[k] = v
        return dc
- 
+
    def __setstate__(self, state):
        self.__dict__.update(state)
-        return self
+        from lists_and_dicts import ObserverList
+        self.observers = ObserverList()
+        self._setup_observers()
+        self._optimizer_copy_transformed = False
+

 class Gradcheckable(Pickleable, Parentable):
    """
@ -261,7 +281,7 @@ class Gradcheckable(Pickleable, Parentable):
        """
        if self.has_parent():
            return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
-        return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
+        return self._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)

    def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
        """
@ -352,8 +372,9 @@ class Indexable(Nameable, Observable):
        basically just sums up the parameter sizes which come before param.
        """
        if param.has_parent():
-            if param._parent_._get_original(param) in self.parameters:
-                return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
+            p = param._parent_._get_original(param)
+            if p in self.parameters:
+                return reduce(lambda a,b: a + b.size, self.parameters[:p._parent_index_], 0)
            return self._offset_for(param._parent_) + param._parent_._offset_for(param)
        return 0

@ -387,7 +408,6 @@ class Indexable(Nameable, Observable):
        if value is not None:
            self[:] = value

-        #index = self._raveled_index()
        index = self.unconstrain()
        index = self._add_to_index_operations(self.constraints, index, __fixed__, warning)
        self._highest_parent_._set_fixed(self, index)
@ -423,12 +443,12 @@ class Indexable(Nameable, Observable):
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED

    def _connect_fixes(self):
-        from ties_and_remappings import Tie
-        self._ensure_fixes()
-        [np.put(self._fixes_, ind, FIXED) for c, ind in self.constraints.iteritems()
-            if c == __fixed__ or isinstance(c,Tie)]
-        if np.all(self._fixes_): self._fixes_ = None
-        if self.constraints[__fixed__]==0:
+        fixed_indices = self.constraints[__fixed__]
+        if fixed_indices.size > 0:
+            self._ensure_fixes()
+            self._fixes_[fixed_indices] = FIXED
+        else:
+            self._fixes_ = None
            del self.constraints[__fixed__]

    #===========================================================================
@ -495,32 +515,6 @@ class Indexable(Nameable, Observable):
    #===========================================================================
    # Constrain operations -> done
    #===========================================================================
-    
-    def tie(self, name):
-        from ties_and_remappings import Tie
-        #remove any constraints
-        old_const = [c for c in self.constraints.properties() if not isinstance(c,Tie)]
-        self.unconstrain()
-
-        #see if a tie exists with that name
-        if name in self._highest_parent_.ties:
-            t = self._highest_parent_.ties[name]
-        else:
-            #create a tie object
-            value = np.atleast_1d(self.param_array)[0]*1
-            t = Tie(value=value, name=name)
-
-            #add the new tie object to the global index
-            self._highest_parent_.ties[name] = t
-            self._highest_parent_.add_parameter(t)
-
-            #constrain the tie as we were constrained
-            if len(old_const)>0:
-                t.constrain(old_const[0])
-        
-        self.constraints.add(t, self._raveled_index())
-        t.add_tied_parameter(self)
-        self._highest_parent_._connect_fixes()

    def constrain(self, transform, warning=True, trigger_parent=True):
        """
@ -638,48 +632,78 @@ class OptimizationHandlable(Indexable):
    """
    This enables optimization handles on an Object as done in GPy 0.4.

-    `..._transformed`: make sure the transformations and constraints etc are handled
+    `..._optimizer_copy_transformed`: make sure the transformations and constraints etc are handled
    """
    def __init__(self, name, default_constraint=None, *a, **kw):
        super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
+        self._optimizer_copy_ = None
+        self._optimizer_copy_transformed = False

-    def _get_params_transformed(self):
-        # transformed parameters (apply un-transformation rules)
-        p = self.param_array.copy()
-        from ties_and_remappings import Tie
-        [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__ and not isinstance(c,Tie)]
-        if self.has_parent() and self.constraints[__fixed__].size != 0:
-            fixes = np.ones(self.size).astype(bool)
-            [np.put(fixes,ind,FIXED) for c, ind in self.constraints.iteritems()
-             if c == __fixed__ or isinstance(c,Tie)]
-            return p[fixes]
-        elif self._has_fixes():
-            return p[self._fixes_]
-        return p
+    #===========================================================================
+    # Optimizer copy
+    #===========================================================================
+    @property
+    def optimizer_array(self):
+        """
+        Array for the optimizer to work on.
+        This array always lives in the space for the optimizer.
+        Thus, it is untransformed, going from Transformations.

-    def _set_params_transformed(self, p):
+        Setting this array, will make sure the transformed parameters for this model
+        will be set accordingly. It has to be set with an array, retrieved from
+        this method, as e.g. fixing will resize the array.
+
+        The optimizer should only interfere with this array, such that transformations
+        are secured.
        """
-        Set parameters p, but make sure they get transformed before setting.
-        This means, the optimizer sees p, whereas the model sees transformed(p), 
-        such that, the parameters the model sees are in the right domain.
-        """
-        from ties_and_remappings import Tie
-        if not(p is self.param_array):
+        if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
+            self._optimizer_copy_ = np.empty(self.size)
+
+        if not self._optimizer_copy_transformed:
+            self._optimizer_copy_.flat = self.param_array.flat
+            [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
            if self.has_parent() and self.constraints[__fixed__].size != 0:
                fixes = np.ones(self.size).astype(bool)
-#                 fixes[self.constraints[__fixed__]] = FIXED
-                for c, ind in self.constraints.iteritems():
-                    if c == __fixed__ or isinstance(c,Tie):
-                        fixes[ind] = FIXED
-                self.param_array.flat[fixes] = p
-            elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
-            else: self.param_array.flat = p
-        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
-         for c, ind in self.constraints.iteritems() if c != __fixed__ and not isinstance(c,Tie)]
-        [np.put(self.param_array, ind, c.val)
-         for c, ind in self.constraints.iteritems() if isinstance(c,Tie)]
+                fixes[self.constraints[__fixed__]] = FIXED
+                return self._optimizer_copy_[fixes]
+            elif self._has_fixes():
+                return self._optimizer_copy_[self._fixes_]
+            self._optimizer_copy_transformed = True
+
+        return self._optimizer_copy_
+
+    @optimizer_array.setter
+    def optimizer_array(self, p):
+        """
+        Make sure the optimizer copy does not get touched, thus, we only want to
+        set the values *inside* not the array itself.
+
+        Also we want to update param_array in here.
+        """
+        f = None
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            f = np.ones(self.size).astype(bool)
+            f[self.constraints[__fixed__]] = FIXED
+        elif self._has_fixes():
+            f = self._fixes_
+        if f is None:
+            self.param_array.flat = p
+            [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
+             for c, ind in self.constraints.iteritems() if c != __fixed__]
+        else:
+            self.param_array.flat[f] = p
+            [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
+             for c, ind in self.constraints.iteritems() if c != __fixed__]
+
+        self._optimizer_copy_transformed = False
        self._trigger_params_changed()

+    def _get_params_transformed(self):
+        raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
+#
+    def _set_params_transformed(self, p):
+        raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
+
    def _trigger_params_changed(self, trigger_parent=True):
        """
        First tell all children to update,
@ -687,7 +711,7 @@ class OptimizationHandlable(Indexable):

        If trigger_parent is True, we will tell the parent, otherwise not.
        """
-        [p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
+        [p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed]
        self.notify_observers(None, None if trigger_parent else -np.inf)

    def _size_transformed(self):
@ -702,11 +726,7 @@ class OptimizationHandlable(Indexable):
        Transform the gradients by multiplying the gradient factor for each
        constraint to it.
        """
-        if self.has_parent():
-            return g
-        from ties_and_remappings import Tie
-        [np.put(g, self._raveled_index_for(c.val), g[i].sum()) for c, i in self.constraints.iteritems() if isinstance(c,Tie)]
-        [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__ and not isinstance(c,Tie)]
+        [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
        if self._has_fixes(): return g[self._fixes_]
        return g

@ -746,7 +766,7 @@ class OptimizationHandlable(Indexable):
    #===========================================================================
    # Randomizeable
    #===========================================================================
-    def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs):
+    def randomize(self, rand_gen=np.random.normal, *args, **kwargs):
        """
        Randomize the model.
        Make this draw from the prior if one exists, else draw from given random generator
@ -757,10 +777,10 @@ class OptimizationHandlable(Indexable):
        :param args, kwargs: will be passed through to random number generator
        """
        # first take care of all parameters (from N(0,1))
-        x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
+        x = rand_gen(size=self._size_transformed(), *args, **kwargs)
        # now draw from prior where possible
        [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
-        self._set_params_transformed(x)  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self.optimizer_array = x  # makes sure all of the tied parameters get the same init (since there's only one prior object...)

    #===========================================================================
    # For shared memory arrays. This does nothing in Param, but sets the memory
@ -788,6 +808,11 @@ class OptimizationHandlable(Indexable):
        1.) connect param_array of children to self.param_array
        2.) tell all children to propagate further
        """
+        if self.param_array.size != self.size:
+            self._param_array_ = np.empty(self.size, dtype=np.float64)
+        if self.gradient.size != self.size:
+            self._gradient_array_ = np.empty(self.size, dtype=np.float64)
+
        pi_old_size = 0
        for pi in self.parameters:
            pislice = slice(pi_old_size, pi_old_size + pi.size)
@ -801,6 +826,9 @@ class OptimizationHandlable(Indexable):
            pi._propagate_param_grad(parray[pislice], garray[pislice])
            pi_old_size += pi.size

+    def _connect_parameters(self):
+        """
+        Does nothing at this level.
+
+        NOTE(review): presumably a hook which classes holding child
+        parameters override -- confirm against Parameterized.
+        """
+        pass
+
 class Parameterizable(OptimizationHandlable):
    """
    A parameterisable class.
@ -819,26 +847,48 @@ class Parameterizable(OptimizationHandlable):
        self.parameters = ArrayList()
        self._param_array_ = None
        self._added_names_ = set()
+        self.logger = logging.getLogger(self.__class__.__name__)
        self.__visited = False # for traversing in reverse order we need to know if we were here already
-        self.ties = {}

    @property
    def param_array(self):
        """
        Array representing the parameters of this class.
        There is only one copy of all parameters in memory, two during optimization.
+
+        !WARNING!: setting the parameter array MUST always be done in place,
+        i.e. by writing into the existing array:
+        m.param_array[:] = m_copy.param_array
        """
        # allocate the array on first access, if there is none yet
        if self.__dict__.get('_param_array_', None) is None:
            self._param_array_ = np.empty(self.size, dtype=np.float64)
        return self._param_array_

+    @property
+    def unfixed_param_array(self):
+        """
+        The parameter array of this class, with the entries of the fixed
+        constraint masked out.
+
+        If the fixed constraint holds no index, the parameter array itself is
+        returned. Otherwise the result comes from boolean mask indexing, which
+        in numpy gives a *copy*: writing into it does NOT change the
+        parameters of this class.
+
+        NOTE(review): this assumes FIXED is False, so that the mask drops the
+        fixed entries -- confirm.
+        """
+        # allocate the array on first access, if there is none yet
+        if self.__dict__.get('_param_array_', None) is None:
+            self._param_array_ = np.empty(self.size, dtype=np.float64)
+                    
+        if self.constraints[__fixed__].size !=0:
+            # boolean mask over all parameters, set to FIXED at the fixed indices
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            return self._param_array_[fixes]
+        else:
+            return self._param_array_
+
    @param_array.setter
    def param_array(self, arr):
        self._param_array_ = arr

    def traverse(self, visit, *args, **kwargs):
        """
-        Traverse the hierarchy performing visit(self, *args, **kwargs) 
+        Traverse the hierarchy performing visit(self, *args, **kwargs)
        at every node passed by downwards. This function includes self!

        See "visitor pattern" in literature. This is implemented in pre-order fashion.
@ -930,14 +980,33 @@ class Parameterizable(OptimizationHandlable):
        self._remove_parameter_name(None, old_name)
        self._add_parameter_name(param)

+    def __setstate__(self, state):
+        """
+        Restore the state through the parent class and create the logger
+        anew, named after the class of this object.
+
+        :param state: the state, handed on to the __setstate__ of the parent class
+        :returns: self
+        """
+        super(Parameterizable, self).__setstate__(state)
+        self.logger = logging.getLogger(self.__class__.__name__)
+        return self
+
    #===========================================================================
    # notification system
    #===========================================================================
    def _parameters_changed_notification(self, me, which=None):
+        """
+        Observer callback: flag the optimizer copy as outdated, so that the
+        next request of optimizer_array updates it to the latest parameters,
+        then call parameters_changed().
+
+        :param me: not used in here
+        :param which: not used in here
+        """
+        self._optimizer_copy_transformed = False # tells the optimizer array to update on next request
        self.parameters_changed()
    def _pass_through_notify_observers(self, me, which=None):
+        """
+        Hand a notification on to the observers of this object.
+
+        :param me: not used in here
+        :param which: passed on unchanged to notify_observers
+        """
        self.notify_observers(which=which)
+    def _setup_observers(self):
+        """
+        Setup the default observers

+        1: self, calling _parameters_changed_notification
+        2: the parent, if present, calling its _pass_through_notify_observers
+
+        NOTE(review): the third argument of add_observer (-100 and -np.inf)
+        is presumably the priority of the observer -- confirm.
+        """
+        self.add_observer(self, self._parameters_changed_notification, -100)
+        if self.has_parent():
+            self.add_observer(self._parent_, self._parent_._pass_through_notify_observers, -np.inf)
    #===========================================================================
    # From being parentable, we have to define the parent_change notification
    #===========================================================================
@ -956,4 +1025,3 @@ class Parameterizable(OptimizationHandlable):
        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
        """
        pass
-
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -8,11 +8,23 @@ from re import compile, _pattern_type
 from param import ParamConcatenation
 from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing

+import logging
+logger = logging.getLogger("parameters changed meta")
+
 class ParametersChangedMeta(type):
+    """
+    Metaclass which runs the connection of the parameters and
+    parameters_changed() after the instance was created.
+    """
    def __call__(self, *args, **kw):
-        instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
-        instance.parameters_changed()
-        return instance
+        # in here self is the *class* being instantiated, so the flag is
+        # set on the class and seen by the instance while it is created
+        self._in_init_ = True
+        # from here on self is the new *instance*
+        self = super(ParametersChangedMeta, self).__call__(*args, **kw)
+        logger.debug("finished init")
+        # set on the instance only, the class attribute stays True
+        self._in_init_ = False
+        logger.debug("connecting parameters")
+        self._highest_parent_._connect_parameters()
+        self._highest_parent_._notify_parent_change()
+        self._highest_parent_._connect_fixes()
+        logger.debug("calling parameters changed")
+        self.parameters_changed()
+        return self

 class Parameterized(Parameterizable):
    """
@ -57,21 +69,19 @@ class Parameterized(Parameterizable):
        and concatenate them. Printing m[''] will result in printing of all parameters in detail.
    """
    #===========================================================================
-    # Metaclass for parameters changed after init. 
+    # Metaclass for parameters changed after init.
    # This makes sure, that parameters changed will always be called after __init__
-    # **Never** call parameters_changed() yourself 
+    # **Never** call parameters_changed() yourself
    __metaclass__ = ParametersChangedMeta
    #===========================================================================
    def __init__(self, name=None, parameters=[], *a, **kw):
        super(Parameterized, self).__init__(name=name, *a, **kw)
-        self._in_init_ = True
        self.size = sum(p.size for p in self.parameters)
        self.add_observer(self, self._parameters_changed_notification, -100)
        if not self._has_fixes():
            self._fixes_ = None
        self._param_slices_ = []
-        self._connect_parameters()
-        del self._in_init_
+        #self._connect_parameters()
        self.add_parameters(*parameters)

    def build_pydot(self, G=None):
@ -125,6 +135,9 @@ class Parameterized(Parameterizable):
                param._parent_.remove_parameter(param)
            # make sure the size is set
            if index is None:
+                start = sum(p.size for p in self.parameters)
+                self.constraints.shift_right(start, param.size)
+                self.priors.shift_right(start, param.size)
                self.constraints.update(param.constraints, self.size)
                self.priors.update(param.priors, self.size)
                self.parameters.append(param)
@ -143,14 +156,16 @@ class Parameterized(Parameterizable):
                parent.size += param.size
                parent = parent._parent_

-            self._connect_parameters()
+            if not self._in_init_:
+                self._connect_parameters()
+                self._notify_parent_change()

-            self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
-            self._highest_parent_._notify_parent_change()
-            self._highest_parent_._connect_fixes()
+                self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
+                self._highest_parent_._notify_parent_change()
+                self._highest_parent_._connect_fixes()

        else:
-            raise HierarchyError, """Parameter exists already and no copy made"""
+            raise HierarchyError, """Parameter exists already, try making a copy"""


    def add_parameters(self, *parameters):
@ -198,26 +213,28 @@ class Parameterized(Parameterizable):
            # no parameters for this class
            return
        if self.param_array.size != self.size:
-            self.param_array = np.empty(self.size, dtype=np.float64)
+            self._param_array_ = np.empty(self.size, dtype=np.float64)
        if self.gradient.size != self.size:
            self._gradient_array_ = np.empty(self.size, dtype=np.float64)

        old_size = 0
        self._param_slices_ = []
        for i, p in enumerate(self.parameters):
+            if not p.param_array.flags['C_CONTIGUOUS']:
+                raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
+
            p._parent_ = self
            p._parent_index_ = i

            pslice = slice(old_size, old_size + p.size)
+
            # first connect all children
            p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
+
            # then connect children to self
            self.param_array[pslice] = p.param_array.flat  # , requirements=['C', 'W']).ravel(order='C')
            self.gradient_full[pslice] = p.gradient_full.flat  # , requirements=['C', 'W']).ravel(order='C')

-            if not p.param_array.flags['C_CONTIGUOUS']:
-                raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
-
            p.param_array.data = self.param_array[pslice].data
            p.gradient_full.data = self.gradient_full[pslice].data

@ -292,12 +309,16 @@ class Parameterized(Parameterizable):
        except Exception as e:
            print "WARNING: caught exception {!s}, trying to continue".format(e)

-    def copy(self):
-        c = super(Parameterized, self).copy()
-        c._connect_parameters()
-        c._connect_fixes()
-        c._notify_parent_change()
-        return c
+    def copy(self, memo=None):
+        """
+        Copy this object through the parent class and reconnect the copy.
+
+        Before the copy of the parent class is called, the ids of
+        optimizer_array and param_array are entered into memo with value None.
+        NOTE(review): presumably memo is a deepcopy style memo dictionary, so
+        that these two arrays are not copied along -- confirm against the
+        copy of the parent class.
+
+        :param memo: dictionary handed on to the copy of the parent class,
+                     a new one is created if None
+        :returns: the copy, after _connect_parameters, _connect_fixes and
+                  _notify_parent_change were called on it
+        """
+        if memo is None:
+            memo = {}
+        memo[id(self.optimizer_array)] = None # the optimizer copy
+        memo[id(self.param_array)] = None # and param_array
+        copy = super(Parameterized, self).copy(memo)
+        copy._connect_parameters()
+        copy._connect_fixes()
+        copy._notify_parent_change()
+        return copy

    #===========================================================================
    # Printing:
@ -328,7 +349,7 @@ class Parameterized(Parameterizable):
    def __str__(self, header=True):

        name = adjust_name_for_printing(self.name) + "."
-        constrs = self._constraints_str; 
+        constrs = self._constraints_str;
        ts = self._ties_str
        prirs = self._priors_str
        desc = self._description_str; names = self.parameter_names()
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -76,11 +76,11 @@ class Uniform(Prior):
        o = super(Prior, cls).__new__(cls, lower, upper)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()
-    
+
    def __init__(self, lower, upper):
        self.lower = float(lower)
        self.upper = float(upper)
-    
+
    def __str__(self):
        return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'

@ -93,7 +93,7 @@ class Uniform(Prior):

    def rvs(self, n):
        return np.random.uniform(self.lower, self.upper, size=n)
-    
+
 class LogGaussian(Prior):
    """
    Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
@ -246,7 +246,7 @@ class Gamma(Prior):
        """
        Creates an instance of a Gamma Prior  by specifying the Expected value(s)
        and Variance(s) of the distribution.
-    
+
        :param E: expected value
        :param V: variance
        """
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@ -38,6 +38,7 @@ class SpikeAndSlabPrior(VariationalPrior):
        super(VariationalPrior, self).__init__(name=name, **kw)
        self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
        self.variance = Param('variance',variance)
+        self.learnPi = learnPi
        if learnPi:
            self.add_parameters(self.pi)

@ -58,12 +59,13 @@ class SpikeAndSlabPrior(VariationalPrior):
        gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
        mu.gradient -= gamma*mu/self.variance
        S.gradient -= (1./self.variance - 1./S) * gamma /2.
-        if len(self.pi)==1:
-            self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()
-        if len(self.pi.shape)==1:
-            self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
-        else:
-            self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi))
+        if self.learnPi:
+            if len(self.pi)==1:
+                self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()
+            elif len(self.pi.shape)==1:
+                self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
+            else:
+                self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi))

 class VariationalPosterior(Parameterized):
    def __init__(self, means=None, variances=None, name='latent space', *a, **kw):
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -8,6 +8,9 @@ from ..inference.latent_function_inference import var_dtc
 from .. import likelihoods
 from parameterization.variational import VariationalPosterior

+import logging
+logger = logging.getLogger("sparse gp")
+
 class SparseGP(GP):
    """
    A general purpose Sparse GP model
@ -46,7 +49,7 @@ class SparseGP(GP):
        self.num_inducing = Z.shape[0]

        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata)
-
+        logger.info("Adding Z as parameter")
        self.add_parameter(self.Z, index=0)

    def has_uncertain_inputs(self):
@ -57,19 +60,23 @@ class SparseGP(GP):
        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        if isinstance(self.X, VariationalPosterior):
            #gradients wrt kernel
-            dL_dKmm = self.grad_dict.pop('dL_dKmm')
+            dL_dKmm = self.grad_dict['dL_dKmm']
            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
            target = self.kern.gradient.copy()
-            self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
+            self.kern.update_gradients_expectations(variational_posterior=self.X,
+                                                    Z=self.Z,
+                                                    dL_dpsi0=self.grad_dict['dL_dpsi0'],
+                                                    dL_dpsi1=self.grad_dict['dL_dpsi1'],
+                                                    dL_dpsi2=self.grad_dict['dL_dpsi2'])
            self.kern.gradient += target

            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
            self.Z.gradient += self.kern.gradients_Z_expectations(
-                               self.grad_dict['dL_dpsi0'], 
-                               self.grad_dict['dL_dpsi1'], 
-                               self.grad_dict['dL_dpsi2'], 
-                               Z=self.Z, 
+                               self.grad_dict['dL_dpsi0'],
+                               self.grad_dict['dL_dpsi1'],
+                               self.grad_dict['dL_dpsi2'],
+                               Z=self.Z,
                               variational_posterior=self.X)
        else:
            #gradients wrt kernel
--- a/GPy/gpy_config.cfg
+++ b/GPy/gpy_config.cfg
@ -1,9 +1,14 @@
-# This is the configuration file for GPy
+# This is the default configuration file for GPy

+# Do not edit this file.
+
+# For machine specific changes (i.e. those specific to a given installation) edit GPy/installation.cfg
+
+# For user specific changes edit $HOME/.gpy_user.cfg
 [parallel]
 # Enable openmp support. This speeds up some computations, depending on the number
 # of cores available. Setting up a compiler with openmp support can be difficult on
-# some platforms, hence this option.
+# some platforms, hence by default it is off.
 openmp=False

 [datasets]
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -37,7 +37,7 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
    # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)

    p = .3
-    
+
    m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)

    if nan:
@ -99,7 +99,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
        m.kern.plot_ARD()
    return m

-def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4, sigma=.2):
+def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2):
    import GPy
    from GPy.util.datasets import swiss_roll_generated
    from GPy.models import BayesianGPLVM
@ -144,16 +144,15 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
    m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
    m.data_colors = c
    m.data_t = t
-    m['noise_variance'] = Y.var() / 100.

    if optimize:
-        m.optimize('scg', messages=verbose, max_iters=2e3)
+        m.optimize('bfgs', messages=verbose, max_iters=2e3)

    if plot:
        fig = plt.figure('fitted')
        ax = fig.add_subplot(111)
        s = m.input_sensitivity().argsort()[::-1][:2]
-        ax.scatter(*m.X.T[s], c=c)
+        ax.scatter(*m.X.mean.T[s], c=c)

    return m

@ -170,16 +169,43 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
    Y = data['X'][:N]
    m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
    m.data_labels = data['Y'][:N].argmax(axis=1)
-    
+
    if optimize:
-        m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
+        m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)

    if plot:
        fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
        m.plot_latent(ax=latent_axes, labels=m.data_labels)
        data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
-            m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
+            m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
+        raw_input('Press enter to finish')
+        plt.close(fig)
+    return m
+
+def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
+    """
+    Run an SSGPLVM on the first N points of the oil data set.
+
+    :param optimize: whether to optimize the model (with 'bfgs')
+    :param verbose: handed to optimize as messages
+    :param plot: whether to plot the latent space and wait for enter
+    :param N: number of data points taken from the data set
+    :param Q: dimensionality handed to the kernel and the model
+    :param num_inducing: number of inducing points of the model
+    :param max_iters: maximal number of iterations of the optimization
+    :param k: further keyword arguments for GPy.models.SSGPLVM
+    :returns: the model
+    """
+    import GPy
+    from matplotlib import pyplot as plt
+    from ..util.misc import param_to_array
+
+    # fixed seed, the lengthscales of the kernel below are drawn at random
+    _np.random.seed(0)
+    data = GPy.util.datasets.oil()
+
+    kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
+    Y = data['X'][:N]
+    m = GPy.models.SSGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
+    m.data_labels = data['Y'][:N].argmax(axis=1)
+
+    if optimize:
+        m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
+
+    if plot:
+        fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
+        m.plot_latent(ax=latent_axes, labels=m.data_labels)
+        data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
+        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
+            m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
        raw_input('Press enter to finish')
        plt.close(fig)
    return m
@ -322,15 +348,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
    from GPy.models import BayesianGPLVM
    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData

-    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
+    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
    Y = Ylist[0]
    k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)

-    inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
-    Y[inan] = _np.nan
+    inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data
+    Ymissing = Y.copy()
+    Ymissing[inan] = _np.nan

-    m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, 
+    m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
                      inference_method=VarDTCMissingData(inan=inan), kernel=k)

    m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
@ -390,7 +417,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
    for inan in inanlist:
        imlist.append(VarDTCMissingData(limit=1, inan=inan))

-    m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, 
+    m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
            kernel=k, inference_method=imlist,
            initx="random", initz='permute', **kw)

@ -411,18 +438,17 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
    Yn = Y - Y.mean()
    Yn /= Yn.std()

-    m = GPy.models.GPLVM(Yn, Q)
+    m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)

    # optimize
-    m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped())

-    if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
+    if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)

    if plot:
        ax = m.plot_latent(which_indices=(0, 1))
-        y = m.likelihood.Y[0, :]
+        y = m.Y[0, :]
        data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
-        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
        raw_input('Press enter to finish')

    return m
@ -436,13 +462,14 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
    Yn = Y - Y.mean()
    Yn /= Yn.std()

-    m = GPy.models.GPLVM(Yn, Q)
-    if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
+    m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
+
+    if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
    if plot:
        ax = m.plot_latent(which_indices=(0, 1))
-        y = m.likelihood.Y[0, :]
+        y = m.Y[0, :]
        data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
-        GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
+        lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
        raw_input('Press enter to finish')

    return m
@ -525,9 +552,8 @@ def robot_wireless(optimize=True, verbose=True, plot=True):

    data = GPy.util.datasets.robot_wireless()
    # optimize
-    m = GPy.models.GPLVM(data['Y'], 2)
+    m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25)
    if optimize: m.optimize(messages=verbose, max_f_eval=10000)
-    m._set_params(m._get_params())
    if plot:
        m.plot_latent()

@ -541,14 +567,18 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):

    data = GPy.util.datasets.osu_run1()
    Q = 6
-    kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) 
+    kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)

    m.data = data
    m.likelihood.variance = 0.001

    # optimize
-    if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
+    try:
+        if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
+    except KeyboardInterrupt:
+        print "Keyboard interrupt, continuing to plot and return"
+
    if plot:
        fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
        plt.sca(latent_axes)
@ -589,7 +619,7 @@ def ssgplvm_simulation_linear():
    import GPy
    N, D, Q = 1000, 20, 5
    pi = 0.2
-    
+
    def sample_X(Q, pi):
        x = np.empty(Q)
        dies = np.random.rand(Q)
@ -599,7 +629,7 @@ def ssgplvm_simulation_linear():
            else:
                x[q] = 0.
        return x
-    
+
    Y = np.empty((N,D))
    X = np.empty((N,Q))
    # Generate data from random sampled weight matrices
@ -607,4 +637,4 @@ def ssgplvm_simulation_linear():
        X[n] = sample_X(Q,pi)
        w = np.random.randn(D,Q)
        Y[n] = np.dot(w,X[n])
-    
+
--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@ -32,7 +32,7 @@ class EP(LatentFunctionInference):
        pass

    def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
-        num_data, output_dim = X.shape
+        num_data, output_dim = Y.shape
        assert output_dim ==1, "ep in 1D only (for now!)"

        K = kern.K(X)
--- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py
@ -56,7 +56,7 @@ class EPDTC(LatentFunctionInference):
        self._ep_approximation = None

    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
-        num_data, output_dim = X.shape
+        num_data, output_dim = Y.shape
        assert output_dim ==1, "ep in 1D only (for now!)"

        Kmm = kern.K(Z)
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@ -9,6 +9,8 @@ import numpy as np
 from ...util.misc import param_to_array
 from . import LatentFunctionInference
 log_2_pi = np.log(2*np.pi)
+import logging, itertools
+logger = logging.getLogger('vardtc')

 class VarDTC(LatentFunctionInference):
    """
@ -180,11 +182,12 @@ class VarDTC(LatentFunctionInference):
        return post, log_marginal, grad_dict

 class VarDTCMissingData(LatentFunctionInference):
-    const_jitter = 1e-6
+    const_jitter = 1e-10
    def __init__(self, limit=1, inan=None):
+        """
+        :param limit: handed to the Cacher which wraps _subarray_computations
+        :param inan: optional mask; if given it is stored *inverted* (~inan)
+                     in self._inan, otherwise self._inan is None.
+                     NOTE(review): presumably a boolean array being True at
+                     the missing entries, so that self._inan is True at the
+                     observed ones -- confirm.
+        """
        from ...util.caching import Cacher
        self._Y = Cacher(self._subarray_computations, limit)
-        self._inan = inan
+        if inan is not None: self._inan = ~inan
+        else: self._inan = None
        pass

    def set_limit(self, limit):
@ -205,21 +208,35 @@ class VarDTCMissingData(LatentFunctionInference):
        if self._inan is None:
            inan = np.isnan(Y)
            has_none = inan.any()
+            self._inan = ~inan
        else:
            inan = self._inan
            has_none = True
        if has_none:
-            from ...util.subarray_and_sorting import common_subarrays
-            self._subarray_indices = []
-            for v,ind in common_subarrays(inan, 1).iteritems():
-                if not np.all(v):
-                    v = ~np.array(v, dtype=bool)
-                    ind = np.array(ind, dtype=int)
-                    if ind.size == Y.shape[1]:
-                        ind = slice(None)
-                    self._subarray_indices.append([v,ind])
-            Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices]
-            traces = [(y**2).sum() for y in Ys]
+            #print "caching missing data slices, this can take several minutes depending on the number of unique dimensions of the data..."
+            #csa = common_subarrays(inan, 1)
+            size = Y.shape[1]
+            #logger.info('preparing subarrays {:3.3%}'.format((i+1.)/size))
+            Ys = []
+            next_ten = [0.]
+            count = itertools.count()
+            for v, y in itertools.izip(inan.T, Y.T[:,:,None]):
+                i = count.next()
+                if ((i+1.)/size) >= next_ten[0]:
+                    logger.info('preparing subarrays {:>6.1%}'.format((i+1.)/size))
+                    next_ten[0] += .1
+                Ys.append(y[v,:])
+
+            next_ten = [0.]
+            count = itertools.count()
+            def trace(y):
+                i = count.next()
+                if ((i+1.)/size) >= next_ten[0]:
+                    logger.info('preparing traces {:>6.1%}'.format((i+1.)/size))
+                    next_ten[0] += .1
+                y = y[inan[:,i],i:i+1]
+                return np.einsum('ij,ij->', y,y)
+            traces = [trace(Y) for _ in xrange(size)]
            return Ys, traces
        else:
            self._subarray_indices = [[slice(None),slice(None)]]
@ -241,7 +258,6 @@ class VarDTCMissingData(LatentFunctionInference):
        beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
        het_noise = beta_all.size != 1

-        import itertools
        num_inducing = Z.shape[0]

        dL_dpsi0_all = np.zeros(Y.shape[0])
@ -261,22 +277,17 @@ class VarDTCMissingData(LatentFunctionInference):
        Lm = jitchol(Kmm)
        if uncertain_inputs: LmInv = dtrtri(Lm)

-        VVT_factor_all = np.empty(Y.shape)
-        full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
-        if not full_VVT_factor:
-            psi1V = np.dot(Y.T*beta_all, psi1_all).T
-
-        for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
-            if het_noise: beta = beta_all[ind]
+        size = Y.shape[1]
+        next_ten = 0
+        for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
+            if ((i+1.)/size) >= next_ten:
+                logger.info('inference {:> 6.1%}'.format((i+1.)/size))
+                next_ten += .1
+            if het_noise: beta = beta_all[i]
            else: beta = beta_all

-            VVT_factor = (beta*y)
-            try:
-                VVT_factor_all[v, ind].flat = VVT_factor.flat
-            except ValueError:
-                mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
-                VVT_factor_all.flat[mult] = VVT_factor
-            output_dim = y.shape[1]
+            VVT_factor = (y*beta)
+            output_dim = 1#len(ind)

            psi0 = psi0_all[v]
            psi1 = psi1_all[v, :]
@ -318,7 +329,6 @@ class VarDTCMissingData(LatentFunctionInference):
                VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
                psi1, het_noise, uncertain_inputs)

-            #import ipdb;ipdb.set_trace()
            dL_dpsi0_all[v] += dL_dpsi0
            dL_dpsi1_all[v, :] += dL_dpsi1
            if uncertain_inputs:
@ -335,19 +345,20 @@ class VarDTCMissingData(LatentFunctionInference):
                psi0, psi1, beta,
                data_fit, num_data, output_dim, trYYT, Y)

-            if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
-            else:
-                print 'foobar'
-                tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
-                tmp, _ = dpotrs(LB, tmp, lower=1)
-                woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]
+            #if full_VVT_factor:
+            woodbury_vector[:, i:i+1] = Cpsi1Vf
+            #else:
+            #    print 'foobar'
+            #    tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
+            #    tmp, _ = dpotrs(LB, tmp, lower=1)
+            #    woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0]

            #import ipdb;ipdb.set_trace()
            Bi, _ = dpotri(LB, lower=1)
            symmetrify(Bi)
            Bi = -dpotri(LB, lower=1)[0]
            diag.add(Bi, 1)
-            woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
+            woodbury_inv_all[:, :, i:i+1] = backsub_both_sides(Lm, Bi)[:,:,None]

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)

@ -364,23 +375,6 @@ class VarDTCMissingData(LatentFunctionInference):
                         'dL_dKnm':dL_dpsi1_all,
                         'dL_dthetaL':dL_dthetaL}

-        #get sufficient things for posterior prediction
-        #TODO: do we really want to do this in  the loop?
-        #if not full_VVT_factor:
-        #    print 'foobar'
-        #    psi1V = np.dot(Y.T*beta_all, psi1_all).T
-        #    tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
-        #    tmp, _ = dpotrs(LB_all, tmp, lower=1)
-        #    woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
-        #import ipdb;ipdb.set_trace()
-        #Bi, _ = dpotri(LB_all, lower=1)
-        #symmetrify(Bi)
-        #Bi = -dpotri(LB_all, lower=1)[0]
-        #from ...util import diag
-        #diag.add(Bi, 1)
-
-        #woodbury_inv = backsub_both_sides(Lm, Bi)
-
        post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

        return post, log_marginal, grad_dict
--- a/GPy/inference/optimization/init.py
+++ b/GPy/inference/optimization/init.py
@ -1,2 +1,3 @@
 from scg import SCG
 from optimization import *
+from hmc import HMC,HMC_shortcut
--- a/GPy/inference/optimization/hmc.py
+++ b/GPy/inference/optimization/hmc.py
@ -0,0 +1,157 @@
+"""HMC implementation"""
+
+import numpy as np
+
+
+class HMC:
+    def __init__(self,model,M=None,stepsize=1e-1):
+        self.model = model
+        self.stepsize = stepsize
+        self.p = np.empty_like(model.optimizer_array.copy())
+        if M is None:
+            self.M = np.eye(self.p.size)
+        else:
+            self.M = M
+        self.Minv = np.linalg.inv(self.M)
+
+    def sample(self, m_iters=1000, hmc_iters=20):
+        params = np.empty((m_iters,self.p.size))
+        for i in xrange(m_iters):
+            self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
+            H_old = self._computeH()
+            theta_old = self.model.optimizer_array.copy()
+            params[i] = self.model.unfixed_param_array
+            #Matropolis
+            self._update(hmc_iters)
+            H_new = self._computeH()
+
+            if H_old>H_new:
+                k = 1.
+            else:
+                k = np.exp(H_old-H_new)
+            if np.random.rand()<k:
+                params[i] = self.model.unfixed_param_array
+            else:
+                self.model.optimizer_array = theta_old
+        return params
+
+    def _update(self, hmc_iters):
+        for i in xrange(hmc_iters):
+            self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
+            self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
+            self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
+
+    def _computeH(self,):
+        return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
+
+class HMC_shortcut:
+    """
+    HMC sampler variant that draws a random step size per sample and may reverse
+    the leapfrog trajectory depending on the spread of recent Hamiltonian values.
+
+    :param model: object exposing ``optimizer_array``, ``unfixed_param_array``,
+        ``objective_function()``, ``objective_function_gradients()`` and
+        ``_transform_gradients()``
+    :param M: mass matrix (covariance of the momentum); identity when None
+    :param stepsize_range: [lower, upper] bounds of the step size; stored as logs
+    :param groupsize: number of consecutive Hamiltonian values passed to ``_testH``
+    :param Hstd_th: [lower, upper] thresholds on the standard deviation used by ``_testH``
+    """
+    def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
+        self.model = model
+        # keep the bounds in log space; sample() draws uniformly between them and exponentiates
+        self.stepsize_range = np.log(stepsize_range)
+        self.p = np.empty_like(model.optimizer_array.copy())
+        self.groupsize = groupsize
+        self.Hstd_th = Hstd_th
+        if M is None:
+            self.M = np.eye(self.p.size)
+        else:
+            self.M = M
+        self.Minv = np.linalg.inv(self.M)
+
+    def sample(self, m_iters=1000, hmc_iters=20):
+        """
+        Draw ``m_iters`` samples, each proposed by a trajectory of up to ``hmc_iters`` steps.
+
+        :returns: array of shape (m_iters, number of optimizer parameters) filled
+            from ``model.unfixed_param_array``
+        """
+        params = np.empty((m_iters,self.p.size))
+        for i in xrange(m_iters):
+            # sample a stepsize from the uniform distribution
+            stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
+            self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
+            H_old = self._computeH()
+            params[i] = self.model.unfixed_param_array
+            theta_old = self.model.optimizer_array.copy()
+            #Metropolis
+            self._update(hmc_iters, stepsize)
+            H_new = self._computeH()
+
+            if H_old>H_new:
+                k = 1.
+            else:
+                k = np.exp(H_old-H_new)
+            if np.random.rand()<k:
+                params[i] = self.model.unfixed_param_array
+            else:
+                # rejected: restore the parameters from before the trajectory
+                self.model.optimizer_array = theta_old
+        return params
+
+    def _update(self, hmc_iters, stepsize):
+        """
+        Run the leapfrog trajectory, updating the model and ``self.p`` in place.
+
+        Visited states are stored in buffers of length ``2*hmc_iters+1`` whose
+        centre index ``hmc_iters`` holds the starting state; ``pos`` is the signed
+        offset from that centre.
+        """
+        theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
+        p_buf = np.empty((2*hmc_iters+1,self.p.size))
+        H_buf = np.empty((2*hmc_iters+1,))
+        # Set initial position
+        theta_buf[hmc_iters] = self.model.optimizer_array
+        p_buf[hmc_iters] = self.p
+        H_buf[hmc_iters] = self._computeH()
+
+        # offsets at which the trajectory has been reversed so far
+        reversal = []
+        pos = 1
+        i=0
+        while i<hmc_iters:
+            # one leapfrog step: half momentum, full position, half momentum
+            self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
+            self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
+            self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
+
+            # record the new state at the current offset
+            theta_buf[hmc_iters+pos] = self.model.optimizer_array
+            p_buf[hmc_iters+pos] = self.p
+            H_buf[hmc_iters+pos] = self._computeH()
+            i+=1
+
+            if i<self.groupsize:
+                # not enough states yet to run the test on a full group
+                pos += 1
+                continue
+            else:
+                if len(reversal)==0:
+                    # indices of the last `groupsize` states in the forward direction
+                    Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
+                    if self._testH(H_buf[Hlist]):
+                        pos += 1
+                    else:
+                        # Reverse the trajectory for the 1st time
+                        reversal.append(pos)
+                        if hmc_iters-i>pos:
+                            # restart from the initial state with negated momentum
+                            pos = -1
+                            i += pos
+                            self.model.optimizer_array = theta_buf[hmc_iters]
+                            self.p[:] = -p_buf[hmc_iters]
+                        else:
+                            # jump directly to an already stored state and stop
+                            pos_new = pos-hmc_iters+i
+                            self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
+                            self.p[:] = -p_buf[hmc_iters+pos_new]
+                            break
+                else:
+                    # indices of `groupsize` states starting at the current offset
+                    Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
+#                    print Hlist
+#                    print self._testH(H_buf[Hlist])
+
+                    if self._testH(H_buf[Hlist]):
+                        pos += -1
+                    else:
+                        # Reverse the trajectory for the 2nd time
+                        r = (hmc_iters - i)%((reversal[0]-pos)*2)
+                        if r>(reversal[0]-pos):
+                            pos_new = 2*reversal[0] - r - pos
+                        else:
+                            pos_new = pos + r
+                        self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
+                        self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
+#                        print reversal[0],pos,pos_new
+#                        print H_buf
+                        break
+
+    def _testH(self, Hlist):
+        """Return True when the standard deviation of ``Hlist`` lies within ``[Hstd_th[0], Hstd_th[1]]``."""
+        Hstd = np.std(Hlist)
+#        print Hlist
+#        print Hstd
+        if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
+            return False
+        else:
+            return True
+
+    def _computeH(self,):
+        """Return the Hamiltonian: objective plus Gaussian momentum energy (length-1 array because of ``self.p[:,None]``)."""
+        return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
+
--- a/GPy/inference/optimization/scg.py
+++ b/GPy/inference/optimization/scg.py
@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
    if gtol is None:
        gtol = 1e-5

-    sigma0 = 1.0e-8
+    sigma0 = 1.0e-7
    fold = f(x, *optargs) # Initial function value.
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs) # Initial gradient.
-    if any(np.isnan(gradnew)):
-        raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
+    #if any(np.isnan(gradnew)):
+    #    raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
    current_grad = np.dot(gradnew, gradnew)
    gradold = gradnew.copy()
    d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
        if Delta < 0.25:
            beta = min(4.0 * beta, betamax)
        if Delta > 0.75:
-            beta = max(0.5 * beta, betamin)
+            beta = max(0.25 * beta, betamin)

        # Update search direction using Polak-Ribiere formula, or re-start
        # in direction of negative gradient after nparams steps.
        if nsuccess == x.size:
            d = -gradnew
-#             beta = 1.  # TODO: betareset!!
+            beta = 1. # This is not in the original paper
            nsuccess = 0
        elif success:
            Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
--- a/GPy/installation.cfg
+++ b/GPy/installation.cfg
@ -0,0 +1,2 @@
+# This is the local installation configuration file for GPy
+
--- a/GPy/kern/init.py
+++ b/GPy/kern/init.py
@ -13,7 +13,9 @@ from _src.ODE_UYC import ODE_UYC
 from _src.ODE_st import ODE_st
 from _src.ODE_t import ODE_t
 from _src.poly import Poly
+
 from _src.trunclinear import TruncLinear,TruncLinear_inf
+from _src.splitKern import SplitKern,DiffGenomeKern

 # TODO: put this in an init file somewhere
 #I'm commenting this out because the files were not added. JH. Remember to add the files before committing
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@ -10,7 +10,7 @@ class Add(CombinationKernel):
    """
    Add given list of kernels together.
    propagates gradients through.
-    
+
    This kernel will take over the active dims of it's subkernels passed in.
    """
    def __init__(self, subkerns, name='add'):
@ -40,7 +40,7 @@ class Add(CombinationKernel):
        return reduce(np.add, (p.Kdiag(X) for p in which_parts))

    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]

    def update_gradients_diag(self, dL_dK, X):
        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
--- a/GPy/kern/_src/independent_outputs.py
+++ b/GPy/kern/_src/independent_outputs.py
@ -20,9 +20,11 @@ def index_to_slices(index):
    returns
    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
    """
+    if len(index)==0:
+        return[]

     #construct the return structure
-    ind = np.asarray(index,dtype=np.int64)
+    ind = np.asarray(index,dtype=np.int)
    ret = [[] for i in range(ind.max()+1)]

    #find the switchpoints
--- a/GPy/kern/_src/periodic.py
+++ b/GPy/kern/_src/periodic.py
@ -101,6 +101,7 @@ class PeriodicExponential(Periodic):
        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
        return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))

+    @silence_errors
    def update_gradients_full(self, dL_dK, X, X2=None):
        """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
        if X2 is None: X2 = X
@ -213,7 +214,7 @@ class PeriodicMatern32(Periodic):
        return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))


-    #@silence_errors
+    @silence_errors
    def update_gradients_full(self,dL_dK,X,X2):
        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
        if X2 is None: X2 = X
--- a/GPy/kern/_src/psi_comp/init.py
+++ b/GPy/kern/_src/psi_comp/init.py
@ -9,7 +9,7 @@ import ssrbf_psi_comp
 import sslinear_psi_comp
 import linear_psi_comp

-class PSICOMP_RBF(Pickleable):
+class PSICOMP_RBF(object):

    @Cache_this(limit=2, ignore_args=(0,))
    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
@ -29,7 +29,7 @@ class PSICOMP_RBF(Pickleable):
        else:
            raise ValueError, "unknown distriubtion received for psi-statistics"
        
-class PSICOMP_Linear(Pickleable):
+class PSICOMP_Linear(object):

    @Cache_this(limit=2, ignore_args=(0,))
    def psicomputations(self, variance, Z, variational_posterior):
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@ -43,6 +43,10 @@ class RBF(Stationary):
    def __setstate__(self, state):
        return super(RBF, self).__setstate__(state)

+    def spectrum(self, omega):
+        assert self.input_dim == 1 #TODO: higher dim spectra?
+        return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
+
    #---------------------------------------#
    #             PSI statistics            #
    #---------------------------------------#
--- a/GPy/kern/_src/splitKern.py
+++ b/GPy/kern/_src/splitKern.py
@ -0,0 +1,204 @@
+"""
+A new kernel
+"""
+
+import numpy as np
+from kern import Kern,CombinationKernel
+from .independent_outputs import index_to_slices
+import itertools
+
+class DiffGenomeKern(Kern):
+
+    def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
+        self.idx_p = idx_p
+        self.index_dim=index_dim
+        self.kern = SplitKern(kernel,Xp, index_dim=index_dim)
+        super(DiffGenomeKern, self).__init__(input_dim=kernel.input_dim+1, active_dims=None, name=name)
+        self.add_parameter(self.kern)
+    
+    def K(self, X, X2=None):
+        assert X2==None
+        K = self.kern.K(X,X2)
+        
+        if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
+            return K
+        
+        slices = index_to_slices(X[:,self.index_dim])
+        idx_start = slices[1][0].start
+        idx_end = idx_start+self.idx_p
+        K_c = K[idx_start:idx_end,idx_start:idx_end].copy()
+        K[idx_start:idx_end,:] = K[:self.idx_p,:]
+        K[:,idx_start:idx_end] = K[:,:self.idx_p]
+        K[idx_start:idx_end,idx_start:idx_end] = K_c
+        
+        return K
+    
+    def Kdiag(self,X):
+        Kdiag = self.kern.Kdiag(X)
+
+        if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
+            return Kdiag
+
+        slices = index_to_slices(X[:,self.index_dim])
+        idx_start = slices[1][0].start
+        idx_end = idx_start+self.idx_p
+        Kdiag[idx_start:idx_end] = Kdiag[:self.idx_p]
+        
+        return Kdiag
+    
+    def update_gradients_full(self,dL_dK,X,X2=None):
+        assert X2==None
+        if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
+            self.kern.update_gradients_full(dL_dK, X)
+            return
+        
+        slices = index_to_slices(X[:,self.index_dim])
+        idx_start = slices[1][0].start
+        idx_end = idx_start+self.idx_p
+        
+        self.kern.update_gradients_full(dL_dK[idx_start:idx_end,:], X[:self.idx_p],X)
+        grad_p1 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dK[:,idx_start:idx_end], X, X[:self.idx_p])
+        grad_p2 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[:self.idx_p],X[idx_start:idx_end])
+        grad_p3 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[idx_start:idx_end], X[:self.idx_p])
+        grad_p4 = self.kern.gradient.copy()
+
+        self.kern.update_gradients_full(dL_dK[idx_start:idx_end,:], X[idx_start:idx_end],X)
+        grad_n1 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dK[:,idx_start:idx_end], X, X[idx_start:idx_end])
+        grad_n2 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[idx_start:idx_end], X[idx_start:idx_end])
+        grad_n3 = self.kern.gradient.copy()
+
+        self.kern.update_gradients_full(dL_dK, X)
+        self.kern.gradient += grad_p1+grad_p2-grad_p3-grad_p4-grad_n1-grad_n2+2*grad_n3
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        pass
+
+class SplitKern(CombinationKernel):
+
+    def __init__(self, kernel, Xp, index_dim=-1, name='SplitKern'):
+        assert isinstance(index_dim, int), "The index dimension must be an integer!"
+        self.kern = kernel
+        self.kern_cross = SplitKern_cross(kernel,Xp)
+        super(SplitKern, self).__init__(kernels=[self.kern, self.kern_cross], extra_dims=[index_dim], name=name)
+        self.index_dim = index_dim
+
+    def K(self,X ,X2=None):
+        slices = index_to_slices(X[:,self.index_dim])
+        assert len(slices)<=2, 'The Split kernel only support two different indices'
+        if X2 is None:
+            target = np.zeros((X.shape[0], X.shape[0]))
+            # diagonal blocks
+            [[target.__setitem__((s,ss), self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
+            if len(slices)>1:
+                # cross blocks
+                [target.__setitem__((s,ss), self.kern_cross.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices[0], slices[1])]
+                # cross blocks
+                [target.__setitem__((s,ss), self.kern_cross.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices[1], slices[0])]
+        else:
+            slices2 = index_to_slices(X2[:,self.index_dim])
+            assert len(slices2)<=2, 'The Split kernel only support two different indices'
+            target = np.zeros((X.shape[0], X2.shape[0]))
+            # diagonal blocks
+            [[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
+            if len(slices)>1:
+                [target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])]
+            if len(slices2)>1:
+                [target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[0], slices2[1])]                
+        return target
+
+    def Kdiag(self,X):
+        return self.kern.Kdiag(X)
+
+    def update_gradients_full(self,dL_dK,X,X2=None):
+        slices = index_to_slices(X[:,self.index_dim])
+        target = np.zeros(self.kern.size)
+
+        def collate_grads(dL, X, X2, cross=False):
+            if cross:
+                self.kern_cross.update_gradients_full(dL,X,X2)
+                target[:] += self.kern_cross.kern.gradient
+            else:
+                self.kern.update_gradients_full(dL,X,X2)
+                target[:] += self.kern.gradient
+    
+        if X2 is None:
+            assert dL_dK.shape==(X.shape[0],X.shape[0])
+            [[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
+            if len(slices)>1:
+                [collate_grads(dL_dK[s,ss], X[s], X[ss], True) for s,ss in itertools.product(slices[0], slices[1])]
+                [collate_grads(dL_dK[s,ss], X[s], X[ss], True) for s,ss in itertools.product(slices[1], slices[0])]
+        else:
+            assert dL_dK.shape==(X.shape[0],X2.shape[0])
+            slices2 = index_to_slices(X2[:,self.index_dim])
+            [[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
+            if len(slices)>1:
+                [collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])]
+            if len(slices2)>1:
+                [collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[0], slices2[1])]
+        self.kern.gradient = target
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.kern.update_gradients_diag(self, dL_dKdiag, X)
+
+class SplitKern_cross(Kern):
+
+    def __init__(self, kernel, Xp, name='SplitKern_cross'):
+        assert isinstance(kernel, Kern)
+        self.kern = kernel
+        if not isinstance(Xp,np.ndarray):
+            Xp = np.array([[Xp]])
+        self.Xp = Xp
+        super(SplitKern_cross, self).__init__(input_dim=kernel.input_dim, active_dims=None, name=name)
+        
+    def K(self, X, X2=None):
+        if X2 is None:
+            return np.dot(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X))/self.kern.K(self.Xp,self.Xp)
+        else:
+            return np.dot(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X2))/self.kern.K(self.Xp,self.Xp)
+        
+    def Kdiag(self, X):
+        return np.inner(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X).T)/self.kern.K(self.Xp,self.Xp)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:
+            X2 = X
+                        
+        k1 = self.kern.K(X,self.Xp)
+        k2 = self.kern.K(self.Xp,X2)
+        k3 = self.kern.K(self.Xp,self.Xp)
+        dL_dk1 = np.einsum('ij,j->i',dL_dK,k2[0])/k3[0,0]
+        dL_dk2 = np.einsum('ij,i->j',dL_dK,k1[:,0])/k3[0,0]
+        dL_dk3 = np.einsum('ij,ij->',dL_dK,-np.dot(k1,k2)/(k3[0,0]*k3[0,0]))
+
+        self.kern.update_gradients_full(dL_dk1[:,None],X,self.Xp)
+        grad = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dk2[None,:],self.Xp,X2)
+        grad += self.kern.gradient.copy()
+        self.kern.update_gradients_full(np.array([[dL_dk3]]),self.Xp,self.Xp)
+        grad += self.kern.gradient.copy()
+        
+        self.kern.gradient = grad
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        k1 = self.kern.K(X,self.Xp)
+        k2 = self.kern.K(self.Xp,X)
+        k3 = self.kern.K(self.Xp,self.Xp)
+        dL_dk1 = dL_dKdiag*k2[0]/k3
+        dL_dk2 = dL_dKdiag*k1[:,0]/k3
+        dL_dk3 = -dL_dKdiag*(k1[:,0]*k2[0]).sum()/(k3*k3)
+        
+        self.kern.update_gradients_full(dL_dk1[:,None],X,self.Xp)
+        grad1 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(dL_dk2[None,:],self.Xp,X)
+        grad2 = self.kern.gradient.copy()
+        self.kern.update_gradients_full(np.array([[dL_dk3]]),self.Xp,self.Xp)
+        grad3 = self.kern.gradient.copy()
+        
+        self.kern.gradient = grad1+grad2+grad3
+        
+
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -89,13 +89,14 @@ class Stationary(Kern):
            Xsq = np.sum(np.square(X),1)
            r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
            util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
+            r2 = np.clip(r2, 0, np.inf)
            return np.sqrt(r2)
        else:
            #X2, = self._slice_X(X2)
            X1sq = np.sum(np.square(X),1)
            X2sq = np.sum(np.square(X2),1)
            r2 = -2.*np.dot(X, X2.T) + X1sq[:,None] + X2sq[None,:]
-            r2[r2<0] = 0. # A bit hacky
+            r2 = np.clip(r2, 0, np.inf)
            return np.sqrt(r2)

    @Cache_this(limit=5, ignore_args=())
--- a/GPy/mappings/additive.py
+++ b/GPy/mappings/additive.py
@ -0,0 +1,61 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from ..core.mapping import Mapping
+import GPy
+
+class Additive(Mapping):
+    """
+    Mapping based on adding two existing mappings together.
+
+    .. math::
+
+       f(\mathbf{x}*) = f_1(\mathbf{x}*) + f_2(\mathbf(x)*)
+
+    :param mapping1: first mapping to add together.
+    :type mapping1: GPy.mappings.Mapping
+    :param mapping2: second mapping to add together.
+    :type mapping2: GPy.mappings.Mapping
+    :param tensor: whether or not to use the tensor product of input spaces
+    :type tensor: bool
+
+    """
+
+    def __init__(self, mapping1, mapping2, tensor=False):
+        if tensor:
+            input_dim = mapping1.input_dim + mapping2.input_dim
+        else:
+            input_dim = mapping1.input_dim
+            assert(mapping1.input_dim==mapping2.input_dim)
+        assert(mapping1.output_dim==mapping2.output_dim)
+        output_dim = mapping1.output_dim
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
+        self.mapping1 = mapping1
+        self.mapping2 = mapping2
+        self.num_params = self.mapping1.num_params + self.mapping2.num_params
+        self.name = self.mapping1.name + '+' + self.mapping2.name
+    def _get_param_names(self):
+        return self.mapping1._get_param_names + self.mapping2._get_param_names
+
+    def _get_params(self):
+        return np.hstack((self.mapping1._get_params() self.mapping2._get_params()))
+
+    def _set_params(self, x):
+        self.mapping1._set_params(x[:self.mapping1.num_params])
+        self.mapping2._set_params(x[self.mapping1.num_params:])
+        
+    def randomize(self):
+        self.mapping1._randomize()
+        self.mapping2._randomize()
+
+    def f(self, X):
+        return self.mapping1.f(X) + self.mapping2.f(X)
+
+    def df_dtheta(self, dL_df, X):
+        self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
+        self._df_dbias = (dL_df.sum(0))
+        return np.hstack((self._df_dA.flatten(), self._df_dbias))
+
+    def df_dX(self, dL_df, X):
+        return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X) 
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -10,6 +10,7 @@ from ..util import linalg
 from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
 from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
 from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
+import logging

 class BayesianGPLVM(SparseGP):
    """
@ -27,8 +28,10 @@ class BayesianGPLVM(SparseGP):
                 Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, **kwargs):
        self.mpi_comm = mpi_comm
        self.__IN_OPTIMIZATION__ = False
+        self.logger = logging.getLogger(self.__class__.__name__)
        if X == None:
            from ..util.initialization import initialize_latent
+            self.logger.info("initializing latent space X with method {}".format(init))
            X, fracs = initialize_latent(init, input_dim, Y)
        else:
            fracs = np.ones(input_dim)
@ -36,31 +39,35 @@ class BayesianGPLVM(SparseGP):
        self.init = init

        if X_variance is None:
+            self.logger.info("initializing latent space variance ~ uniform(0,.1)")
            X_variance = np.random.uniform(0,.1,X.shape)

-
        if Z is None:
+            self.logger.info("initializing inducing inputs")
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]

        if kernel is None:
+            self.logger.info("initializing kernel RBF")
            kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)

        if likelihood is None:
            likelihood = Gaussian()

-
        self.variational_prior = NormalPrior()
        X = NormalPosterior(X, X_variance)

        if inference_method is None:
-            if np.any(np.isnan(Y)):
+            inan = np.isnan(Y)
+            if np.any(inan):
                from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
-                inference_method = VarDTCMissingData()
+                self.logger.debug("creating inference_method with var_dtc missing data")
+                inference_method = VarDTCMissingData(inan=inan)
            elif mpi_comm is not None:
                inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
            else:
                from ..inference.latent_function_inference.var_dtc import VarDTC
+                self.logger.debug("creating inference_method var_dtc")
                inference_method = VarDTC()
        if isinstance(inference_method,VarDTC_minibatch):
            inference_method.mpi_comm = mpi_comm
@ -69,6 +76,7 @@ class BayesianGPLVM(SparseGP):
            kernel.psicomp.GPU_direct = True

        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
+        self.logger.info("Adding X as parameter")
        self.add_parameter(self.X, index=0)

        if mpi_comm != None:
@ -98,13 +106,29 @@ class BayesianGPLVM(SparseGP):

        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])

+        # This is testing code -------------------------
+#         i = np.random.randint(self.X.shape[0])
+#         X_ = self.X.mean
+#         which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51))
+#         _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata)
+#         grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
+#
+#         self.X.mean.gradient[:] = 0
+#         self.X.variance.gradient[:] = 0
+#         self.X.mean.gradient[which] = grad[0]
+#         self.X.variance.gradient[which] = grad[1]
+
+        # update for the KL divergence
+#         self.variational_prior.update_gradients_KL(self.X, which)
+        # -----------------------------------------------
+
        # update for the KL divergence
        self.variational_prior.update_gradients_KL(self.X)

    def plot_latent(self, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=True, legend=True,
-                plot_limits=None, 
+                plot_limits=None,
                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
@ -122,36 +146,41 @@ class BayesianGPLVM(SparseGP):
        Notes:
        This will only work with a univariate Gaussian likelihood (for now)
        """
-        assert not self.likelihood.is_heteroscedastic
        N_test = Y.shape[0]
        input_dim = self.Z.shape[1]
+
        means = np.zeros((N_test, input_dim))
        covars = np.zeros((N_test, input_dim))

-        dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
-        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
-        V = self.likelihood.precision * Y
+        dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
+        dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
+        V = Y/self.likelihood.variance

        #compute CPsi1V
-        if self.Cpsi1V is None:
-            psi1V = np.dot(self.psi1.T, self.likelihood.V)
-            tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
-            tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
-            self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
+        #if self.Cpsi1V is None:
+        #    psi1V = np.dot(self.psi1.T, self.likelihood.V)
+        #    tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+        #    tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
+        #    self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)

-        dpsi1 = np.dot(self.Cpsi1V, V.T)
+        dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)

-        start = np.zeros(self.input_dim * 2)
+        #start = np.zeros(self.input_dim * 2)
+
+
+        from scipy.optimize import minimize

        for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
-            args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
-            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
-
+            args = (input_dim, self.kern.copy(), self.Z, dpsi0, dpsi1_n.T, dpsi2)
+            res = minimize(latent_cost_and_grad, jac=True, x0=np.hstack((means[n], covars[n])), args=args, method='BFGS')
+            xopt = res.x
            mu, log_S = xopt.reshape(2, 1, -1)
            means[n] = mu[0].copy()
            covars[n] = np.exp(log_S[0]).copy()

-        return means, covars
+        X = NormalPosterior(means, covars)
+
+        return X

    def dmu_dX(self, Xnew):
        """
@ -181,7 +210,6 @@ class BayesianGPLVM(SparseGP):
        from ..plotting.matplot_dep import dim_reduction_plots

        return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
-
    def __getstate__(self):
        dc = super(BayesianGPLVM, self).__getstate__()
        dc['mpi_comm'] = None
@ -227,57 +255,27 @@ class BayesianGPLVM(SparseGP):
                    raise Exception("Unrecognizable flag for synchronization!")
        self.__IN_OPTIMIZATION__ = False

-def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+
+def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
    objective function for fitting the latent variables for test points
    (negative log-likelihood: should be minimised!)
    """
-    mu, log_S = mu_S.reshape(2, 1, -1)
+    mu = mu_S[:input_dim][None]  # leading input_dim entries are the mean; [None] adds a leading axis
+    log_S = mu_S[input_dim:][None]  # remaining entries are the log-variance, same layout
    S = np.exp(log_S)

-    psi0 = kern.psi0(Z, mu, S)
-    psi1 = kern.psi1(Z, mu, S)
-    psi2 = kern.psi2(Z, mu, S)
+    X = NormalPosterior(mu, S)

-    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
+    psi0 = kern.psi0(Z, X)
+    psi1 = kern.psi1(Z, X)
+    psi2 = kern.psi2(Z, X)

-    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
-    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
-    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
+    lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)

-    dmu = mu0 + mu1 + mu2 - mu
+    dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X)
+    dmu = dLdmu - mu  # the -mu term is the derivative of -0.5 * sum(mu**2) in lik
    # dS = S0 + S1 + S2 -0.5 + .5/S
-    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
+    dlnS = S * (dLdS - 0.5) + .5  # chain rule through S = exp(log_S): S * (dLdS - 0.5 + .5/S)
+
    return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
-
-def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
-    """
-    objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
-    This is the same as latent_cost_and_grad but only for the objective
-    """
-    mu, log_S = mu_S.reshape(2, 1, -1)
-    S = np.exp(log_S)
-
-    psi0 = kern.psi0(Z, mu, S)
-    psi1 = kern.psi1(Z, mu, S)
-    psi2 = kern.psi2(Z, mu, S)
-
-    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
-    return -float(lik)
-
-def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
-    """
-    This is the same as latent_cost_and_grad but only for the grad
-    """
-    mu, log_S = mu_S.reshape(2, 1, -1)
-    S = np.exp(log_S)
-
-    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
-    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
-    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
-
-    dmu = mu0 + mu1 + mu2 - mu
-    # dS = S0 + S1 + S2 -0.5 + .5/S
-    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
-
-    return -np.hstack((dmu.flatten(), dlnS.flatten()))
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -2,10 +2,8 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-import itertools
-import pylab
+import itertools, logging

-from ..core import Model
 from ..kern import Kern
 from ..core.parameterization.variational import NormalPosterior, NormalPrior
 from ..core.parameterization import Param, Parameterized
@ -61,15 +59,18 @@ class MRD(SparseGP):
                 inference_method=None, likelihoods=None, name='mrd', Ynames=None):
        super(GP, self).__init__(name)

+        self.logger = logging.getLogger(self.__class__.__name__)
        self.input_dim = input_dim
        self.num_inducing = num_inducing

        if isinstance(Ylist, dict):
            Ynames, Ylist = zip(*Ylist.items())

+        self.logger.debug("creating observable arrays")
        self.Ylist = [ObsAr(Y) for Y in Ylist]

        if Ynames is None:
+            self.logger.debug("creating Ynames")
            Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
        self.names = Ynames
        assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
@ -81,13 +82,15 @@ class MRD(SparseGP):
                inan = np.isnan(y)
                if np.any(inan):
                    if not warned:
-                        print "WARING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X"
+                        self.logger.warn("WARNING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X")
                        warned = True
                    self.inference_method.append(VarDTCMissingData(limit=1, inan=inan))
                else:
                    self.inference_method.append(VarDTC(limit=1))
+                self.logger.debug("created inference method <{}>".format(hex(id(self.inference_method[-1]))))
        else:
            if not isinstance(inference_method, InferenceMethodList):
+                self.logger.debug("making inference_method an InferenceMethodList")
                inference_method = InferenceMethodList(inference_method)
            self.inference_method = inference_method

@ -101,18 +104,19 @@ class MRD(SparseGP):
        self.num_inducing = self.Z.shape[0] # ensure M==N if M>N

        # sort out the kernels
+        self.logger.info("building kernels")
        if kernel is None:
            from ..kern import RBF
-            self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
+            kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
        elif isinstance(kernel, Kern):
-            self.kernels = []
+            kernels = []
            for i in range(len(Ylist)):
                k = kernel.copy()
-                self.kernels.append(k)
+                kernels.append(k)
        else:
            assert len(kernel) == len(Ylist), "need one kernel per output"
            assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
-            self.kernels = kernel
+            kernels = kernel

        if X_variance is None:
            X_variance = np.random.uniform(0.1, 0.2, X.shape)
@ -121,17 +125,17 @@ class MRD(SparseGP):
        self.X = NormalPosterior(X, X_variance)

        if likelihoods is None:
-            self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
-        else: self.likelihoods = likelihoods
+            likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
+        else: likelihoods = likelihoods

+        self.logger.info("adding X and Z")
        self.add_parameters(self.X, self.Z)

        self.bgplvms = []
        self.num_data = Ylist[0].shape[0]

-        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
+        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
            assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
-
            p = Parameterized(name=n)
            p.add_parameter(k)
            p.kern = k
@ -141,6 +145,7 @@ class MRD(SparseGP):
            self.bgplvms.append(p)

        self.posterior = None
+        self.logger.info("init done")
        self._in_init_ = False

    def parameters_changed(self):
@ -148,8 +153,9 @@ class MRD(SparseGP):
        self.posteriors = []
        self.Z.gradient[:] = 0.
        self.X.gradient[:] = 0.
-
-        for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
+        for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
+            self.logger.info('working on im <{}>'.format(hex(id(i))))
+            k, l = b.kern, b.likelihood
            posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)

            self.posteriors.append(posterior)
@ -177,11 +183,11 @@ class MRD(SparseGP):
            self.X.mean.gradient += dL_dmean
            self.X.variance.gradient += dL_dS

-        # update for the KL divergence
        self.posterior = self.posteriors[0]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood

+        # update for the KL divergence
        self.variational_prior.update_gradients_KL(self.X)
        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

@ -219,8 +225,9 @@ class MRD(SparseGP):
        return Z

    def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
+        import matplotlib.pyplot as plt
        if axes is None:
-            fig = pylab.figure(num=fignum)
+            fig = plt.figure(num=fignum)
        sharex_ax = None
        sharey_ax = None
        plots = []
@ -242,8 +249,8 @@ class MRD(SparseGP):
                raise ValueError("Need one axes per latent dimension input_dim")
            plots.append(plotf(i, g, ax))
            if sharey_ax is not None:
-                pylab.setp(ax.get_yticklabels(), visible=False)
-        pylab.draw()
+                plt.setp(ax.get_yticklabels(), visible=False)
+        plt.draw()
        if axes is None:
            try:
                fig.tight_layout()
@ -257,8 +264,8 @@ class MRD(SparseGP):
        This predicts the output mean and variance for the dataset given in Ylist[Yindex]
        """
        self.posterior = self.posteriors[Yindex]
-        self.kern = self.kernels[Yindex]
-        self.likelihood = self.likelihoods[Yindex]
+        self.kern = self.bgplvms[Yindex].kern  # follow Yindex like the posterior above, not always view 0
+        self.likelihood = self.bgplvms[Yindex].likelihood
        return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)

    #===============================================================================
@ -300,11 +307,12 @@ class MRD(SparseGP):
        """
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from matplotlib import pyplot as plt
        from ..plotting.matplot_dep import dim_reduction_plots
        if "Yindex" not in predict_kwargs:
            predict_kwargs['Yindex'] = 0
        if ax is None:
-            fig = pylab.figure(num=fignum)
+            fig = plt.figure(num=fignum)
            ax = fig.add_subplot(111)
        else:
            fig = ax.figure
@ -321,10 +329,7 @@ class MRD(SparseGP):
        return plot

    def __getstate__(self):
-        # TODO:
-        import copy
-        state = copy.copy(self.__dict__)
-        del state['kernels']
+        state = super(MRD, self).__getstate__()  # parent state; kern/likelihood are dropped below and re-linked from bgplvms[0] in __setstate__
        del state['kern']
        del state['likelihood']
        return state
@ -332,7 +337,6 @@ class MRD(SparseGP):
    def __setstate__(self, state):
        # TODO:
        super(MRD, self).__setstate__(state)
-        self.kernels = [p.kern for p in self.bgplvms]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern  # __getstate__ deletes kern/likelihood; point them at the first view again
+        self.likelihood = self.bgplvms[0].likelihood
        self.parameters_changed()
--- a/GPy/models/ss_gplvm.py
+++ b/GPy/models/ss_gplvm.py
@ -64,7 +64,7 @@ class SSGPLVM(SparseGP):
        if inference_method is None:
            inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)

-        self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
+        self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=True) # the prior probability of the latent binary variable b
        
        X = SpikeAndSlabPosterior(X, X_variance, gamma)
        
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@ -30,7 +30,7 @@ def most_significant_input_dimensions(model, which_indices):
 def plot_latent(model, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=False, legend=True,
-                plot_limits=None, 
+                plot_limits=None,
                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
    """
    :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@ -84,6 +84,7 @@ def plot_latent(model, labels=None, which_indices=None,
                            cmap=pb.cm.binary, **imshow_kwargs)

    # make sure labels are in order of input:
+    labels = np.asarray(labels)
    ulabels = []
    for lab in labels:
        if not lab in ulabels:
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -8,7 +8,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
 from ...util.misc import param_to_array
 from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
 from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
-
+from scipy import sparse

 def plot_fit(model, plot_limits=None, which_data_rows='all',
        which_data_ycols='all', fixed_inputs=[],
@ -61,11 +61,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
        X = model.X.mean
-        X_variance = param_to_array(model.X.variance)
+        X_variance = model.X.variance
    else:
        X = model.X
-    X, Y = param_to_array(X, model.Y)
-    if hasattr(model, 'Z'): Z = param_to_array(model.Z)
+    #X, Y = param_to_array(X, model.Y)
+    Y = model.Y
+    if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
+
+    if hasattr(model, 'Z'): Z = model.Z

    #work out what the inputs are for plotting (1D or 2D)
    fixed_dims = np.array([i for i,v in fixed_inputs])
@ -147,7 +150,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
        if plot_raw:
            m, _ = model._raw_predict(Xgrid)
        else:
-            m, _ = model.predict(Xgrid)
+            if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
+                meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
+            else:
+                meta = None
+            m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
        for d in which_data_ycols:
            m_d = m[:,d].reshape(resolution, resolution).T
            plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
--- a/GPy/plotting/matplot_dep/visualize.py
+++ b/GPy/plotting/matplot_dep/visualize.py
@ -98,9 +98,9 @@ class lvm(matplotlib_show):
        """
        if vals is None:
            if isinstance(model.X, VariationalPosterior):
-                vals = param_to_array(model.X.mean)
+                vals = model.X.mean.values
            else:
-                vals = param_to_array(model.X)
+                vals = model.X.values
        if len(vals.shape)==1:
            vals = vals[None,:]
        matplotlib_show.__init__(self, vals, axes=latent_axes)
@ -136,7 +136,7 @@ class lvm(matplotlib_show):

    def modify(self, vals):
        """When latent values are modified update the latent representation and ulso update the output visualization."""
-        self.vals = vals.copy()
+        self.vals = vals.view(np.ndarray).copy()
        y = self.model.predict(self.vals)[0]
        self.data_visualize.modify(y)
        self.latent_handle.set_data(self.vals[0,self.latent_index[0]], self.vals[0,self.latent_index[1]])
@ -226,6 +226,7 @@ class lvm_dimselect(lvm):
        self.labels = labels
        lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
        self.show_sensitivities()
+        print self.latent_values
        print "use left and right mouse buttons to select dimensions"


@ -255,6 +256,7 @@ class lvm_dimselect(lvm):


    def on_leave(self,event):
+        print type(self.latent_values)
        latent_values = self.latent_values.copy()
        y = self.model.predict(latent_values[None,:])[0]
        self.data_visualize.modify(y)
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -94,22 +94,18 @@ class MiscTests(unittest.TestCase):
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
-        m._trigger_params_changed()
        m2.kern.lengthscale = m.kern.lengthscale
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
-        m._trigger_params_changed()
        m2['.*lengthscale'] = m.kern.lengthscale
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
-        m._trigger_params_changed()
        m2['.*lengthscale'] = m.kern['.*lengthscale']
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

        m.kern.lengthscale.randomize()
-        m._trigger_params_changed()
        m2.kern.lengthscale = m.kern['.*lengthscale']
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

@ -130,6 +126,23 @@ class MiscTests(unittest.TestCase):
        m2.kern[:] = m.kern[''].values()
        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())

+    def test_big_model(self):  # smoke test: fix/unfix, printing and checkgrad on the MRD example model
+        m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0)  # built with optimize/plot/plot_sim all 0
+        m.X.fix()  # fix only the latent space
+        print m
+        m.unfix()  # release everything again
+        m.checkgrad()  # NOTE(review): return value is not asserted, so only an exception can fail this test
+        print m
+        m.fix()  # fix the whole model ...
+        print m
+        m.inducing_inputs.unfix()  # ... then release just the inducing inputs
+        print m
+        m.checkgrad()
+        m.unfix()
+        m.checkgrad()
+        m.checkgrad()  # called twice in a row on the same model state
+        print m
+
    def test_model_set_params(self):
        m = GPy.models.GPRegression(self.X, self.Y)
        lengthscale = np.random.uniform()
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@ -8,6 +8,7 @@ import GPy
 import numpy as np
 from GPy.core.parameterization.parameter_core import HierarchyError
 from GPy.core.parameterization.observable_array import ObsAr
+from GPy.core.parameterization.transformations import NegativeLogexp

 class ArrayCoreTest(unittest.TestCase):
    def setUp(self):
@ -38,10 +39,25 @@ class ParameterizedTest(unittest.TestCase):
        self.test1.kern = self.rbf+self.white
        self.test1.add_parameter(self.test1.kern)
        self.test1.add_parameter(self.param, 0)
+        # print self.test1:
+        #=============================================================================
+        # test_model.          |    Value    |  Constraint   |  Prior  |  Tied to
+        # param                |  (25L, 2L)  |   {0.0,1.0}   |         |
+        # add.rbf.variance     |        1.0  |  0.0,1.0 +ve  |         |
+        # add.rbf.lengthscale  |        1.0  |  0.0,1.0 +ve  |         |
+        # add.white.variance   |        1.0  |  0.0,1.0 +ve  |         |
+        #=============================================================================

        x = np.linspace(-2,6,4)[:,None]
        y = np.sin(x)
        self.testmodel = GPy.models.GPRegression(x,y)
+        # print self.testmodel:
+        #=============================================================================
+        # GP_regression.           |  Value  |  Constraint  |  Prior  |  Tied to
+        # rbf.variance             |    1.0  |     +ve      |         |
+        # rbf.lengthscale          |    1.0  |     +ve      |         |
+        # Gaussian_noise.variance  |    1.0  |     +ve      |         |
+        #=============================================================================

    def test_add_parameter(self):
        self.assertEquals(self.rbf._parent_index_, 0)
@ -142,8 +158,13 @@ class ParameterizedTest(unittest.TestCase):
        self.testmodel.randomize()
        self.assertEqual(val, self.testmodel.kern.lengthscale)

-    
-    
+    def test_add_parameter_in_hierarchy(self):  # adding a param deep in the hierarchy must shift the top-level constraint indices
+        from GPy.core import Param
+        self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)  # 2-element param, presumably inserted at position 1 inside rbf
+        self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))  # NEW takes the two flat indices right after rbf.variance
+        self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))  # the leading `param` block is untouched
+        self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())  # param is (25, 2) in setUp, so rbf.variance is 50; the two params after NEW move to 53 and 54
+
    def test_regular_expression_misc(self):
        self.testmodel.kern.lengthscale.fix()
        val = float(self.testmodel.kern.lengthscale)
@ -174,4 +195,4 @@ class ParameterizedTest(unittest.TestCase):

 if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_add_parameter']
-    unittest.main()
+    unittest.main()
--- a/GPy/testing/pickle_tests.py
+++ b/GPy/testing/pickle_tests.py
@ -16,8 +16,7 @@ from GPy.core.parameterization.priors import Gaussian
 from GPy.kern._src.rbf import RBF
 from GPy.kern._src.linear import Linear
 from GPy.kern._src.static import Bias, White
-from GPy.examples.dimensionality_reduction import mrd_simulation,\
-    bgplvm_simulation
+from GPy.examples.dimensionality_reduction import mrd_simulation
 from GPy.examples.regression import toy_rbf_1d_50
 from GPy.core.parameterization.variational import NormalPosterior
 from GPy.models.gp_regression import GPRegression
@ -90,6 +89,7 @@ class Test(ListDictTestCase):
        self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
        self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        pcopy.gradient = 10 # gradient does not get copied anymore
        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
@ -126,8 +126,8 @@ class Test(ListDictTestCase):
    def test_modelrecreation(self):
        par = toy_rbf_1d_50(optimize=0, plot=0)
        pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
-        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
-        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
+        np.testing.assert_allclose(par.param_array, pcopy.param_array)
+        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
@ -140,7 +140,7 @@ class Test(ListDictTestCase):
            par.pickle(f)
            f.seek(0)
            pcopy = pickle.load(f)
-        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        np.testing.assert_allclose(par.param_array, pcopy.param_array)
        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assert_(pcopy.checkgrad())
@ -151,6 +151,7 @@ class Test(ListDictTestCase):
        par = NormalPosterior(X,Xv)
        par.gradient = 10
        pcopy = par.copy()
+        pcopy.gradient = 10
        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
        self.assertSequenceEqual(str(par), str(pcopy))
@ -175,6 +176,7 @@ class Test(ListDictTestCase):
        self.assertSequenceEqual(str(par), str(pcopy))
        self.assertIsNot(par.param_array, pcopy.param_array)
        self.assertIsNot(par.gradient_full, pcopy.gradient_full)
+        self.assertTrue(par.checkgrad())
        self.assertTrue(pcopy.checkgrad())
        self.assert_(np.any(pcopy.gradient!=0.0))
        with tempfile.TemporaryFile('w+b') as f:
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@ -1,9 +1,7 @@
 from ..core.parameterization.parameter_core import Observable
-import itertools, collections, weakref
+import collections, weakref, logging

 class Cacher(object):
-
-
    def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
        """
        Parameters:
@ -12,31 +10,37 @@ class Cacher(object):
        :param int limit: depth of cacher
        :param [int] ignore_args: list of indices, pointing at arguments to ignore in *args of operation(*args). This includes self!
        :param [str] force_kwargs: list of kwarg names (strings). If a kwarg with that name is given, the cacher will force recompute and wont cache anything.
+        :param int verbose: verbosity level. 0: no print outs, 1: casual print outs, 2: debug level print outs
        """
        self.limit = int(limit)
        self.ignore_args = ignore_args
        self.force_kwargs = force_kwargs
-        self.operation=operation
+        self.operation = operation
        self.order = collections.deque()
-        self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id
+        self.cached_inputs = {}  # point from cache_ids to a list of [ind_ids], which where used in cache cache_id

        #=======================================================================
        # point from each ind_id to [ref(obj), cache_ids]
        # 0: a weak reference to the object itself
        # 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id)
-        self.cached_input_ids = {} 
+        self.cached_input_ids = {}
        #=======================================================================

-        self.cached_outputs = {} # point from cache_ids to outputs
-        self.inputs_changed = {} # point from cache_ids to bools
+        self.cached_outputs = {}  # point from cache_ids to outputs
+        self.inputs_changed = {}  # point from cache_ids to bools

-    def combine_inputs(self, args, kw, ignore_args):
+    def id(self, obj):
+        """Return hex(id(obj)): the string key under which an individual input object is tracked by this cacher"""
+        return hex(id(obj))
+
+    def combine_inputs(self, args, kw):
        "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
-        return tuple(a for i,a in enumerate(args) if i not in ignore_args) + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
+        return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))  # all of args, then the kwarg values ordered by kwarg name

-    def prepare_cache_id(self, combined_args_kw):
-        "get the cacheid (conc. string of argument ids in order) ignoring ignore_args"
-        return "".join(str(id(a)) for a in combined_args_kw)
+    def prepare_cache_id(self, combined_args_kw, ignore_args):
+        "Return the cache id: the self.id() strings of the combined arguments joined in order, skipping positions listed in ignore_args"
+        cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
+        return cache_id

    def ensure_cache_length(self, cache_id):
        "Ensures the cache is within its limits and has one place free"
@ -45,58 +49,61 @@ class Cacher(object):
            cache_id = self.order.popleft()
            combined_args_kw = self.cached_inputs[cache_id]
            for ind in combined_args_kw:
-                if ind is None:
-                    continue
-                ind_id = id(ind)
-                ref, cache_ids = self.cached_input_ids[ind_id]
-                if len(cache_ids) == 1 and ref() is not None:
-                    ref().remove_observer(self, self.on_cache_changed)
-                    del self.cached_input_ids[ind_id]
-                else:
-                    cache_ids.remove(cache_id)
-                    self.cached_input_ids[ind_id] = [ref, cache_ids]
+                if ind is not None:
+                    ind_id = self.id(ind)
+                    tmp = self.cached_input_ids.get(ind_id, None)
+                    if tmp is not None:
+                        ref, cache_ids = tmp
+                        if len(cache_ids) == 1 and ref() is not None:
+                            ref().remove_observer(self, self.on_cache_changed)
+                            del self.cached_input_ids[ind_id]
+                        else:
+                            cache_ids.remove(cache_id)
+                            self.cached_input_ids[ind_id] = [ref, cache_ids]
            del self.cached_outputs[cache_id]
            del self.inputs_changed[cache_id]
            del self.cached_inputs[cache_id]

-    def add_to_cache(self, cache_id, combined_args_kw, output):
+    def add_to_cache(self, cache_id, inputs, output):
+        """This adds cache_id to the cache, with inputs and output"""
        self.inputs_changed[cache_id] = False
        self.cached_outputs[cache_id] = output
        self.order.append(cache_id)
-        self.cached_inputs[cache_id] = combined_args_kw
-        for a in combined_args_kw:
-            if a is None:
-                continue
-            ind_id = id(a)
-            v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
-            v[1].append(cache_id)
-            if len(v[1]) == 1:
-                a.add_observer(self, self.on_cache_changed)
-            self.cached_input_ids[ind_id] = v
+        self.cached_inputs[cache_id] = inputs
+        for a in inputs:
+            if a is not None:
+                ind_id = self.id(a)
+                v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
+                v[1].append(cache_id)
+                if len(v[1]) == 1:
+                    a.add_observer(self, self.on_cache_changed)
+                self.cached_input_ids[ind_id] = v

    def __call__(self, *args, **kw):
        """
        A wrapper function for self.operation,
        """
-    
+        #=======================================================================
+        # !WARNING CACHE OFFSWITCH!
+        # return self.operation(*args, **kw)
+        #=======================================================================
+
        # 1: Check whether we have forced recompute arguments:
        if len(self.force_kwargs) != 0:
            for k in self.force_kwargs:
                if k in kw and kw[k] is not None:
                    return self.operation(*args, **kw)
-    
-        # 2: prepare_cache_id and get the unique id string for this call
-        inputs = self.combine_inputs(args, kw, self.ignore_args)
-        cache_id = self.prepare_cache_id(inputs)

+        # 2: prepare_cache_id and get the unique self.id string for this call
+        inputs = self.combine_inputs(args, kw)
+        cache_id = self.prepare_cache_id(inputs, self.ignore_args)
        # 2: if anything is not cachable, we will just return the operation, without caching
-        if reduce(lambda a,b: a or (not (isinstance(b, Observable) or (b is None))), inputs, False):
-            # print '['+self.operation.__name__+'] contain un-cachable arguments!'
+        if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
            return self.operation(*args, **kw)
        # 3&4: check whether this cache_id has been cached, then has it changed?
        try:
            if(self.inputs_changed[cache_id]):
-                # 4: This happens, when elements have changed for this cache id
+                # 4: This happens, when elements have changed for this cache self.id
                self.inputs_changed[cache_id] = False
                self.cached_outputs[cache_id] = self.operation(*args, **kw)
        except KeyError:
@ -115,10 +122,12 @@ class Cacher(object):

        this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here.
        """
-        for ind_id in [id(direct), id(which)]:
-            _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
-            for cache_id in cache_ids:
-                self.inputs_changed[cache_id] = True
+        for what in [direct, which]:
+            if what is not None:
+                ind_id = self.id(what)
+                _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
+                for cache_id in cache_ids:
+                    self.inputs_changed[cache_id] = True

    def reset(self):
        """
@ -133,8 +142,7 @@ class Cacher(object):
        return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)

    def __getstate__(self, memo=None):
-        return (self.limit)
-#        raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
+        raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))

    def __setstate__(self, memo=None):
        raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
@ -155,9 +163,8 @@ class Cacher_wrap(object):
    def __get__(self, obj, objtype=None):
        return partial(self, obj)
    def __call__(self, *args, **kwargs):
-        obj = args[0] # <------------------- WHAT IF IT IS ONLY A FUNCTION!
-        
-        #import ipdb;ipdb.set_trace()
+        obj = args[0]
+        # import ipdb;ipdb.set_trace()
        try:
            caches = obj.__cachers
        except AttributeError:
--- a/GPy/util/config.py
+++ b/GPy/util/config.py
@ -5,18 +5,19 @@ import ConfigParser
 import os
 config = ConfigParser.ConfigParser()

-home = os.getenv('HOME') or os.getenv('USERPROFILE')
-user_file = os.path.join(home,'.gpy_config.cfg')
-default_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'gpy_config.cfg'))
-# print user_file, os.path.isfile(user_file)
-# print default_file, os.path.isfile(default_file)
+# This is the default configuration file that always needs to be present.
+default_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'defaults.cfg'))

-# 1. check if the user has a ~/.gpy_config.cfg
-if os.path.isfile(user_file):
-    config.read(user_file)
-elif os.path.isfile(default_file):
-    # 2. if not, use the default one
-    config.read(default_file)
-else:
-    #3. panic
-    raise ValueError, "no configuration file found"
+# These files are optional
+# This specifies configurations that are typically specific to the machine (it is found alongside the GPy installation).
+local_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'installation.cfg'))
+
+# This specifies configurations specific to the user (it is found in the user home directory)
+home = os.getenv('HOME') or os.getenv('USERPROFILE')
+user_file = os.path.join(home,'.gpy_user.cfg')
+
+# Read in the given files.
+# The default file is mandatory: open() fails loudly if it is missing.
+# The handle is closed as soon as it has been parsed.
+with open(default_file) as _default_fp:
+    config.readfp(_default_fp)
+# The local and user files are optional (see the comments above).
+config.read([local_file, user_file])
+# NOTE(review): a ConfigParser instance looks like it is always truthy, so
+# this guard probably never fires -- confirm before relying on it.
+if not config:
+    raise ValueError, "No configuration file found at either " + user_file + " or " + local_file + " or " + default_file + "."
--- a/GPy/util/data_resources.json
+++ b/GPy/util/data_resources.json
@ -57,6 +57,20 @@
            "http://www.cs.nyu.edu/~roweis/data/"
        ]
    },
+    "cifar-10": {
+        "citation": "Learning Multiple Layers of Features from Tiny Images, Alex Krizhevsky, 2009, Tech report available here: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf",
+        "details": "The CIFAR-10 and CIFAR-100 are labeled subsets of the 80 million tiny images dataset. They were collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton. Details are available on this webpage: http://www.cs.toronto.edu/~kriz/cifar.html. The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.",
+        "files": [
+            [
+                "cifar-10-python.tar.gz"
+            ]
+        ],
+        "license": null,
+        "size": 0,
+        "urls": [
+            "http://www.cs.toronto.edu/~kriz/"
+        ]
+    },
    "cmu_mocap_full": {
        "citation": "Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\\nThe database was created with funding from NSF EIA-0196217.",
        "details": "CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
@ -123,11 +137,39 @@
            ]
        ],
        "license": null,
-        "size": 0,
+        "size": 20258,
        "urls": [
            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/drosophila_protein/"
        ]
    },
+    "spellman_yeast": {
+        "citation": "Paul T. Spellman, Gavin Sherlock, Michael Q. Zhang, Vishwanath R. Iyer, Kirk Anders, Michael B. Eisen, Patrick O. Brown, David Botstein, and Bruce Futcher 'Comprehensive Identification of Cell Cycle-regulated Genes of the Yeast Saccharomyces cerevisiae by Microarray Hybridization.'  Molecular Biology of the Cell 9, 3273-3297",
+        "details": "Two colour spotted cDNA array data set of a series of experiments to identify which genes in Yeast are cell cycle regulated.",
+        "files": [
+            [
+                "combined.txt"
+            ]
+        ],
+        "license": null,
+        "size": 2510955,
+        "urls": [
+            "http://genome-www.stanford.edu/cellcycle/data/rawdata/"
+        ]
+    },
+    "lee_yeast_ChIP": {
+        "citation": "Tong Ihn Lee, Nicola J. Rinaldi, Francois Robert, Duncan T. Odom, Ziv Bar-Joseph, Georg K. Gerber, Nancy M. Hannett, Christopher T. Harbison, Craig M. Thompson, Itamar Simon, Julia Zeitlinger, Ezra G. Jennings, Heather L. Murray, D. Benjamin Gordon, Bing Ren, John J. Wyrick, Jean-Bosco Tagne, Thomas L. Volkert, Ernest Fraenkel, David K. Gifford, Richard A. Young 'Transcriptional Regulatory Networks in Saccharomyces cerevisiae' Science 298 (5594) pg 799--804. DOI: 10.1126/science.1075090",
+        "details": "Binding location analysis for 106 regulators in yeast. The data consists of p-values for binding of regulators to genes derived from ChIP-chip experiments.",
+        "files": [
+            [
+                "binding_by_gene.tsv"
+            ]
+        ],
+        "license": null,
+        "size": 1674161,
+        "urls": [
+            "http://jura.wi.mit.edu/young_public/regulatory_network/"
+        ]
+    },
    "epomeo_gpx": {
        "citation": "",
        "details": "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -51,7 +51,7 @@ if not (on_rtd):
    json_data=open(path).read()
    football_dict = json.loads(json_data)

-    
+

 def prompt_user(prompt):
    """Ask user for agreeing to data set licenses."""
@ -128,14 +128,14 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
            f.write(buff)
            sys.stdout.write(" "*(len(status)) + "\r")
            if file_size:
-                status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), 
-                                                                       full=file_size/(1048576.), ll=line_length, 
+                status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.),
+                                                                       full=file_size/(1048576.), ll=line_length,
                                                                       perc="="*int(line_length*float(file_size_dl)/file_size))
            else:
-                status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), 
-                                                                       ll=line_length, 
+                status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.),
+                                                                       ll=line_length,
                                                                       perc="."*int(line_length*float(file_size_dl/(10*1048576.))))
-                
+
            sys.stdout.write(status)
            sys.stdout.flush()
        sys.stdout.write(" "*(len(status)) + "\r")
@ -240,7 +240,7 @@ def cmu_urls_files(subj_motions, messages = True):
        if not os.path.exists(cur_skel_file):
            # Current skel file doesn't exist.
            if not os.path.isdir(skel_dir):
-                os.mkdir(skel_dir)
+                os.makedirs(skel_dir)
            # Add skel file to list.
            url_required = True
            file_download.append(subjects[i] + '.asf')
@ -320,7 +320,7 @@ def della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=None):
            Y = Y[:, None]
    return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)

-    
+

 def football_data(season='1314', data_set='football_data'):
    """Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. """
@ -367,36 +367,73 @@ def sod1_mouse(data_set='sod1_mouse'):
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
-    dirpath = os.path.join(data_path, data_set)
-    filename = os.path.join(dirpath, 'sod1_C57_129_exprs.csv')
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'sod1_C57_129_exprs.csv')
    Y = read_csv(filename, header=0, index_col=0)
    num_repeats=4
    num_time=4
    num_cond=4
    X = 1
    return data_details_return({'X': X, 'Y': Y}, data_set)
-    
+
+def spellman_yeast(data_set='spellman_yeast'):
+    """Load the Spellman et al. yeast cell cycle data as a pandas data frame.
+
+    The data set is downloaded first if it is not available locally. The tab
+    separated file 'combined.txt' is then read with its first row as header
+    and its first column as index, and returned under the key 'Y'.
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+    from pandas import read_csv
+    expression_file = os.path.join(data_path, data_set, 'combined.txt')
+    expression = read_csv(expression_file, header=0, index_col=0, sep='\t')
+    return data_details_return({'Y': expression}, data_set)
+
+def spellman_yeast_cdc15(data_set='spellman_yeast'):
+    """Load the CDC-15 time series from the Spellman et al. yeast data.
+
+    The data set is downloaded first if it is not available locally. The
+    columns named 'cdc15_<time>' are selected from 'combined.txt' and
+    transposed so that each row of 'Y' is one time point; 't' holds the
+    matching time values as a column vector.
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+    from pandas import read_csv
+    expression_file = os.path.join(data_path, data_set, 'combined.txt')
+    expression = read_csv(expression_file, header=0, index_col=0, sep='\t')
+    t = np.asarray([10, 30, 50, 70, 80, 90, 100, 110, 120, 130, 140, 150, 170, 180, 190, 200, 210, 220, 230, 240, 250, 270, 290])
+    # One column per time point, named after the time value.
+    column_names = ['cdc15_'+str(time_point) for time_point in t]
+    series = expression[column_names].T
+    details = {
+        'Y' : series,
+        't': t[:, None],
+        'info': 'Time series of synchronized yeast cells from the CDC-15 experiment of Spellman et al (1998).',
+    }
+    return data_details_return(details, data_set)
+
+def lee_yeast_ChIP(data_set='lee_yeast_ChIP'):
+    """Load the Lee et al. yeast ChIP binding data.
+
+    The data set is downloaded first if it is not available locally. The tab
+    separated file 'binding_by_gene.tsv' is read with its second row as
+    header and its first column as index. The result contains:
+
+        * 'Y': the columns whose names do not start with 'Unnamed'
+        * 'transcription_factors': the names of those columns
+        * 'annotations': the columns 'Unnamed: 1' to 'Unnamed: 3'
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+    from pandas import read_csv
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'binding_by_gene.tsv')
+    S = read_csv(filename, header=1, index_col=0, sep='\t')
+    # Columns without a name in the header row get an 'Unnamed: <n>' label
+    # from pandas; everything else is treated as a transcription factor.
+    transcription_factors = [col for col in S.columns if not col.startswith('Unnamed')]
+    annotations = S[['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']]
+    S = S[transcription_factors]
+    return data_details_return({'annotations' : annotations, 'Y' : S, 'transcription_factors': transcription_factors}, data_set)
+
+
+
 def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
-    dirpath = os.path.join(data_path, data_set)
-    filename = os.path.join(dirpath, 'tomancak_exprs.csv')
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'tomancak_exprs.csv')
    Y = read_csv(filename, header=0, index_col=0).T
    num_repeats = 3
    num_time = 12
    xt = np.linspace(0, num_time-1, num_time)
    xr = np.linspace(0, num_repeats-1, num_repeats)
    xtime, xrepeat = np.meshgrid(xt, xr)
-    X = np.vstack((xtime.flatten(), xrepeat.flatten())).T    
+    X = np.vstack((xtime.flatten(), xrepeat.flatten())).T
    return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)

 def drosophila_protein(data_set='drosophila_protein'):
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
-    dirpath = os.path.join(data_path, data_set)
-    filename = os.path.join(dirpath, 'becker_et_al.csv')
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'becker_et_al.csv')
    Y = read_csv(filename, header=0)
    return data_details_return({'Y': Y}, data_set)

@ -404,8 +441,8 @@ def drosophila_knirps(data_set='drosophila_protein'):
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
-    dirpath = os.path.join(data_path, data_set)
-    filename = os.path.join(dirpath, 'becker_et_al.csv')
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'becker_et_al.csv')
    # in the csv file we have facts_kni and ext_kni. We treat facts_kni as protein and ext_kni as mRNA
    df = read_csv(filename, header=0)
    t = df['t'][:,None]
@ -426,32 +463,60 @@ def drosophila_knirps(data_set='drosophila_protein'):
    return data_details_return({'Y': Y, 'X': X}, data_set)

 # This will be for downloading google trends data.
-def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
-    """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
-    # Inspired by this notebook:
-    # http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb
+def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends', refresh_data=False):
+    """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations. The function will cache the result of your query, if you wish to refresh an old query set refresh_data to True. The function is inspired by this notebook: http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb"""
+    query_terms.sort()
+    import pandas

-    # quote the query terms.
-    for i, element in enumerate(query_terms):
-        query_terms[i] = urllib2.quote(element)
-    query = 'http://www.google.com/trends/fetchComponent?q=%s&cid=TIMESERIES_GRAPH_0&export=3' % ",".join(query_terms)
+    # Create directory name for data
+    dir_path = os.path.join(data_path,'google_trends')
+    if not os.path.isdir(dir_path):
+        os.makedirs(dir_path)
+    dir_name = '-'.join(query_terms)
+    dir_name = dir_name.replace(' ', '_')
+    dir_path = os.path.join(dir_path,dir_name)
+    file = 'data.csv'
+    file_name = os.path.join(dir_path,file)
+    if not os.path.exists(file_name) or refresh_data:
+        print "Accessing Google trends to acquire the data. Note that repeated accesses will result in a block due to a google terms of service violation. Failure at this point may be due to such blocks."
+        # quote the query terms.
+        quoted_terms = []
+        for term in query_terms:
+            quoted_terms.append(urllib2.quote(term))
+        print "Query terms: ", ', '.join(query_terms)

-    data = urllib2.urlopen(query).read()
+        print "Fetching query:"
+        query = 'http://www.google.com/trends/fetchComponent?q=%s&cid=TIMESERIES_GRAPH_0&export=3' % ",".join(quoted_terms)

-    # In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD.
-    header = """// Data table response\ngoogle.visualization.Query.setResponse("""
-    data = data[len(header):-2]
-    data = re.sub('new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
-    timeseries = json.loads(data)
-    #import pandas as pd
-    columns = [k['label'] for k in timeseries['table']['cols']]
-    rows = map(lambda x: [k['v'] for k in x['c']], timeseries['table']['rows'])
-    terms = len(columns)-1
-    X = np.asarray([(pb.datestr2num(row[0]), i) for i in range(terms) for row in rows ])
-    Y = np.asarray([[row[i+1]] for i in range(terms) for row in rows ])
+        data = urllib2.urlopen(query).read()
+        print "Done."
+        # In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD.
+        header = """// Data table response\ngoogle.visualization.Query.setResponse("""
+        data = data[len(header):-2]
+        data = re.sub('new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
+        timeseries = json.loads(data)
+        columns = [k['label'] for k in timeseries['table']['cols']]
+        rows = map(lambda x: [k['v'] for k in x['c']], timeseries['table']['rows'])
+        df = pandas.DataFrame(rows, columns=columns)
+        if not os.path.isdir(dir_path):
+            os.makedirs(dir_path)
+
+        df.to_csv(file_name)
+    else:
+        print "Reading cached data for google trends. To refresh the cache set 'refresh_data=True' when calling this function."
+        print "Query terms: ", ', '.join(query_terms)
+
+        df = pandas.read_csv(file_name, parse_dates=[0])
+
+    columns = df.columns
+    terms = len(query_terms)
+    import datetime
+    X = np.asarray([(row, i) for i in range(terms) for row in df.index])
+    Y = np.asarray([[df.ix[row][query_terms[i]]] for i in range(terms) for row in df.index ])
    output_info = columns[1:]
-    return data_details_return({'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set)
-    
+
+    return data_details_return({'data frame' : df, 'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set)
+
 # The data sets
 def oil(data_set='three_phase_oil_flow'):
    """The three phase oil data from Bishop and James (1993)."""
@ -582,7 +647,7 @@ def decampos_digits(data_set='decampos_characters', which_digits=[0,1,2,3,4,5,6,
    lbls = np.array([[l]*num_samples for l in which_digits]).reshape(Y.shape[0], 1)
    str_lbls = np.array([[str(l)]*num_samples for l in which_digits])
    return data_details_return({'Y': Y, 'lbls': lbls, 'str_lbls' : str_lbls, 'info': 'Digits data set from the de Campos characters data'}, data_set)
-    
+
 def ripley_synth(data_set='ripley_prnn_data'):
    if not data_available(data_set):
        download_data(data_set)
@ -594,7 +659,23 @@ def ripley_synth(data_set='ripley_prnn_data'):
    ytest = test[:, 2:3]
    return data_details_return({'X': X, 'Y': y, 'Xtest': Xtest, 'Ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)

-def mauna_loa(data_set='mauna_loa', num_train=543, refresh_data=False):
+def global_average_temperature(data_set='global_temperature', num_train=1000, refresh_data=False):
+    """Load the global average temperature data, split into training and test sets.
+
+    :param data_set: name of the data set resource to load.
+    :param num_train: number of leading observations used for training; the rest become the test set.
+    :param refresh_data: if True, download the data again even when a cached copy is available.
+
+    Column 2 of 'GLBTS.long.data' is used as input and column 3 as output.
+    Rows whose column 3 equals -99.99 are dropped before splitting.
+    """
+    path = os.path.join(data_path, data_set)
+    if data_available(data_set) and not refresh_data:
+        print 'Using cached version of the data set, to use latest version set refresh_data to True'
+    else:
+        download_data(data_set)
+    data = np.loadtxt(os.path.join(path, 'GLBTS.long.data'))
+    print 'Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0]
+    # Build the row mask once and reuse it for inputs and outputs.
+    valid = data[:, 3]!=-99.99
+    allX = data[valid, 2:3]
+    allY = data[valid, 3:4]
+    X = allX[:num_train, 0:1]
+    Xtest = allX[num_train:, 0:1]
+    Y = allY[:num_train, 0:1]
+    Ytest = allY[num_train:, 0:1]
+    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Global average temperature data with " + str(num_train) + " values used as training points."}, data_set)
+
+def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False):
    path = os.path.join(data_path, data_set)
    if data_available(data_set) and not refresh_data:
        print 'Using cached version of the data set, to use latest version set refresh_data to True'
@ -609,7 +690,7 @@ def mauna_loa(data_set='mauna_loa', num_train=543, refresh_data=False):
    Y = allY[:num_train, 0:1]
    Ytest = allY[num_train:, 0:1]
    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set)
-    
+

 def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96):
    path = os.path.join(data_path, data_set)
@ -621,7 +702,7 @@ def boxjenkins_airline(data_set='boxjenkins_airline', num_train=96):
    Xtest = data[num_train:, 0:1]
    Ytest = data[num_train:, 1:2]
    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Montly airline passenger data from Box & Jenkins 1976."}, data_set)
-    
+

 def osu_run1(data_set='osu_run1', sample_every=4):
    path = os.path.join(data_path, data_set)
@ -635,7 +716,7 @@ def osu_run1(data_set='osu_run1', sample_every=4):
    return data_details_return({'Y': Y, 'connect' : connect}, data_set)

 def swiss_roll_generated(num_samples=1000, sigma=0.0):
-    with open(os.path.join(data_path, 'swiss_roll.pickle')) as f:
+    with open(os.path.join(os.path.dirname(__file__), 'datasets', 'swiss_roll.pickle')) as f:
        data = pickle.load(f)
    Na = data['Y'].shape[0]
    perm = np.random.permutation(np.r_[:Na])[:num_samples]
@ -660,7 +741,7 @@ def hapmap3(data_set='hapmap3'):
          \ -1, iff SNPij==(B2,B2)

    The SNP data and the meta information (such as iid, sex and phenotype) are
-    stored in the dataframe datadf, index is the Individual ID, 
+    stored in the dataframe datadf, index is the Individual ID,
    with following columns for metainfo:

        * family_id   -> Family ID
@ -688,15 +769,15 @@ def hapmap3(data_set='hapmap3'):
    except ImportError as i:
        raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"

-    dirpath = os.path.join(data_path,'hapmap3')
+    dir_path = os.path.join(data_path,'hapmap3')
    hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
-    unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
+    unpacked_files = [os.path.join(dir_path, hapmap_file_name+ending) for ending in ['.ped', '.map']]
    unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files))

    if not unpacked_files_exist and not data_available(data_set):
        download_data(data_set)

-    preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
+    preprocessed_data_paths = [os.path.join(dir_path,hapmap_file_name + file_name) for file_name in \
                               ['.snps.pickle',
                                '.info.pickle',
                                '.nan.pickle']]
@ -733,13 +814,13 @@ def hapmap3(data_set='hapmap3'):
                status=write_status('unpacking...', curr, status)
                os.remove(filepath)
        status=write_status('reading .ped...', 25, status)
-        # Preprocess data:    
+        # Preprocess data:
        snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
        status=write_status('reading .map...', 33, status)
        mapnp = np.loadtxt(unpacked_files[1], dtype=str)
        status=write_status('reading relationships.txt...', 42, status)
        # and metainfo:
-        infodf = DataFrame.from_csv(os.path.join(dirpath,'./relationships_w_pops_121708.txt'), header=0, sep='\t')
+        infodf = DataFrame.from_csv(os.path.join(dir_path,'./relationships_w_pops_121708.txt'), header=0, sep='\t')
        infodf.set_index('IID', inplace=1)
        status=write_status('filtering nan...', 45, status)
        snpstr = snpstrnp[:,6:].astype('S1').reshape(snpstrnp.shape[0], -1, 2)
@ -804,12 +885,12 @@ def singlecell(data_set='singlecell'):
        download_data(data_set)

    from pandas import read_csv
-    dirpath = os.path.join(data_path, data_set)
-    filename = os.path.join(dirpath, 'singlecell.csv')
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'singlecell.csv')
    Y = read_csv(filename, header=0, index_col=0)
    genes = Y.columns
    labels = Y.index
-    # data = np.loadtxt(os.path.join(dirpath, 'singlecell.csv'), delimiter=",", dtype=str)
+    # data = np.loadtxt(os.path.join(dir_path, 'singlecell.csv'), delimiter=",", dtype=str)
    return data_details_return({'Y': Y, 'info' : "qPCR singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. [2010]",
                                'genes': genes, 'labels':labels,
                                }, data_set)
@ -894,7 +975,7 @@ def olivetti_glasses(data_set='olivetti_glasses', num_training=200, seed=default
    Y = y[index[:num_training],:]
    Ytest = y[index[num_training:]]
    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed' : seed, 'info': "ORL Faces with labels identifiying who is wearing glasses and who isn't. Data is randomly partitioned according to given seed. Presence or absence of glasses was labelled by James Hensman."}, 'olivetti_faces')
-    
+
 def olivetti_faces(data_set='olivetti_faces'):
    path = os.path.join(data_path, data_set)
    if not data_available(data_set):
@ -907,7 +988,8 @@ def olivetti_faces(data_set='olivetti_faces'):
    for subject in range(40):
        for image in range(10):
            image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
-            Y.append(GPy.util.netpbmfile.imread(image_path).flatten())
+            from GPy.util import netpbmfile
+            Y.append(netpbmfile.imread(image_path).flatten())
            lbls.append(subject)
    Y = np.asarray(Y)
    lbls = np.asarray(lbls)[:, None]
@ -1109,6 +1191,30 @@ def creep_data(data_set='creep_rupture'):
    X = all_data[:, features].copy()
    return data_details_return({'X': X, 'y': y}, data_set)

+def cifar10_patches(data_set='cifar-10'):
+    """Load 5x5 colour patches cut from the CIFAR-10 ``data_batch_1`` file.
+
+    CIFAR-10 is the Canadian Institute for Advanced Research 10 image data
+    set. The loading code is taken from Boris Babenko's blog post, original
+    code available here:
+    http://bbabenko.tumblr.com/post/86756017649/learning-low-level-vision-feautres-in-10-lines-of-code
+
+    If the data set is not available it is downloaded and the archive is
+    unpacked. The images are scaled by 1/255, reordered so that the colour
+    axis comes last, and cut into 5x5 patches at offsets 0, 5, ..., 25 along
+    both image axes (pixels 30 and 31 are not covered). Each patch is
+    flattened into one row of the returned matrix.
+
+    :param data_set: name of the data resource; also the directory name
+        below ``data_path`` where the files are stored.
+    :returns: whatever ``data_details_return`` produces for a dictionary
+        holding the flattened patches under 'Y' and a description under
+        'info'.
+    """
+    dir_path = os.path.join(data_path, data_set)
+    filename = os.path.join(dir_path, 'cifar-10-python.tar.gz')
+    if not data_available(data_set):
+        download_data(data_set)
+        import tarfile
+        # This code is from Boris Babenko's blog post.
+        # http://bbabenko.tumblr.com/post/86756017649/learning-low-level-vision-feautres-in-10-lines-of-code
+        tfile = tarfile.open(filename, 'r:gz')
+        try:
+            tfile.extractall(dir_path)
+        finally:
+            # Release the archive handle even when extraction fails.
+            tfile.close()
+
+    # NOTE(review): pickle.load can execute code embedded in the file, so
+    # this is only safe while the download source is trusted.
+    with open(os.path.join(dir_path, 'cifar-10-batches-py','data_batch_1'),'rb') as f:
+        data = pickle.load(f)
+
+    images = data['data'].reshape((-1,3,32,32)).astype('float32')/255
+    images = np.rollaxis(images, 1, 4)
+    # Collect the slices and join them once: concatenating inside the loop
+    # copies everything gathered so far on every step. The empty float64
+    # seed keeps the dtype the result had when it was grown from
+    # np.zeros((0,5,5,3)).
+    pieces = [np.zeros((0,5,5,3))]
+    for x in range(0,32-5,5):
+        for y in range(0,32-5,5):
+            pieces.append(images[:,x:x+5,y:y+5,:])
+    patches = np.concatenate(pieces, axis=0)
+    patches = patches.reshape((patches.shape[0],-1))
+    return data_details_return({'Y': patches, "info" : "32x32 pixel patches extracted from the CIFAR-10 data by Boris Babenko to demonstrate k-means features."}, data_set)
+
 def cmu_mocap_49_balance(data_set='cmu_mocap'):
    """Load CMU subject 49's one legged balancing motion that was used by Alvarez, Luengo and Lawrence at AISTATS 2009."""
    train_motions = ['18', '19']
--- a/GPy/util/datasets/swiss_roll.pickle
+++ b/GPy/util/datasets/swiss_roll.pickle
--- a/GPy/util/initialization.py
+++ b/GPy/util/initialization.py
@ -16,8 +16,8 @@ def initialize_latent(init, input_dim, Y):
        var = p.fracs[:input_dim]
    else:
        var = Xr.var(0)
-    
+
    Xr -= Xr.mean(0)
-    Xr /= Xr.var(0)
-    
+    Xr /= Xr.std(0)
+
    return Xr, var/var.max()
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -15,13 +15,19 @@ import scipy
 import warnings
 import os
 from config import *
+import logging

-if float('.'.join((scipy.__version__).split('.')[:2])) >= 0.12:
+# Compare (major, minor) as a tuple of ints. Testing both components
+# element-wise is wrong for e.g. scipy 1.0: the minor 0 is smaller than 14,
+# so np.all([1, 0] >= [0, 14]) is False and a newer scipy would end up in the
+# legacy flapack branch below.
+_scipyversion = tuple(int(part) for part in scipy.__version__.split('.')[:2])
+# Stays True for scipy older than 0.14; dpotri() reads this flag to decide
+# whether to apply its lower=0 workaround.
+_fix_dpotri_scipy_bug = True
+if _scipyversion >= (0, 14):
+    from scipy.linalg import lapack
+    _fix_dpotri_scipy_bug = False
+elif _scipyversion >= (0, 12):
+    #import scipy.linalg.lapack.clapack as lapack
    from scipy.linalg import lapack
 else:
    from scipy.linalg.lapack import flapack as lapack

-
 if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'):
    try:
        anaconda_path = str(config.get('anaconda', 'location'))
@ -29,6 +35,7 @@ if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda',
        dsyrk = mkl_rt.dsyrk
        dsyr = mkl_rt.dsyr
        _blas_available = True
+        print 'anaconda installed and mkl is loaded'
    except:
        _blas_available = False
 else:
@ -87,14 +94,20 @@ def jitchol(A, maxtries=5):
            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
        jitter = diagA.mean() * 1e-6
        while maxtries > 0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
+                L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
            except:
                jitter *= 10
            finally:
                maxtries -= 1
        raise linalg.LinAlgError, "not positive definite, even with jitter."
+    import traceback
+    try: raise
+    except:
+        logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
+            '  in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
+    import ipdb;ipdb.set_trace()
+    return L



@ -104,7 +117,7 @@ def jitchol(A, maxtries=5):
 #     """
 #     Wrapper for lapack dtrtri function
 #     Inverse of L
-# 
+#
 #     :param L: Triangular Matrix L
 #     :param lower: is matrix lower (true) or upper (false)
 #     :returns: Li, info
@ -116,10 +129,17 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
    """
    Wrapper for lapack dtrtrs function

+    DTRTRS solves a triangular system of the form
+
+        A * X = B  or  A**T * X = B,
+
+    where A is a triangular matrix of order N, and B is an N-by-NRHS
+    matrix.  A check is made to verify that A is nonsingular.
+
    :param A: Matrix A(triangular)
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
-    :returns:
+    :returns: Solution to A * X = B or A**T * X = B

    """
    A = np.asfortranarray(A)
@ -141,15 +161,22 @@ def dpotri(A, lower=1):
    """
    Wrapper for lapack dpotri function

+    DPOTRI - compute the inverse of a real symmetric positive
+      definite matrix A using the Cholesky factorization A =
+      U**T*U or A = L*L**T computed by DPOTRF
+
    :param A: Matrix A
    :param lower: is matrix lower (true) or upper (false)
    :returns: A inverse

    """
-    assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
+    if _fix_dpotri_scipy_bug:
+        assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
+        lower = 0

    A = force_F_ordered(A)
-    R, info = lapack.dpotri(A, lower=0)
+    R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug
+
    symmetrify(R)
    return R, info

@ -216,7 +243,7 @@ def pdinv(A, *args):
    L = jitchol(A, *args)
    logdet = 2.*np.sum(np.log(np.diag(L)))
    Li = dtrtri(L)
-    Ai, _ = lapack.dpotri(L)
+    Ai, _ = dpotri(L, lower=1)
    # Ai = np.tril(Ai) + np.tril(Ai,-1).T
    symmetrify(Ai)

--- a/GPy/util/netpbmfile.py
+++ b/GPy/util/netpbmfile.py
@ -0,0 +1,331 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# netpbmfile.py
+
+# Copyright (c) 2011-2013, Christoph Gohlke
+# Copyright (c) 2011-2013, The Regents of the University of California
+# Produced at the Laboratory for Fluorescence Dynamics.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of the copyright holders nor the names of any
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""Read image data from, and write image data to, Netpbm files.
+
+This implementation follows the Netpbm format specifications at
+http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
+
+The following image formats are supported: PBM (bi-level), PGM (grayscale),
+PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
+
+:Author:
+  `Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
+
+:Organization:
+  Laboratory for Fluorescence Dynamics, University of California, Irvine
+
+:Version: 2013.01.18
+
+Requirements
+------------
+* `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
+* `Numpy 1.7 <http://www.numpy.org>`_
+* `Matplotlib 1.2 <http://www.matplotlib.org>`_  (optional for plotting)
+
+Examples
+--------
+>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
+>>> imsave('_tmp.pgm', im1)
+>>> im2 = imread('_tmp.pgm')
+>>> assert numpy.all(im1 == im2)
+
+"""
+
+from __future__ import division, print_function
+
+import sys
+import re
+import math
+from copy import deepcopy
+
+import numpy
+
+__version__ = '2013.01.18'
+__docformat__ = 'restructuredtext en'
+__all__ = ['imread', 'imsave', 'NetpbmFile']
+
+
+def imread(filename, *args, **kwargs):
+    """Return image data from Netpbm file as numpy array.
+
+    `args` and `kwargs` are arguments to NetpbmFile.asarray().
+
+    :param filename: passed to the NetpbmFile constructor.
+    :returns: the array produced by NetpbmFile.asarray().
+
+    Examples
+    --------
+    >>> image = imread('_tmp.pgm')
+
+    """
+    # Construct outside the try block: when the constructor raises there is
+    # nothing to close, and touching the still-unbound name in ``finally``
+    # would replace the real error with an UnboundLocalError.
+    netpbm = NetpbmFile(filename)
+    try:
+        # Forward the extra arguments as the docstring promises.
+        image = netpbm.asarray(*args, **kwargs)
+    finally:
+        netpbm.close()
+    return image
+
+
+def imsave(filename, data, maxval=None, pam=False):
+    """Write image data to Netpbm file.
+
+    :param filename: passed to NetpbmFile.write() as the output target.
+    :param data: image data handed to the NetpbmFile constructor.
+    :param maxval: passed to the NetpbmFile constructor.
+    :param pam: passed to NetpbmFile.write().
+
+    Examples
+    --------
+    >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
+    >>> imsave('_tmp.pgm', image)
+
+    """
+    # Construct outside the try block: when the constructor rejects the data
+    # there is nothing to close, and touching the still-unbound name in
+    # ``finally`` would replace the real error with an UnboundLocalError.
+    netpbm = NetpbmFile(data, maxval=maxval)
+    try:
+        netpbm.write(filename, pam=pam)
+    finally:
+        netpbm.close()
+
+
+class NetpbmFile(object):
+    """Read and write Netpbm PAM, PBM, PGM, PPM, files.
+
+    An instance is created either from a file (a file name or an object with
+    a ``seek`` attribute), in which case the header is parsed immediately and
+    the pixel data is read by ``asarray``, or from array data, in which case
+    the header is generated from the array.
+
+    The attributes ``header``, ``magicnum``, ``width``, ``height``,
+    ``maxval``, ``depth`` and ``tupltypes`` describe the image; all of them
+    are initialized to None before anything is parsed.
+    """
+
+    # Maps a magic number to the tuple type stored in ``tupltypes``.
+    _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
+              b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
+              b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}
+
+    def __init__(self, arg=None, **kwargs):
+        """Initialize instance from filename, open file, or numpy array.
+
+        :param arg: None (treated as empty array data), a file name, an
+            object with a ``seek`` attribute (treated as an open file), or
+            anything else, which is handed to ``_fromdata``.
+        :param kwargs: forwarded to ``_fromfile`` or ``_fromdata``. Note
+            that ``_fromfile`` accepts no keyword arguments.
+        """
+        for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
+                     'depth', 'tupltypes', '_filename', '_fh', '_data'):
+            setattr(self, attr, None)
+        if arg is None:
+            self._fromdata([], **kwargs)
+        elif isinstance(arg, basestring):
+            # Only a file opened here gets ``_filename`` set, and ``close``
+            # closes the handle only when ``_filename`` is set.
+            self._fh = open(arg, 'rb')
+            self._filename = arg
+            self._fromfile(self._fh, **kwargs)
+        elif hasattr(arg, 'seek'):
+            self._fromfile(arg, **kwargs)
+            self._fh = arg
+        else:
+            self._fromdata(arg, **kwargs)
+
+    def asarray(self, copy=True, cache=False, **kwargs):
+        """Return image data from file as numpy array.
+
+        :param copy: when the data is already held on the instance (or is
+            being cached by this call), return a deep copy of it instead of
+            the stored array.
+        :param cache: store freshly read data on the instance.
+        :param kwargs: forwarded to ``_read_data``.
+        """
+        data = self._data
+        if data is None:
+            data = self._read_data(self._fh, **kwargs)
+            if cache:
+                self._data = data
+            else:
+                # Freshly read and not stored anywhere: no copy is made.
+                return data
+        return deepcopy(data) if copy else data
+
+    def write(self, arg, **kwargs):
+        """Write instance to file.
+
+        :param arg: an object with a ``seek`` attribute (written to
+            directly) or a file name, which is opened in binary write mode.
+        :param kwargs: forwarded to ``_tofile``.
+        """
+        if hasattr(arg, 'seek'):
+            self._tofile(arg, **kwargs)
+        else:
+            with open(arg, 'wb') as fid:
+                self._tofile(fid, **kwargs)
+
+    def close(self):
+        """Close open file. Future asarray calls might fail."""
+        # A file object supplied by the caller has no ``_filename`` and is
+        # therefore left open.
+        if self._filename and self._fh:
+            self._fh.close()
+            self._fh = None
+
+    def __del__(self):
+        self.close()
+
+    def _fromfile(self, fh):
+        """Initialize instance from open file.
+
+        Reads the first 4096 bytes and tries to parse them as a PAM header,
+        then as a PNM header.
+
+        :raises ValueError: if neither header format can be parsed.
+        """
+        fh.seek(0)
+        data = fh.read(4096)
+        # The second byte has to be one of the digits 1 to 7.
+        if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
+            raise ValueError("Not a Netpbm file:\n%s" % data[:32])
+        try:
+            self._read_pam_header(data)
+        except Exception:
+            try:
+                self._read_pnm_header(data)
+            except Exception:
+                raise ValueError("Not a Netpbm file:\n%s" % data[:32])
+
+    def _read_pam_header(self, data):
+        """Read PAM header and initialize instance."""
+        regroups = re.search(
+            b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
+            b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
+            b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
+        self.header = regroups[0]
+        self.magicnum = b'P7'
+        # Each remaining group is a "KEY value" pair that is stored under
+        # the lower-cased key. A group that did not match is None, so the
+        # split raises; ``_fromfile`` catches that and tries the PNM parser.
+        for group in regroups[1:]:
+            key, value = group.split()
+            setattr(self, unicode(key).lower(), int(value))
+        matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
+        self.tupltypes = [s.split(None, 1)[1] for s in matches]
+
+    def _read_pnm_header(self, data):
+        """Read PNM header and initialize instance."""
+        # For magic numbers P1 and P4 the pattern expects one number fewer
+        # and a 1 is appended to the groups, which ends up as ``maxval``.
+        bpm = data[1:2] in b"14"
+        regroups = re.search(b"".join((
+            b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
+            b"\s*(\d+)\s+(?:#.*[\r\n])*",
+            b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
+            b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
+        self.header = regroups[0]
+        self.magicnum = regroups[1]
+        self.width = int(regroups[2])
+        self.height = int(regroups[3])
+        self.maxval = int(regroups[4])
+        # Substring test against the concatenated three-channel magic numbers.
+        self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
+        self.tupltypes = [self._types[self.magicnum]]
+
+    def _read_data(self, fh, byteorder='>'):
+        """Return image data from open file as numpy array.
+
+        :param fh: open file; everything after the header is read.
+        :param byteorder: byte-order prefix used for the two-byte dtype that
+            is selected when ``maxval`` is 256 or more.
+        """
+        fh.seek(len(self.header))
+        data = fh.read()
+        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
+        depth = 1 if self.magicnum == b"P7 332" else self.depth
+        # The leading -1 lets reshape infer the number of stacked images.
+        shape = [-1, self.height, self.width, depth]
+        size = numpy.prod(shape[1:])
+        if self.magicnum in b"P1P2P3":
+            # Values are whitespace-separated tokens; keep at most ``size``.
+            data = numpy.array(data.split(None, size)[:size], dtype)
+            data = data.reshape(shape)
+        elif self.maxval == 1:
+            # Bit-packed rows: reshape with the width in whole bytes, unpack
+            # the bits and cut the rows back to ``width``.
+            shape[2] = int(math.ceil(self.width / 8))
+            data = numpy.frombuffer(data, dtype).reshape(shape)
+            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
+        else:
+            data = numpy.frombuffer(data, dtype)
+            # Drop trailing values that do not fill a complete image.
+            data = data[:size * (data.size // size)].reshape(shape)
+        # Remove the leading axis for a single image and the trailing axis
+        # for a single channel.
+        if data.shape[0] < 2:
+            data = data.reshape(data.shape[1:])
+        if data.shape[-1] < 2:
+            data = data.reshape(data.shape[:-1])
+        if self.magicnum == b"P7 332":
+            # Lookup table with one (r, g, b) row per index of an 8x8x4 grid,
+            # scaled per channel by 36, 36 and 85; every stored value selects
+            # one row.
+            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
+            rgb332 *= [36, 36, 85]
+            data = numpy.take(rgb332, data, axis=0)
+        return data
+
+    def _fromdata(self, data, maxval=None):
+        """Initialize instance from numpy array.
+
+        :param data: array-like of unsigned, signed or boolean kind; it is
+            copied and given at least two dimensions.
+        :param maxval: maximum sample value; when None it is set to 255 if
+            the largest value is below 256 and to 65535 otherwise.
+        :raises ValueError: for a non-integer dtype, negative values, or a
+            ``maxval`` outside 0..65535.
+        """
+        data = numpy.array(data, ndmin=2, copy=True)
+        if data.dtype.kind not in "uib":
+            raise ValueError("not an integer type: %s" % data.dtype)
+        if data.dtype.kind == 'i' and numpy.min(data) < 0:
+            raise ValueError("data out of range: %i" % numpy.min(data))
+        if maxval is None:
+            maxval = numpy.max(data)
+            maxval = 255 if maxval < 256 else 65535
+        if maxval < 0 or maxval > 65535:
+            raise ValueError("data out of range: %i" % maxval)
+        data = data.astype('u1' if maxval < 256 else '>u2')
+        self._data = data
+        # More than two dimensions with a last axis of length 3 or 4 is
+        # treated as a multi-channel image; everything else as one channel.
+        if data.ndim > 2 and data.shape[-1] in (3, 4):
+            self.depth = data.shape[-1]
+            self.width = data.shape[-2]
+            self.height = data.shape[-3]
+            self.magicnum = b'P7' if self.depth == 4 else b'P6'
+        else:
+            self.depth = 1
+            self.width = data.shape[-1]
+            self.height = data.shape[-2]
+            self.magicnum = b'P5' if maxval > 1 else b'P4'
+        self.maxval = maxval
+        self.tupltypes = [self._types[self.magicnum]]
+        self.header = self._header()
+
+    def _tofile(self, fh, pam=False):
+        """Write Netbm file.
+
+        :param fh: open file; writing starts at position 0.
+        :param pam: passed to ``_header`` to request a PAM (P7) header.
+        """
+        fh.seek(0)
+        fh.write(self._header(pam))
+        data = self.asarray(copy=False)
+        if self.maxval == 1:
+            # Pack the values into bits along the last axis before writing.
+            data = numpy.packbits(data, axis=-1)
+        data.tofile(fh)
+
+    def _header(self, pam=False):
+        """Return file header as byte string.
+
+        A PAM (P7) header is produced when ``pam`` is true or the magic
+        number is P7; otherwise a P4, P5 or P6 header is chosen from
+        ``maxval`` and ``depth``.
+        """
+        if pam or self.magicnum == b'P7':
+            header = "\n".join((
+                "P7",
+                "HEIGHT %i" % self.height,
+                "WIDTH %i" % self.width,
+                "DEPTH %i" % self.depth,
+                "MAXVAL %i" % self.maxval,
+                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
+                "ENDHDR\n"))
+        elif self.maxval == 1:
+            header = "P4 %i %i\n" % (self.width, self.height)
+        elif self.depth == 1:
+            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
+        else:
+            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
+        # On Python 3 the text header is converted to ASCII bytes.
+        if sys.version_info[0] > 2:
+            header = bytes(header, 'ascii')
+        return header
+
+    def __str__(self):
+        """Return information about instance."""
+        return unicode(self.header)
+
+
+# On Python 3, define the names ``basestring`` and ``unicode`` that the code
+# above refers to. Here ``unicode`` decodes its argument as ASCII.
+if sys.version_info[0] > 2:
+    basestring = str
+    unicode = lambda x: str(x, 'ascii')
+
+if __name__ == "__main__":
+    # Show images specified on command line or all images in current directory
+    from glob import glob
+    from matplotlib import pyplot
+    files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
+    for fname in files:
+        try:
+            pam = NetpbmFile(fname)
+            img = pam.asarray(copy=False)
+            # Disabled round-trip check: write the image out (once through
+            # write(pam=True), once through imsave) and compare what is read
+            # back with the original data.
+            if False:
+                pam.write('_tmp.pgm.out', pam=True)
+                img2 = imread('_tmp.pgm.out')
+                assert numpy.all(img == img2)
+                imsave('_tmp.pgm.out', img)
+                img2 = imread('_tmp.pgm.out')
+                assert numpy.all(img == img2)
+            pam.close()
+        except ValueError as e:
+            # NetpbmFile raises ValueError for a file it cannot parse; report
+            # it and continue with the next file.
+            print(fname, e)
+            continue
+        # Remember the full shape for the title before the array is reduced.
+        _shape = img.shape
+        # Display only the first entry along the leading axis when the array
+        # has more dimensions than a single image with optional 3 or 4
+        # channels.
+        if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
+            img = img[0]
+        cmap = 'gray' if pam.maxval > 1 else 'binary'
+        pyplot.imshow(img, cmap, interpolation='nearest')
+        pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
+                                      _shape, img.dtype))
+        pyplot.show()
--- a/GPy/util/subarray_and_sorting.py
+++ b/GPy/util/subarray_and_sorting.py
@ -4,9 +4,9 @@
 .. moduleauthor:: Max Zwiessele <ibinbei@gmail.com>

 '''
-__updated__ = '2014-05-20'
+__updated__ = '2014-05-21'

-import numpy as np
+import numpy as np, logging

 def common_subarrays(X, axis=0):
    """
@ -14,15 +14,15 @@ def common_subarrays(X, axis=0):
    Common subarrays are returned as a dictionary of <subarray, [index]> pairs, where
    the subarray is a tuple representing the subarray and the index is the index
    for the subarray in X, where index is the index to the remaining axis.
-    
+
    :param :class:`np.ndarray` X: 2d array to check for common subarrays in
-    :param int axis: axis to apply subarray detection over. 
-        When the index is 0, compare rows -- columns, otherwise.   
-    
+    :param int axis: axis to apply subarray detection over.
+        When the index is 0, compare rows -- columns, otherwise.
+
    Examples:
    =========

-    In a 2d array:    
+    In a 2d array:
    >>> import numpy as np
    >>> X = np.zeros((3,6), dtype=bool)
    >>> X[[1,1,1],[0,4,5]] = 1; X[1:,[2,3]] = 1
@ -48,9 +48,15 @@ def common_subarrays(X, axis=0):
    assert X.ndim == 2 and axis in (0,1), "Only implemented for 2D arrays"
    subarrays = defaultdict(list)
    cnt = count()
-    np.apply_along_axis(lambda x: iadd(subarrays[tuple(x)], [cnt.next()]), 1-axis, X)
+    def accumulate(x, s, c):
+        t = tuple(x)
+        col = c.next()
+        iadd(s[t], [col])
+        return None
+    if axis == 0: [accumulate(x, subarrays, cnt) for x in X]
+    else: [accumulate(x, subarrays, cnt) for x in X.T]
    return subarrays

 if __name__ == '__main__':
    import doctest
-    doctest.testmod()
+    doctest.testmod()
--- a/GPy/util/univariate_Gaussian.py
+++ b/GPy/util/univariate_Gaussian.py
@ -40,6 +40,37 @@ def std_norm_cdf(x):
    weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code)
    return cdf_x

+def std_norm_cdf_np(x):
+    """
+    Cumulative standard Gaussian distribution
+    Based on Abramowitz, M. and Stegun, I. (1970)
+
+    The error function is evaluated with a five-term polynomial
+    approximation in t = 1/(1 + p*|x|/sqrt(2)) and the result is mirrored for
+    negative arguments, so the value at 0 is exactly 0.5.
+
+    NOTE(review): the original author described this pure numpy version as
+    slower than the alternative implementation (about 3x for a scalar);
+    timings have not been re-measured.
+
+    :param x: scalar or array-like of any shape; it is converted to a float
+        array and never modified in place.
+    :returns: the cdf evaluated element-wise, with the same shape as x (a
+        numpy scalar for scalar input)
+    """
+    # Work on a float copy/view: the caller's array is left untouched and
+    # integer input no longer breaks on an in-place true division.
+    x = np.asarray(x, dtype=np.float64)
+    sign = np.sign(x)
+    # The approximation is defined for non-negative arguments only.
+    z = np.abs(x) / np.sqrt(2.)
+    t = 1.0/(1.0 +  0.3275911*z)
+    erf = 1. - np.exp(-z**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
+    cdf_x = 0.5*(1.0 + sign*erf)
+    return cdf_x
+
+
 def inv_std_norm_cdf(x):
    """
    Inverse cumulative standard Gaussian distribution
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -2,3 +2,7 @@ include *.txt
 recursive-include doc *.txt
 include *.md
 recursive-include doc *.md
+include *.cfg
+recursive-include doc *.cfg
+include *.json
+recursive-include doc *.json
--- a/setup.py
+++ b/setup.py
@ -7,6 +7,7 @@ from setuptools import setup
 # Version number
 version = '0.4.6'

+from pkg_resources import Requirement
 def read(fname):
    return open(os.path.join(os.path.dirname(__file__), fname)).read()

@ -20,7 +21,7 @@ setup(name = 'GPy',
      url = "http://sheffieldml.github.com/GPy/",
      packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"],
      package_dir={'GPy': 'GPy'},
-      package_data = {'GPy': ['GPy/examples']},
+      package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
      py_modules = ['GPy.__init__'],
      long_description=read('README.md'),
      install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
@ -29,4 +30,5 @@ setup(name = 'GPy',
      },
      classifiers=[
      "License :: OSI Approved :: BSD License"],
+      zip_safe = False
      )
				`@ -0,0 +1,2 @@`
				`# This is the local installation configuration file for GPy`