Merge branch 'params' of github.com:SheffieldML/GPy into params

2026-07-11 16:22:13 +02:00 · 2014-03-03 17:45:28 +00:00 · 2014-03-03 17:45:28 +00:00 · 8926ffeb28
commit 8926ffeb28
parent db57005826 8b2ba52a8f
29 changed files with 1030 additions and 720 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -31,7 +31,7 @@ class GP(Model):
        super(GP, self).__init__(name)
        assert X.ndim == 2
-        if isinstance(X, ObservableArray) or isinstance(X, VariationalPosterior):
+        if isinstance(X, (ObservableArray, VariationalPosterior)):
            self.X = X
        else: self.X = ObservableArray(X)
@ -224,13 +224,9 @@ class GP(Model):
                self.kern,
                self.likelihood,
                self.output_dim,
                self._Xoffset,
                self._Xscale,
                ]
    def _setstate(self, state):
        self._Xscale = state.pop()
        self._Xoffset = state.pop()
        self.output_dim = state.pop()
        self.likelihood = state.pop()
        self.kern = state.pop()
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -60,20 +60,6 @@ class Model(Parameterized):
        self.priors = state.pop()
        Parameterized._setstate(self, state)
    def randomize(self):
        """
        Assign random values to the parameters of the model.

        A standard normal sample of length ``self.size_transformed`` is
        passed through ``self._untransform_params``; entries which have a
        prior are then overwritten with draws from that prior before the
        result is handed to ``self._set_params``.
        """
        # N(0,1) sample for every transformed parameter, mapped back
        sample = self._untransform_params(np.random.randn(self.size_transformed))
        # entries with a prior are drawn from that prior instead
        for prior, index in self.priors.iteritems():
            if prior is not None:
                np.put(sample, index, prior.rvs(index.size))
        self._set_params(sample)
    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
@ -240,6 +226,11 @@ class Model(Parameterized):
        TODO: valid args
        """
        if self.is_fixed:
            raise RuntimeError, "Cannot optimize, when everything is fixed"
        if self.size == 0:
            raise RuntimeError, "Model without parameters cannot be minimized"
        if optimizer is None:
            optimizer = self.preferred_optimizer
@ -279,7 +270,7 @@ class Model(Parameterized):
           and numerical gradients is within <tolerance> of unity.
        """
-        x = self._get_params_transformed().copy()
+        x = self._get_params_transformed()
        if not verbose:
            # make sure only to test the selected parameters
@ -297,7 +288,7 @@ class Model(Parameterized):
                    return
            # just check the global ratio
-            dx = np.zeros_like(x)
+            dx = np.zeros(x.shape)
            dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
            # evaluate around the point x
@ -308,9 +299,8 @@ class Model(Parameterized):
            dx = dx[transformed_index]
            gradient = gradient[transformed_index]
            numerical_gradient = (f1 - f2) / (2 * dx)
            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient == 0, 1e-32, gradient)))
-            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
+            return (np.abs(1. - global_ratio) < tolerance)
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
--- a/GPy/core/parameterization/array_core.py
+++ b/GPy/core/parameterization/array_core.py
@ -6,19 +6,6 @@ __updated__ = '2013-12-16'
 import numpy as np
 from parameter_core import Observable
 class ParamList(list):
    """
    A list for ndarray-like objects whose membership test uses identity.

    ``x in lst`` is True only when ``x`` *is* one of the stored elements;
    ``__eq__`` of the elements is not called by the membership test.
    """
    def __contains__(self, other):
        return any(item is other for item in self)
 class ObservableArray(np.ndarray, Observable):
    """
    An ndarray which reports changes to its observers.
@ -62,10 +49,11 @@ class ObservableArray(np.ndarray, Observable):
    def __setitem__(self, s, val):
        if self._s_not_empty(s):
            super(ObservableArray, self).__setitem__(s, val)
-            self._notify_observers()
+            self._notify_observers(self[s])
    def __getslice__(self, start, stop):
        # Python 2 slice hook: route ``self[start:stop]`` through __getitem__
        # with an equivalent slice object.
        return self.__getitem__(slice(start, stop))
    def __setslice__(self, start, stop, val):
        # Python 2 slice hook: route ``self[start:stop] = val`` through
        # __setitem__ with an equivalent slice object.
        return self.__setitem__(slice(start, stop), val)
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@ -5,47 +5,7 @@ Created on Oct 2, 2013
 '''
 import numpy
 from numpy.lib.function_base import vectorize
-from param import Param
+from lists_and_dicts import IntArrayDict
 from collections import defaultdict
 class ParamDict(defaultdict):
    """
    defaultdict which additionally matches keys by value.

    Besides the regular dict lookup, the stored keys are scanned for one
    with ``numpy.all(a==key)`` and an equal ``_parent_index_``; such a
    stored key is treated as the same key.
    """
    def __init__(self):
        """
        Initialise the dict with ``self.default_factory`` as the factory
        for missing keys.
        """
        defaultdict.__init__(self, self.default_factory)
    def __getitem__(self, key):
        try:
            return defaultdict.__getitem__(self, key)
        except KeyError:
            # NOTE(review): defaultdict only raises KeyError here when no
            # default factory is set; with a factory the missing entry is
            # created instead -- confirm this fallback is ever reached.
            for a in self.iterkeys():
                if numpy.all(a==key) and a._parent_index_==key._parent_index_:
                    return defaultdict.__getitem__(self, a)
            # no equivalent key stored: re-raise the original KeyError
            raise        
    def __contains__(self, key):
        # fast path: regular hash based membership
        if defaultdict.__contains__(self, key):
            return True
        # slow path: linear scan for an equivalent stored key
        for a in self.iterkeys():
            if numpy.all(a==key) and a._parent_index_==key._parent_index_:
                return True
        return False
    def __setitem__(self, key, value):
        # for Param keys, reuse an equivalent stored key if there is one,
        # so that no second entry is created for it
        if isinstance(key, Param):
            for a in self.iterkeys():
                if numpy.all(a==key) and a._parent_index_==key._parent_index_:
                    return super(ParamDict, self).__setitem__(a, value)
        defaultdict.__setitem__(self, key, value)
 class SetDict(ParamDict):
    # ParamDict whose factory for missing keys yields an empty set
    def default_factory(self):
        return set()
 class IntArrayDict(ParamDict):
    # ParamDict whose factory for missing keys yields an empty integer array
    def default_factory(self):
        return numpy.int_([])
 class ParameterIndexOperations(object):
    '''
@ -194,9 +154,13 @@ class ParameterIndexOperationsView(object):
    def shift_right(self, start, size):
-        raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations'
+        self._param_index_ops.shift_right(start+self._offset, size)
    def shift_left(self, start, size):
        # forward the shift to the wrapped index operations; ``start`` is
        # given relative to this view, so this view's offset is added
        self._param_index_ops.shift_left(start+self._offset, size)
        # reduce this view's own offset and size by the shifted amount
        self._offset -= size
        self._size -= size
    def clear(self):
        for i, ind in self.items():
            self._param_index_ops.remove(i, ind+self._offset)
@ -232,9 +196,7 @@ class ParameterIndexOperationsView(object):
    def __getitem__(self, prop):
        ind = self._filter_index(self._param_index_ops[prop])
-        if ind.size > 0:
+        return ind
            return ind
        raise KeyError, prop
    def __str__(self, *args, **kwargs):
        import pprint
--- a/GPy/core/parameterization/lists_and_dicts.py
+++ b/GPy/core/parameterization/lists_and_dicts.py
@ -0,0 +1,35 @@
 '''
 Created on 27 Feb 2014
@author: maxz
 '''
 from collections import defaultdict
 class DefaultArrayDict(defaultdict):
    """
    defaultdict which takes its factory from the ``default_factory``
    attribute of the instance instead of a constructor argument.
    """
    def __init__(self):
        # whatever ``default_factory`` resolves to on this instance is
        # handed to defaultdict as the factory for missing keys
        super(DefaultArrayDict, self).__init__(self.default_factory)
 class SetDict(DefaultArrayDict):
    # DefaultArrayDict whose factory for missing keys yields an empty set
    def default_factory(self):
        return set()
 class IntArrayDict(DefaultArrayDict):
    # DefaultArrayDict whose factory for missing keys yields an empty
    # integer array
    def default_factory(self):
        # numpy is imported locally, inside the factory
        import numpy as np
        return np.int_([])
 class ArrayList(list):
    """
    A list for ndarray-like objects whose membership test uses identity.

    ``x in lst`` is True only when ``x`` *is* one of the stored elements;
    ``__eq__`` of the elements is not called by the membership test.
    """
    def __contains__(self, other):
        return any(item is other for item in self)
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@ -3,8 +3,8 @@
 import itertools
 import numpy
-from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing
+from parameter_core import OptimizationHandlable, Gradcheckable, adjust_name_for_printing
-from array_core import ObservableArray, ParamList
+from array_core import ObservableArray
 ###### printing
 __constraints_name__ = "Constraint"
@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
 __print_threshold__ = 5
 ######
-class Param(Constrainable, ObservableArray, Gradcheckable):
+class Param(OptimizationHandlable, ObservableArray, Gradcheckable):
    """
    Parameter object for GPy models.
@ -50,7 +50,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
        obj._realsize_ = obj.size
        obj._realndim_ = obj.ndim
        obj._updated_ = False
-        from index_operations import SetDict
+        from lists_and_dicts import SetDict
        obj._tied_to_me_ = SetDict()
        obj._tied_to_ = []
        obj._original_ = True
@ -148,8 +148,11 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
    #===========================================================================
    # get/set parameters
    #===========================================================================
-    def _set_params(self, param, update=True):
+    def _set_params(self, param, trigger_parent=True):
        self.flat = param
        if trigger_parent: min_priority = None
        else: min_priority = -numpy.inf
        self._notify_observers(None, min_priority)
    def _get_params(self):
        # the values of this parameter as its ``flat`` attribute
        return self.flat
@ -172,11 +175,9 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
        try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
        except AttributeError: pass  # returning 0d array or float, double etc
        return new_arr
    def __setitem__(self, s, val):
        # store the value through the parent class implementation first
        super(Param, self).__setitem__(s, val)
        # then tell the direct parent, if there is one, that parameters changed
        if self.has_parent():
            self._direct_parent_._notify_parameters_changed()
        #self._notify_observers()
    #===========================================================================
    # Index Operations:
@ -204,6 +205,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
        ind = self._indices(slice_index)
        if ind.ndim < 2: ind = ind[:, None]
        return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
    def _expand_index(self, slice_index=None):
        # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
        # it basically translates slices to their respective index arrays and turns negative indices around
@ -230,7 +232,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
    #===========================================================================
    @property
    def is_fixed(self):
-        return self._highest_parent_._is_fixed(self)
+        from transformations import __fixed__
        return self.constraints[__fixed__].size == self.size
    #def round(self, decimals=0, out=None):
    #    view = super(Param, self).round(decimals, out).view(Param)
    #    view.__array_finalize__(self)
@ -244,7 +247,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
    #===========================================================================
    @property
    def _description_str(self):
-        if self.size <= 1: return ["%f" % self]
+        if self.size <= 1: 
            return [str(numpy.take(self, 0))]
        else: return [str(self.shape)]
    def parameter_names(self, add_self=False, adjust_for_printing=False):
        if adjust_for_printing:
@ -267,7 +271,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
        return [t._short() for t in self._tied_to_] or ['']
    def __repr__(self, *args, **kwargs):
        name = "\033[1m{x:s}\033[0;0m:\n".format(
-                            x=self.hirarchy_name())
+                            x=self.hierarchy_name())
        return name + super(Param, self).__repr__(*args, **kwargs)
    def _ties_for(self, rav_index):
        # size = sum(p.size for p in self._tied_to_)
@ -301,12 +305,12 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
        gen = map(lambda x: " ".join(map(str, x)), gen)
        return reduce(lambda a, b:max(a, len(b)), gen, len(header))
    def _max_len_values(self):
-        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name()))
+        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hierarchy_name()))
    def _max_len_index(self, ind):
        # longest ``str()`` representation among the entries of ``ind``,
        # but never less than the length of the index column header
        return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
    def _short(self):
        # short string to print
-        name = self.hirarchy_name()
+        name = self.hierarchy_name()
        if self._realsize_ < 2:
            return name
        ind = self._indices()
@ -329,8 +333,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
        if lp is None: lp = self._max_len_names(prirs, __tie_name__)
        sep = '-'
        header_format = "  {i:{5}^{2}s}  |  \033[1m{x:{5}^{1}s}\033[0;0m  |  {c:{5}^{0}s}  |  {p:{5}^{4}s}  |  {t:{5}^{3}s}"
-        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp)  # nice header for printing
+        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp)  # nice header for printing
-        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__)  # nice header for printing
+        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__)  # nice header for printing
        if not ties: ties = itertools.cycle([''])
        return "\n".join([header] + ["  {i!s:^{3}s}  |  {x: >{1}.{2}g}  |  {c:^{0}s}  |  {p:^{5}s}  |  {t:^{4}s}  ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)])  # return all the constraints with right indices
        # except: return super(Param, self).__str__()
@ -345,7 +349,8 @@ class ParamConcatenation(object):
        See :py:class:`GPy.core.parameter.Param` for more details on constraining.
        """
        # self.params = params
-        self.params = ParamList([])
+        from lists_and_dicts import ArrayList
        self.params = ArrayList([])
        for p in params:
            for p in p.flattened_parameters:
                if p not in self.params:
@ -353,6 +358,21 @@ class ParamConcatenation(object):
        self._param_sizes = [p.size for p in self.params]
        startstops = numpy.cumsum([0] + self._param_sizes)
        self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
        parents = dict()
        for p in self.params:
            if p.has_parent():
                parent = p._direct_parent_
                level = 0
                while parent is not None:
                    if parent in parents:
                        parents[parent] = max(level, parents[parent])
                    else:
                        parents[parent] = level
                    level += 1
                    parent = parent._direct_parent_
        import operator
        self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
    #===========================================================================
    # Get/set items, enable broadcasting
    #===========================================================================
@ -366,24 +386,26 @@ class ParamConcatenation(object):
            val = val._vals()
        ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
        vals = self._vals(); vals[s] = val; del val
-        [numpy.place(p, ind[ps], vals[ps]) and update and p._notify_observers()
+        [numpy.place(p, ind[ps], vals[ps])
         for p, ps in zip(self.params, self._param_slices_)]
        if update:
            self.update_all_params()
    def _vals(self):
        # values of all concatenated params stacked into one array, in the
        # order of ``self.params``
        return numpy.hstack([p._get_params() for p in self.params])
    #===========================================================================
    # parameter operations:
    #===========================================================================
    def update_all_params(self):
-        for p in self.params:
+        for par in self.parents:
-            p._notify_observers()
+            par._notify_observers(-numpy.inf)
-
+        
    def constrain(self, constraint, warning=True):
-        [param.constrain(constraint, update=False) for param in self.params]
+        [param.constrain(constraint, trigger_parent=False) for param in self.params]
        self.update_all_params()
    constrain.__doc__ = Param.constrain.__doc__
    def constrain_positive(self, warning=True):
-        [param.constrain_positive(warning, update=False) for param in self.params]
+        [param.constrain_positive(warning, trigger_parent=False) for param in self.params]
        self.update_all_params()
    constrain_positive.__doc__ = Param.constrain_positive.__doc__
@ -393,12 +415,12 @@ class ParamConcatenation(object):
    fix = constrain_fixed
    def constrain_negative(self, warning=True):
-        [param.constrain_negative(warning, update=False) for param in self.params]
+        [param.constrain_negative(warning, trigger_parent=False) for param in self.params]
        self.update_all_params()
    constrain_negative.__doc__ = Param.constrain_negative.__doc__
    def constrain_bounded(self, lower, upper, warning=True):
-        [param.constrain_bounded(lower, upper, warning, update=False) for param in self.params]
+        [param.constrain_bounded(lower, upper, warning, trigger_parent=False) for param in self.params]
        self.update_all_params()
    constrain_bounded.__doc__ = Param.constrain_bounded.__doc__
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -2,35 +2,70 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
 __updated__ = '2013-12-16'
 class HierarchyError(Exception):
    """
    Raised when something is wrong with the parameter hierarchy.
    """
 def adjust_name_for_printing(name):
    """
    Return a cleaned-up version of ``name``.

    Spaces and dots become underscores, the characters ``- + ! * /`` are
    dropped. ``None`` gives the empty string.
    """
    if name is None:
        return ''
    adjusted = name
    for char in " .":
        adjusted = adjusted.replace(char, "_")
    for char in "-+!*/":
        adjusted = adjusted.replace(char, "")
    return adjusted
 class Observable(object):
    _updated = True
    def __init__(self, *args, **kwargs):
-        from collections import defaultdict
+        self._observer_callables_ = []
        self._observer_callables_ = defaultdict(list)
    def add_observer(self, observer, callble):
        self._observer_callables_[observer].append(callble)
    def add_observer(self, observer, callble, priority=0):
        self._insert_sorted(priority, observer, callble)
    def remove_observer(self, observer, callble=None):
-        if observer in self._observer_callables_:
+        to_remove = []
-            if callble is None:
+        for p, obs, clble in self._observer_callables_:
-                del self._observer_callables_[observer]
+            if callble is not None:
-            elif callble in self._observer_callables_[observer]:
+                if (obs == observer) and (callble == clble):
-                self._observer_callables_[observer].remove(callble)
+                    to_remove.append((p, obs, clble))
-                if len(self._observer_callables_[observer]) == 0:
+            else:
-                    self.remove_observer(observer)
+                if obs is observer:
-
+                    to_remove.append((p, obs, clble))
-    def _notify_observers(self):
+        for r in to_remove:
-        [[callble(self) for callble in callables]
+            self._observer_callables_.remove(r)
-         for callables in self._observer_callables_.itervalues()]
+                
    def _notify_observers(self, which=None, min_priority=None):
        """
        Notifies all observers. Which is the element, which kicked off this 
        notification loop.
        NOTE: notifies only observers with priority p > min_priority!
                                                    ^^^^^^^^^^^^^^^^
        :param which: object, which started this notification loop
        :param min_priority: only notify observers with priority > min_priority
                             if min_priority is None, notify all observers in order
        """
        if which is None:
            which = self
        if min_priority is None:
            [callble(which) for _, _, callble in self._observer_callables_]
        else:
            for p, _, callble in self._observer_callables_:
                if p <= min_priority:
                    break
                callble(which)
    def _insert_sorted(self, p, o, c):
        ins = 0
        for pr, _, _ in self._observer_callables_:
            if p > pr:
                break
            ins += 1
        self._observer_callables_.insert(ins, (p, o, c))
 class Pickleable(object):
    def _getstate(self):
        """
@ -95,11 +130,11 @@ class Nameable(Parentable):
        self._name = name
        if self.has_parent():
            self._direct_parent_._name_changed(self, from_name)
-    def hirarchy_name(self, adjust_for_printing=True):
+    def hierarchy_name(self, adjust_for_printing=True):
        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
        else: adjust = lambda x: x
        if self.has_parent():
-            return self._direct_parent_.hirarchy_name() + "." + adjust(self.name)
+            return self._direct_parent_.hierarchy_name() + "." + adjust(self.name)
        return adjust(self.name)
@ -156,7 +191,7 @@ class Constrainable(Nameable, Indexable):
    #===========================================================================
    # Fixing Parameters:
    #===========================================================================
-    def constrain_fixed(self, value=None, warning=True):
+    def constrain_fixed(self, value=None, warning=True, trigger_parent=True):
        """
        Constrain this paramter to be fixed to the current value it carries.
@ -164,7 +199,7 @@ class Constrainable(Nameable, Indexable):
        """
        if value is not None:
            self[:] = value
-        self.constrain(__fixed__, warning=warning)
+        self.constrain(__fixed__, warning=warning, trigger_parent=trigger_parent)
        rav_i = self._highest_parent_._raveled_index_for(self)
        self._highest_parent_._set_fixed(rav_i)
    fix = constrain_fixed
@ -178,20 +213,17 @@ class Constrainable(Nameable, Indexable):
    unfix = unconstrain_fixed
    def _set_fixed(self, index):
        # Mark the entries selected by ``index`` as FIXED in the boolean
        # mask ``self._fixes_``.
        import numpy as np
        # no mask yet: start from an all-True mask of length self.size
        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
        self._fixes_[index] = FIXED
        # a mask in which every entry is truthy is dropped again
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED
    def _set_unfixed(self, index):
        # Mark the entries selected by ``index`` as UNFIXED in the boolean
        # mask ``self._fixes_``.
        import numpy as np
        # no mask yet: start from an all-True mask of length self.size
        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
        # rav_i = self._raveled_index_for(param)[index]
        self._fixes_[index] = UNFIXED
        # a mask in which every entry is truthy is dropped again
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED
    def _connect_fixes(self):
        import numpy as np
        fixed_indices = self.constraints[__fixed__]
        if fixed_indices.size > 0:
            self._fixes_ = np.ones(self.size, dtype=bool) * UNFIXED
@ -205,9 +237,9 @@ class Constrainable(Nameable, Indexable):
    #===========================================================================
    # Prior Operations
    #===========================================================================
-    def set_prior(self, prior, warning=True, update=True):
+    def set_prior(self, prior, warning=True, trigger_parent=True):
        repriorized = self.unset_priors()
-        self._add_to_index_operations(self.priors, repriorized, prior, warning, update)
+        self._add_to_index_operations(self.priors, repriorized, prior, warning)
    def unset_priors(self, *priors):
        # Remove the given priors from ``self.priors`` and return what
        # ``_remove_from_index_operations`` reports as removed.
        return self._remove_from_index_operations(self.priors, priors)
@ -221,7 +253,6 @@ class Constrainable(Nameable, Indexable):
    def _log_prior_gradients(self):
        """evaluate the gradients of the priors"""
        import numpy as np
        if self.priors.size > 0:
            x = self._get_params()
            ret = np.zeros(x.size)
@ -233,7 +264,7 @@ class Constrainable(Nameable, Indexable):
    # Constrain operations -> done
    #===========================================================================
-    def constrain(self, transform, warning=True, update=True):
+    def constrain(self, transform, warning=True, trigger_parent=True):
        """
        :param transform: the :py:class:`GPy.core.transformations.Transformation`
                          to constrain the this parameter to.
@ -243,9 +274,9 @@ class Constrainable(Nameable, Indexable):
        :py:class:`GPy.core.transformations.Transformation`.
        """
        if isinstance(transform, Transformation):
-            self._set_params(transform.initialize(self._get_params()), update=False)
+            self._set_params(transform.initialize(self._get_params()), trigger_parent=trigger_parent)
        reconstrained = self.unconstrain()
-        self._add_to_index_operations(self.constraints, reconstrained, transform, warning, update)
+        self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
    def unconstrain(self, *transforms):
        """
@ -256,30 +287,30 @@ class Constrainable(Nameable, Indexable):
        """
        return self._remove_from_index_operations(self.constraints, transforms)
-    def constrain_positive(self, warning=True, update=True):
+    def constrain_positive(self, warning=True, trigger_parent=True):
        """
        :param warning: print a warning if re-constraining parameters.
        Constrain this parameter to the default positive constraint.
        """
-        self.constrain(Logexp(), warning=warning, update=update)
+        self.constrain(Logexp(), warning=warning, trigger_parent=trigger_parent)
-    def constrain_negative(self, warning=True, update=True):
+    def constrain_negative(self, warning=True, trigger_parent=True):
        """
        :param warning: print a warning if re-constraining parameters.
        Constrain this parameter to the default negative constraint.
        """
-        self.constrain(NegativeLogexp(), warning=warning, update=update)
+        self.constrain(NegativeLogexp(), warning=warning, trigger_parent=trigger_parent)
-    def constrain_bounded(self, lower, upper, warning=True, update=True):
+    def constrain_bounded(self, lower, upper, warning=True, trigger_parent=True):
        """
        :param lower, upper: the limits to bound this parameter to
        :param warning: print a warning if re-constraining parameters.
        Constrain this parameter to lie within the given range.
        """
-        self.constrain(Logistic(lower, upper), warning=warning, update=update)
+        self.constrain(Logistic(lower, upper), warning=warning, trigger_parent=trigger_parent)
    def unconstrain_positive(self):
        """
@ -309,17 +340,15 @@ class Constrainable(Nameable, Indexable):
        for p in self._parameters_:
            p._parent_changed(parent)
-    def _add_to_index_operations(self, which, reconstrained, transform, warning, update):
+    def _add_to_index_operations(self, which, reconstrained, transform, warning):
        if warning and reconstrained.size > 0:
            # TODO: figure out which parameters have changed and only print those
            print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
        which.add(transform, self._raveled_index())
        if update:
            self._notify_observers()
    def _remove_from_index_operations(self, which, transforms):
        if len(transforms) == 0:
            transforms = which.properties()
        import numpy as np
        removed = np.empty((0,), dtype=int)
        for t in transforms:
            unconstrained = which.remove(t, self._raveled_index())
@ -329,12 +358,72 @@ class Constrainable(Nameable, Indexable):
        return removed
 class OptimizationHandlable(Constrainable, Observable):
    def _get_params_transformed(self):
        # Parameter values with ``finv`` of each constraint applied to the
        # entries that constraint covers; entries under ``__fixed__`` are
        # not changed by this step.
        values = self._get_params()
        for constraint, index in self.constraints.iteritems():
            if constraint != __fixed__:
                np.put(values, index, constraint.finv(values[index]))
        # with a fixes mask present only the entries it selects are returned
        if not self._has_fixes():
            return values
        return values[self._fixes_]
    def _set_params_transformed(self, p):
        # inverse apply transformations for parameters and set the resulting parameters
        self._set_params(self._untransform_params(p))
    def _size_transformed(self):
        # total size minus the number of indices stored under ``__fixed__``
        return self.size - self.constraints[__fixed__].size
    def _untransform_params(self, p):
        # Map transformed values back: work on a copy of ``p``, scatter it
        # into the current parameters when a fixes mask exists, then apply
        # ``f`` of every constraint other than ``__fixed__``.
        values = p.copy()
        if self._has_fixes():
            # the entries selected by the mask take the given values, the
            # remaining ones keep what ``_get_params`` returned
            full = self._get_params()
            full[self._fixes_] = values
            values = full
        for constraint, index in self.constraints.iteritems():
            if constraint != __fixed__:
                np.put(values, index, constraint.f(values[index]))
        return values
    def _get_params(self):
        # don't overwrite this anymore!
        if not self.size:
            return np.empty(shape=(0,), dtype=np.float64)
        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
-class Parameterizable(Constrainable, Observable):
+    def _set_params(self, params, trigger_parent=True):
        # don't overwrite this anymore!
        raise NotImplementedError, "This needs to be implemented in Param and Parametrizable"
    #===========================================================================
    # Optimization handles:
    #===========================================================================
    def _get_param_names(self):
        n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
        return n
    def _get_param_names_transformed(self):
        n = self._get_param_names()
        if self._has_fixes():
            return n[self._fixes_]
        return n
    #===========================================================================
    # Randomizeable
    #===========================================================================
    def randomize(self):
        """
        Assign random values to the parameters.

        A standard normal sample of length ``self._size_transformed()`` is
        passed through ``self._untransform_params``; entries which have a
        prior are then overwritten with draws from that prior before the
        result is handed to ``self._set_params``.
        """
        # N(0,1) sample for every transformed parameter, mapped back
        sample = self._untransform_params(np.random.randn(self._size_transformed()))
        # entries with a prior are drawn from that prior instead
        for prior, index in self.priors.iteritems():
            if prior is not None:
                np.put(sample, index, prior.rvs(index.size))
        self._set_params(sample)
 class Parameterizable(OptimizationHandlable):
    def __init__(self, *args, **kwargs):
        super(Parameterizable, self).__init__(*args, **kwargs)
-        from GPy.core.parameterization.array_core import ParamList
+        from GPy.core.parameterization.lists_and_dicts import ArrayList
-        _parameters_ = ParamList()
+        _parameters_ = ArrayList()
        self._added_names_ = set()
    def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
@ -357,7 +446,7 @@ class Parameterizable(Constrainable, Observable):
                if pname in self._added_names_:
                    del self.__dict__[pname]
                    self._add_parameter_name(param)
-        else:
+        elif pname not in dir(self):
            self.__dict__[pname] = param
            self._added_names_.add(pname)
@ -377,42 +466,147 @@ class Parameterizable(Constrainable, Observable):
        import itertools
        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
    def _set_params(self, params, trigger_parent=True):
        import itertools
        [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
        if trigger_parent: min_priority = None
        else: min_priority = -np.inf
        self._notify_observers(None, min_priority)
    def _set_gradient(self, g):
        import itertools
        [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
    def add_parameter(self, param, index=None):
        """
        :param parameters:  the parameters to add
        :type parameters:   list of or one :py:class:`GPy.core.param.Param`
        :param [index]:     index of where to put parameters
    def _get_params(self):
        import numpy as np
        # don't overwrite this anymore!
        if not self.size:
            return np.empty(shape=(0,), dtype=np.float64)
        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
-    def _set_params(self, params, update=True):
+        Add all parameters to this param class, you can insert parameters
-        # don't overwrite this anymore!
+        at any given index using the :func:`list.insert` syntax
-        import itertools
+        """
-        [p._set_params(params[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
+        # if param.has_parent():
-        self._notify_parameters_changed()
+        #    raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
        if param in self._parameters_ and index is not None:
            self.remove_parameter(param)
            self.add_parameter(param, index)
        elif param not in self._parameters_:
            if param.has_parent():
                parent = param._direct_parent_
                while parent is not None:
                    if parent is self:
                        raise HierarchyError, "You cannot add a parameter twice into the hirarchy"
                    parent = parent._direct_parent_
                param._direct_parent_.remove_parameter(param)
            # make sure the size is set
            if index is None:
                self.constraints.update(param.constraints, self.size)
                self.priors.update(param.priors, self.size)
                self._parameters_.append(param)
            else:
                start = sum(p.size for p in self._parameters_[:index])
                self.constraints.shift_right(start, param.size)
                self.priors.shift_right(start, param.size)
                self.constraints.update(param.constraints, start)
                self.priors.update(param.priors, start)
                self._parameters_.insert(index, param)
            param.add_observer(self, self._pass_through_notify_observers, -np.inf)
            self.size += param.size
            self._connect_parameters()
            self._notify_parent_change()
            self._connect_fixes()
        else:
            raise RuntimeError, """Parameter exists already added and no copy made"""
    def add_parameters(self, *parameters):
        """
        convenience method for adding several
        parameters without gradient specification

        Each argument is passed, in order, to :meth:`add_parameter`.
        """
        for param in parameters:
            self.add_parameter(param)
    def remove_parameter(self, param):
        """
        :param param: param object to remove from being a parameter of this parameterized object.
        :raises RuntimeError: if `param` is not one of this object's direct parameters

        The parameter is unregistered, disconnected from this parent and
        dropped from the observer list; the index maps are shifted to close
        the gap, and this object and all of its ancestors are re-connected.
        """
        if param not in self._parameters_:
            raise RuntimeError("Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()))
        # flat offset of the removed parameter inside this object
        start = sum(p.size for p in self._parameters_[:param._parent_index_])
        self._remove_parameter_name(param)
        self.size -= param.size
        del self._parameters_[param._parent_index_]
        param._disconnect_parent()
        param.remove_observer(self, self._pass_through_notify_observers)
        # add_parameter shifts both constraints and priors right on insertion,
        # so both have to be shifted back here to stay aligned
        self.constraints.shift_left(start, param.size)
        self.priors.shift_left(start, param.size)
        self._connect_fixes()
        self._connect_parameters()
        self._notify_parent_change()
        # propagate the structural change up the hierarchy
        parent = self._direct_parent_
        while parent is not None:
            parent._connect_fixes()
            parent._connect_parameters()
            parent._notify_parent_change()
            parent = parent._direct_parent_
    def _connect_parameters(self):
        # connect parameterlist to this parameterized object
        # This just sets up the right connection for the params objects
        # to be used as parameters
        # it also sets the constraints for each parameter to the constraints 
        # of their respective parents 
        if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
            # no parameters for this class
            return
        sizes = [0]
        self._param_slices_ = []
        for i, p in enumerate(self._parameters_):
            p._direct_parent_ = self
            p._parent_index_ = i
            sizes.append(p.size + sizes[-1])
            self._param_slices_.append(slice(sizes[-2], sizes[-1]))
            self._add_parameter_name(p)
    #===========================================================================
    # notification system
    #===========================================================================
    def _parameters_changed_notification(self, which):
        """
        Observer callback that triggers ``self.parameters_changed()``.

        :param which: passed in by the caller; not used here
        """
        self.parameters_changed()
    def _pass_through_notify_observers(self, which):
        """
        Observer callback that forwards a notification to this object's own observers.

        :param which: forwarded unchanged to ``self._notify_observers``
        """
        self._notify_observers(which)
    #===========================================================================
    # TODO: not working yet
    #===========================================================================
    def copy(self):
        """Returns a (deep) copy of the current model"""
        import copy
        from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
-        from .array_core import ParamList
+        from .lists_and_dicts import ArrayList
        dc = dict()
        for k, v in self.__dict__.iteritems():
-            if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names():
+            if k not in ['_direct_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names():
                if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
                    dc[k] = v.copy()
                else:
                    dc[k] = copy.deepcopy(v)
            if k == '_parameters_':
                params = [p.copy() for p in v]
-        
+            
        dc['_direct_parent_'] = None
        dc['_parent_index_'] = None
-        dc['_parameters_'] = ParamList()
+        dc['_observer_callables_'] = []
        dc['_parameters_'] = ArrayList()
        dc['constraints'].clear()
        dc['priors'].clear()
        dc['size'] = 0
@ -421,15 +615,10 @@ class Parameterizable(Constrainable, Observable):
        s.__dict__ = dc
        for p in params:
            import ipdb;ipdb.set_trace()
            s.add_parameter(p)
        return s
    def _notify_parameters_changed(self):
        self.parameters_changed()
        self._notify_observers()
        if self.has_parent():
            self._direct_parent_._notify_parameters_changed()
    def parameters_changed(self):
        """
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -7,9 +7,9 @@ import cPickle
 import itertools
 from re import compile, _pattern_type
 from param import ParamConcatenation
-from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable
+from parameter_core import Pickleable, Parameterizable, adjust_name_for_printing, Gradcheckable
 from transformations import __fixed__
-from array_core import ParamList
+from lists_and_dicts import ArrayList
 class Parameterized(Parameterizable, Pickleable, Gradcheckable):
    """
@ -56,8 +56,9 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
    def __init__(self, name=None, *a, **kw):
        super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
        self._in_init_ = True
-        self._parameters_ = ParamList()
+        self._parameters_ = ArrayList()
        self.size = sum(p.size for p in self._parameters_)
        self.add_observer(self, self._parameters_changed_notification, -100)
        if not self._has_fixes():
            self._fixes_ = None
        self._param_slices_ = []
@ -65,7 +66,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
        del self._in_init_
    def build_pydot(self, G=None):
-        import pydot
+        import pydot  # @UnresolvedImport
        iamroot = False
        if G is None:
            G = pydot.Dot(graph_type='digraph')
@ -87,88 +88,6 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
            return G
        return node
    def add_parameter(self, param, index=None):
        """
        :param param:       the parameter to add
        :type param:        :py:class:`GPy.core.param.Param`
        :param [index]:     index of where to put the parameter
        :raises RuntimeError: if `param` already belongs to this object and
                              no `index` is given

        Add a parameter to this param class, you can insert it
        at any given index using the :func:`list.insert` syntax.
        Adding an already contained parameter together with an `index`
        moves it to that index.
        """
        if param in self._parameters_ and index is not None:
            # move: take it out and re-insert at the requested position
            self.remove_parameter(param)
            self.add_parameter(param, index)
        elif param not in self._parameters_:
            # make sure the size is set
            if index is None:
                self.constraints.update(param.constraints, self.size)
                self.priors.update(param.priors, self.size)
                self._parameters_.append(param)
            else:
                # flat offset at which the new parameter starts
                start = sum(p.size for p in self._parameters_[:index])
                self.constraints.shift_right(start, param.size)
                self.priors.shift_right(start, param.size)
                self.constraints.update(param.constraints, start)
                self.priors.update(param.priors, start)
                self._parameters_.insert(index, param)
            self.size += param.size
        else:
            raise RuntimeError("""Parameter exists already added and no copy made""")
        self._connect_parameters()
        self._notify_parent_change()
        self._connect_fixes()
    def add_parameters(self, *parameters):
        """
        convenience method for adding several
        parameters without gradient specification

        Each argument is passed, in order, to :meth:`add_parameter`.
        """
        for param in parameters:
            self.add_parameter(param)
    def remove_parameter(self, param):
        """
        :param param: param object to remove from being a parameter of this parameterized object.
        :raises RuntimeError: if `param` is not one of this object's direct parameters
        """
        if param not in self._parameters_:
            raise RuntimeError("Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()))
        # flat offset of the removed parameter inside this object
        start = sum(p.size for p in self._parameters_[:param._parent_index_])
        self._remove_parameter_name(param)
        self.size -= param.size
        del self._parameters_[param._parent_index_]
        param._disconnect_parent()
        param.remove_observer(self, self._notify_parameters_changed)
        # add_parameter shifts both constraints and priors right on insertion,
        # so both have to be shifted back here to stay aligned
        self.constraints.shift_left(start, param.size)
        self.priors.shift_left(start, param.size)
        self._connect_fixes()
        self._connect_parameters()
        self._notify_parent_change()
    def _connect_parameters(self):
        # connect parameterlist to this parameterized object
        # This just sets up the right connection for the params objects
        # to be used as parameters
        # it also sets the constraints for each parameter to the constraints 
        # of their respective parents 
        if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
            # no parameters for this class
            return
        sizes = [0]
        self._param_slices_ = []
        for i, p in enumerate(self._parameters_):
            p._direct_parent_ = self
            p._parent_index_ = i
            sizes.append(p.size + sizes[-1])
            self._param_slices_.append(slice(sizes[-2], sizes[-1]))
            self._add_parameter_name(p)
    #===========================================================================
    # Pickling operations
    #===========================================================================
@ -185,6 +104,11 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
        else:
            cPickle.dump(self, f, protocol)
    def copy(self):
        """
        Return the copy built by the parent class, with the copy's own
        ``_parameters_changed_notification`` registered on it as an observer
        at priority -100.
        """
        c = super(Parameterized, self).copy()
        # NOTE(review): the base copy() appears to start from an empty observer
        # list, hence the re-registration here -- confirm against the base class
        c.add_observer(c, c._parameters_changed_notification, -100)
        return c
    def __getstate__(self):
        if self._has_get_set_state():
            return self._getstate()
@ -237,42 +161,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
                g[self._offset_for(p) + numpy.array(list(i))] += g[self._raveled_index_for(t)]
        if self._has_fixes(): return g[self._fixes_]
        return g
-    #===========================================================================
+
    # Optimization handles:
    #===========================================================================
    def _get_param_names(self):
        n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
        return n
    def _get_param_names_transformed(self):
        n = self._get_param_names()
        if self._has_fixes():
            return n[self._fixes_]
        return n
    def _get_params_transformed(self):
        """
        Return the parameter vector with every constraint's ``finv`` applied.

        Entries whose constraint is ``__fixed__`` are left untouched by the
        transformation; when fixes exist the result is indexed by
        ``self._fixes_`` before it is returned.
        """
        # transformed parameters (apply transformation rules)
        p = self._get_params()
        for c, ind in self.constraints.iteritems():
            if c != __fixed__:
                numpy.put(p, ind, c.finv(p[ind]))
        if self._has_fixes():
            return p[self._fixes_]
        return p
    def _set_params_transformed(self, p):
        """
        Set the parameters from the transformed vector `p`.

        `p` is passed through ``self._untransform_params`` and the result is
        given to ``self._set_params``.
        """
        # inverse apply transformations for parameters and set the resulting parameters
        self._set_params(self._untransform_params(p))
    def _untransform_params(self, p):
        """
        Map a transformed parameter vector back by applying every constraint's ``f``.

        `p` itself is not modified; the work is done on a copy. When fixes
        exist, `p` is first written into the ``self._fixes_`` positions of the
        current full parameter vector. Entries whose constraint is
        ``__fixed__`` are not passed through any ``f``.

        :returns: the untransformed parameter array
        """
        p = p.copy()
        if self._has_fixes():
            # scatter the given values into the full vector
            full = self._get_params()
            full[self._fixes_] = p
            p = full
        for c, ind in self.constraints.iteritems():
            if c != __fixed__:
                numpy.put(p, ind, c.f(p[ind]))
        return p
    #===========================================================================
    # Indexable Handling
    #===========================================================================
    def _backtranslate_index(self, param, ind):
        # translate an index in parameterized indexing into the index of param
        ind = ind - self._offset_for(param)
        ind = ind[ind >= 0]
        internal_offset = param._internal_offset()
        ind = ind[ind < param.size + internal_offset]
        return ind
    def _offset_for(self, param):
        # get the offset in the parameterized index array for param
        if param.has_parent():
@ -297,34 +186,22 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
        this is not in the global view of things!
        """
        return numpy.r_[:self.size]
-    #===========================================================================
+    
    # Fixing parameters:
    #===========================================================================
    def _fixes_for(self, param):
        if self._has_fixes():
            return self._fixes_[self._raveled_index_for(param)]
        return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
    #===========================================================================
    # Convenience for fixed, tied checking of param:
    #===========================================================================
    def fixed_indices(self):
        """Return an array holding ``is_fixed`` of every entry in ``self._parameters_``."""
        # NOTE(review): this uses the alias `np` while neighbouring methods use
        # `numpy` -- confirm both names are bound at module level
        return np.array([x.is_fixed for x in self._parameters_])
    def _is_fixed(self, param):
        # returns if the whole param is fixed
        if not self._has_fixes():
            return False
        return not self._fixes_[self._raveled_index_for(param)].any()
        # return not self._fixes_[self._offset_for(param): self._offset_for(param)+param._realsize_].any()
    @property
    def is_fixed(self):
        """True when every entry of ``self._parameters_`` reports ``is_fixed`` (also when there are none)."""
        return all(p.is_fixed for p in self._parameters_)
    def _get_original(self, param):
        """
        Return the entry of ``self._parameters_`` at ``param._parent_index_``.

        :param param: a parameter (possibly a copy) carrying ``_parent_index_``
        """
        # if advanced indexing is activated it happens that the array is a copy
        # you can retrieve the original param through this method, by passing
        # the copy here
        return self._parameters_[param._parent_index_]
    #===========================================================================
    # Get/set parameters:
    #===========================================================================
@ -352,9 +229,13 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
        return ParamConcatenation(paramlist)
    def __setitem__(self, name, value, paramlist=None):
-        try: param = self.__getitem__(name, paramlist)
+        if isinstance(name, slice):
-        except AttributeError as a: raise a
+            self[''][name] = value
-        param[:] = value
+        else:
            try: param = self.__getitem__(name, paramlist)
            except AttributeError as a: raise a
            param[:] = value
    def __setattr__(self, name, val):
        # override the default behaviour, if setting a param, so broadcasting can by used        
        if hasattr(self, '_parameters_'):
@ -365,7 +246,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
    # Printing:
    #===========================================================================
    def _short(self):
-        return self.hirarchy_name()
+        return self.hierarchy_name()
    @property
    def flattened_parameters(self):
        """Flat list made by concatenating ``flattened_parameters`` of every entry in ``self._parameters_``."""
        return [xi for x in self._parameters_ for xi in x.flattened_parameters]
@ -373,11 +254,6 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
    def _parameter_sizes_(self):
        """List of ``size`` for every entry in ``self._parameters_``, in order."""
        return [x.size for x in self._parameters_]
    @property
    def size_transformed(self):
        """Sum of ``self._fixes_`` when fixes exist, otherwise ``self.size``."""
        return sum(self._fixes_) if self._has_fixes() else self.size
    @property
    def parameter_shapes(self):
        """Flat list made by concatenating ``parameter_shapes`` of every entry in ``self._parameters_``."""
        return [xi for x in self._parameters_ for xi in x.parameter_shapes]
    @property
@ -404,7 +280,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
        cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
        tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
        pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
-        format_spec = "  \033[1m{{name:<{0}s}}\033[0;0m  |  {{desc:^{1}s}}  |  {{const:^{2}s}}  |  {{pri:^{3}s}}  |  {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
+        format_spec = "  \033[1m{{name:<{0}s}}\033[0;0m  |  {{desc:>{1}s}}  |  {{const:^{2}s}}  |  {{pri:^{3}s}}  |  {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
        to_print = []
        for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
            to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -64,6 +64,36 @@ class Gaussian(Prior):
        return np.random.randn(n) * self.sigma + self.mu
 class Uniform(Prior):
    """
    Uniform prior on the closed interval [lower, upper].

    Instances are shared: constructing the class again with bounds equal to
    those of a live instance returns that instance (tracked via weak
    references in ``_instances``).

    :param lower: lower bound of the interval
    :param upper: upper bound of the interval
    """
    domain = _REAL
    _instances = []

    def __new__(cls, lower, upper):  # Singleton:
        if cls._instances:
            # drop references whose instance has been garbage collected
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if instance().lower == lower and instance().upper == upper:
                    return instance()
        # object.__new__ must not be given constructor arguments (this is a
        # TypeError on Python 3); only forward them to a custom __new__
        newfunc = super(Prior, cls).__new__
        if newfunc is object.__new__:
            o = newfunc(cls)
        else:
            o = newfunc(cls, lower, upper)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, lower, upper):
        self.lower = float(lower)
        self.upper = float(upper)

    def __str__(self):
        return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'

    def lnpdf(self, x):
        """
        Log-density at `x`: ``-log(upper - lower)`` inside the interval and
        ``-inf`` outside of it.
        """
        region = (x >= self.lower) * (x <= self.upper)
        return np.where(region, -np.log(self.upper - self.lower), -np.inf)

    def lnpdf_grad(self, x):
        """Gradient of the log-density: zeros with the shape of `x`."""
        return np.zeros(x.shape)

    def rvs(self, n):
        """Draw `n` samples uniformly between ``lower`` and ``upper``."""
        return np.random.uniform(self.lower, self.upper, size=n)
 class LogGaussian(Prior):
    """
    Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
--- a/GPy/core/parameterization/transformations.py
+++ b/GPy/core/parameterization/transformations.py
@ -6,8 +6,11 @@ import numpy as np
 from domains import _POSITIVE,_NEGATIVE, _BOUNDED
 import weakref
 import sys
 #_lim_val = -np.log(sys.float_info.epsilon)
 _exp_lim_val = np.finfo(np.float64).max
-_lim_val = np.log(_exp_lim_val)#-np.log(sys.float_info.epsilon)
+_lim_val = np.log(_exp_lim_val)#
 #===============================================================================
 # Fixing constants
@ -35,7 +38,6 @@ class Transformation(object):
        """ produce a sensible initial value for f(x)"""
        raise NotImplementedError
    def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args,**kw):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        import matplotlib.pyplot as plt
        from ...plotting.matplot_dep import base_plots
@ -52,7 +54,7 @@ class Transformation(object):
 class Logexp(Transformation):
    domain = _POSITIVE
    def f(self, x):
-        return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
+        return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val))))
        #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
    def finv(self, f):
        return np.where(f>_lim_val, f, np.log(np.exp(f) - 1.))
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -85,11 +85,11 @@ class SparseGP(GP):
            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
-    def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
+    def _raw_predict(self, Xnew, full_cov=False):
        """
        Make a prediction for the latent function values
        """
-        if X_variance_new is None:
+        if not isinstance(Xnew, VariationalPosterior):
            Kx = self.kern.K(self.Z, Xnew)
            mu = np.dot(Kx.T, self.posterior.woodbury_vector)
            if full_cov:
@ -100,13 +100,13 @@ class SparseGP(GP):
                Kxx = self.kern.Kdiag(Xnew)
                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
        else:
-            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
+            Kx = self.kern.psi1(self.Z, Xnew)
-            mu = np.dot(Kx, self.Cpsi1V)
+            mu = np.dot(Kx, self.posterior.woodbury_vector)
            if full_cov:
                raise NotImplementedError, "TODO"
            else:
-                Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
+                Kxx = self.kern.psi0(self.Z, Xnew)
-                psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
+                psi2 = self.kern.psi2(self.Z, Xnew)
                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
        return mu, var
@ -114,14 +114,12 @@ class SparseGP(GP):
    def _getstate(self):
        """
        Get the current state of the class,
        here just all the indices, rest can get recomputed
        """
-        return GP._getstate(self) + [self.Z,
+        return GP._getstate(self) + [
-                self.num_inducing,
+                self.Z,
-                self.X_variance]
+                self.num_inducing]
    def _setstate(self, state):
        """
        Restore this object's attributes from `state`.

        Entries are popped off the end of `state` (X_variance, num_inducing,
        Z, in that order) and the remainder is handed to ``GP._setstate``.
        """
        # NOTE(review): the pop order has to mirror the list built by _getstate
        # in reverse; the _getstate shown next to this method ends with
        # num_inducing and no longer appends X_variance -- confirm this pop is
        # still wanted
        self.X_variance = state.pop()
        self.num_inducing = state.pop()
        self.Z = state.pop()
        GP._setstate(self, state)
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -187,10 +187,10 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
    _np.random.seed(1234)
    x = _np.linspace(0, 4 * _np.pi, N)[:, None]
-    s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x)))
+    s1 = _np.vectorize(lambda x: _np.sin(x))
    s2 = _np.vectorize(lambda x: _np.cos(x)**2)
    s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
-    sS = _np.vectorize(lambda x: x*_np.sin(x))
+    sS = _np.vectorize(lambda x: _np.cos(x))
    s1 = s1(x)
    s2 = s2(x)
@ -202,7 +202,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
    s3 -= s3.mean(); s3 /= s3.std(0)
    sS -= sS.mean(); sS /= sS.std(0)
-    S1 = _np.hstack([s1, s2, sS])
+    S1 = _np.hstack([s1, sS])
    S2 = _np.hstack([s2, s3, sS])
    S3 = _np.hstack([s3, sS])
@ -270,7 +270,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
    from GPy import kern
    from GPy.models import BayesianGPLVM
-    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9
+    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
    Y = Ylist[0]
    k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
@ -294,7 +294,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
    from GPy.models import BayesianGPLVM
    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
-    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9
+    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
    Y = Ylist[0]
    k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@ -60,8 +60,7 @@ class VarDTC(object):
        _, output_dim = Y.shape
        #see whether we've got a different noise variance for each datum
-        beta = 1./np.squeeze(likelihood.variance)
+        beta = 1./np.fmax(likelihood.variance, 1e-6)
        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
@ -214,7 +213,7 @@ class VarDTCMissingData(object):
            psi2_all = None
        Ys, traces = self._Y(Y)
-        beta_all = 1./likelihood.variance
+        beta_all = 1./np.fmax(likelihood.variance, 1e-6)
        het_noise = beta_all.size != 1
        import itertools
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@ -73,7 +73,7 @@ class Kern(Parameterized):
        See GPy.plotting.matplot_dep.plot
        """
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import kernel_plots
+        from ...plotting.matplot_dep import kernel_plots
        kernel_plots.plot(self,*args)
    def plot_ARD(self, *args, **kw):
@ -112,10 +112,12 @@ class Kern(Parameterized):
        """
        assert isinstance(other, Kern), "only kernels can be added to kernels..."
        from add import Add
-        return Add([self, other], tensor)
+        kernels = []
-
+        if not tensor and isinstance(self, Add): kernels.extend(self._parameters_)
-    def __call__(self, X, X2=None):
+        else: kernels.append(self)
-        return self.K(X, X2)
+        if not tensor and isinstance(other, Add): kernels.extend(other._parameters_)
        else: kernels.append(other)
        return Add(kernels, tensor)
    def __mul__(self, other):
        """ Here we overload the '*' operator. See self.prod for more information"""
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@ -7,6 +7,8 @@ from scipy import weave
 from ...util.misc import param_to_array
 from stationary import Stationary
 from GPy.util.caching import Cache_this
 from ...core.parameterization import variational
 from rbf_psi_comp import ssrbf_psi_comp
 class RBF(Stationary):
    """
@ -18,7 +20,7 @@ class RBF(Stationary):
    """
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='RBF'):
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
        super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
        self.weave_options = {}
@ -36,76 +38,140 @@ class RBF(Stationary):
        return self.Kdiag(variational_posterior.mean)
    def psi1(self, Z, variational_posterior):
-        _, _, _, psi1 = self._psi1computations(Z, variational_posterior)
+        if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
            psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
        else:
            _, _, _, psi1 = self._psi1computations(Z, variational_posterior)
        return psi1
    def psi2(self, Z, variational_posterior):
-        _, _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
+        if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
            psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
        else:
            _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
        return psi2
    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        l2 = self.lengthscale **2
+        # Spike-and-Slab GPLVM
        if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
            _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
            _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
            #contributions from psi0:
            self.variance.gradient = np.sum(dL_dpsi0)
            #from psi1
            self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
            self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) 
            #from psi2
            self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
            self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)        
            return
        elif isinstance(variational_posterior, variational.NormalPosterior):
            l2 = self.lengthscale **2
-        #contributions from psi0:
+            #contributions from psi0:
-        self.variance.gradient = np.sum(dL_dpsi0)
+            self.variance.gradient = np.sum(dL_dpsi0)
-        self.lengthscale.gradient = 0.
+            self.lengthscale.gradient = 0.
            #from psi1
            denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
            d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
            dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
            if self.ARD:
                self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
            else:
                self.lengthscale.gradient += dpsi1_dlength.sum()
            self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
            #from psi2
            S = variational_posterior.variance
            _, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
            if not self.ARD:
                self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
            else:
                self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
            self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
        #from psi1
        denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
        d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
        dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
        if not self.ARD:
            self.lengthscale.gradient += dpsi1_dlength.sum()
        else:
-            self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
+            raise ValueError, "unknown distriubtion received for psi-statistics"
        self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
        #from psi2
        S = variational_posterior.variance
        denom, _, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
        d_length = 2.*psi2[:, :, :, None] * (Zdist_sq * denom + mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * denom)
        #TODO: combine denom and l2 as denom_l2??
        #TODO: tidy the above!
        #TODO: tensordot below?
        dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
        if not self.ARD:
            self.lengthscale.gradient += dpsi2_dlength.sum()
        else:
            self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
        self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        l2 = self.lengthscale **2
+        # Spike-and-Slab GPLVM
        if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
            _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
            _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
            #psi1
            grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
            #psi2
            grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
            return grad
-        #psi1
+        elif isinstance(variational_posterior, variational.NormalPosterior):
-        denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
+            
-        denominator = l2 * denom
+            l2 = self.lengthscale **2
        dpsi1_dZ = -psi1[:, :, None] * (dist / denominator)
        grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
-        #psi2
+            #psi1
-        denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
+            denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
-        term1 = Zdist / l2 # M, M, Q
+            grad = np.einsum('ij,ij,ijk,ijk->jk', dL_dpsi1, psi1, dist, -1./(denom*l2))
        term2 = mudist / denom / l2 # N, M, M, Q
        dZ = psi2[:, :, :, None] * (term1[None, :, :, :] + term2) #N,M,M,Q
        grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
-        return grad
+            #psi2
            Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
            term1 = Zdist / l2 # M, M, Q
            S = variational_posterior.variance
            term2 = mudist / (2.*S[:,None,None,:] + l2) # N, M, M, Q
            grad += 2.*np.einsum('ijk,ijk,ijkl->kl', dL_dpsi2, psi2, term1[None,:,:,:] + term2)
            return grad
        else:
            raise ValueError, "unknown distriubtion received for psi-statistics"
    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        l2 = self.lengthscale **2
+        # Spike-and-Slab GPLVM
-        #psi1
+        if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
-        denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
+            ndata = variational_posterior.mean.shape[0]
-        tmp = psi1[:, :, None] / l2 / denom
+            
-        grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
+            _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
-        grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
+            _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
-        #psi2
+    
-        denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
+            #psi1
-        tmp = psi2[:, :, :, None] / l2 / denom
+            grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
-        grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * mudist).sum(1).sum(1)
+            grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
-        grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*mudist_sq - 1)).sum(1).sum(1)
+            grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
            #psi2
            grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
            grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
            grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
            return grad_mu, grad_S, grad_gamma
        elif isinstance(variational_posterior, variational.NormalPosterior):
            l2 = self.lengthscale **2
            #psi1
            denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
            tmp = psi1[:, :, None] / l2 / denom
            grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
            grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
            #psi2
            _, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
            S = variational_posterior.variance
            tmp = psi2[:, :, :, None] / (2.*S[:,None,None,:] + l2)
            grad_mu += -2.*np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2, tmp , mudist)
            grad_S += np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2 , tmp , (2.*mudist_sq - 1))
        else:
            raise ValueError, "unknown distriubtion received for psi-statistics"
        return grad_mu, grad_S
@ -113,61 +179,6 @@ class RBF(Stationary):
    #            Precomputations            #
    #---------------------------------------#
    #TODO: this function is unused, but it will be useful in the stationary class
    def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
        """
        A helper function for update_gradients_* methods.

        Computes the derivative of the objective L wrt the lengthscales via
        dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)

        The accumulation over squared input differences is done by an inline
        C snippet run through weave.inline, one pass per input dimension.

        Assumes self._K_computations has just been called (self._K_dvar is
        read here without being recomputed).
        This is only valid if self.ARD=True (the C code indexes var_len3 once
        per input dimension).

        :param dL_dK: derivative of the objective wrt the kernel matrix;
            multiplied elementwise with self._K_dvar
        :param X: first set of inputs, indexed as X(i, q) with
            i < X.shape[0] and q < self.input_dim
        :param X2: second set of inputs, or None for the symmetric case
            where X is compared against itself
        :returns: array of length self.input_dim with the accumulated
            lengthscale gradients
        """
        # one accumulator slot per input dimension; filled in by the C code
        target = np.zeros(self.input_dim)
        dvardLdK = self._K_dvar * dL_dK
        # per-dimension prefactor variance / lengthscale**3
        var_len3 = self.variance / np.power(self.lengthscale, 3)
        if X2 is None:
            # Symmetric case: fold the matrix onto its transpose so that the
            # C loop below only has to visit the strict lower triangle (j < i).
            dvardLdK = dvardLdK + dvardLdK.T
            code = """
            int q,i,j;
            double tmp;
            for(q=0; q<input_dim; q++){
              tmp = 0;
              for(i=0; i<num_data; i++){
                for(j=0; j<i; j++){
                  tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
                }
              }
              target(q) += var_len3(q)*tmp;
            }
            """
            # num_inducing is passed to the snippet but the symmetric loop
            # above does not use it
            num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
            # convert to plain arrays before handing them to weave
            X, dvardLdK, var_len3 = param_to_array(X, dvardLdK, var_len3)
            weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
        else:
            # General case: visit every (i, j) pair between X and X2.
            code = """
            int q,i,j;
            double tmp;
            for(q=0; q<input_dim; q++){
              tmp = 0;
              for(i=0; i<num_data; i++){
                for(j=0; j<num_inducing; j++){
                  tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
                }
              }
              target(q) += var_len3(q)*tmp;
            }
            """
            num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
            # convert to plain arrays before handing them to weave
            X, X2, dvardLdK, var_len3 = param_to_array(X, X2, dvardLdK, var_len3)
            weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
        return target
    @Cache_this(limit=1)
    def _psi1computations(self, Z, vp):
        mu, S = vp.mean, vp.variance
@ -180,7 +191,7 @@ class RBF(Stationary):
        return denom, dist, dist_sq, psi1
-    #@cache_this(ignore_args=(1,))
+    @Cache_this(limit=1, ignore_args=(0,))
    def _Z_distances(self, Z):
        Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
        Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
@ -200,7 +211,6 @@ class RBF(Stationary):
        #allocate memory for the things we want to compute
        mudist = np.empty((N, M, M, Q))
        mudist_sq = np.empty((N, M, M, Q))
        exponent = np.zeros((N,M,M))
        psi2 = np.empty((N, M, M))
        l2 = self.lengthscale **2
@ -212,7 +222,7 @@ class RBF(Stationary):
        code = """
        double tmp, exponent_tmp;
-        //#pragma omp parallel for private(tmp, exponent_tmp)
+        #pragma omp parallel for private(tmp, exponent_tmp)
        for (int n=0; n<N; n++)
        {
            for (int m=0; m<M; m++)
@ -253,8 +263,48 @@ class RBF(Stationary):
                     arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
                     type_converters=weave.converters.blitz, **self.weave_options)
-        return denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2
+        return Zdist, Zdist_sq, mudist, mudist_sq, psi2
-    def input_sensitivity(self):
+    def _weave_psi2_lengthscale_grads(self, dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2):
-        if self.ARD: return 1./self.lengthscale
+
-        else: return (1./self.lengthscale).repeat(self.input_dim)
+        #here's the einsum equivalent, it's ~3 times slower
        #return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
        result = np.zeros(self.input_dim)
        code = """
        double tmp;
        for(int q=0; q<Q; q++)
        {
            tmp = 0.0;
            #pragma omp parallel for reduction(+:tmp)
            for(int n=0; n<N; n++)
            {
                for(int m=0; m<M; m++)
                {
                    //diag terms
                    tmp += dL_dpsi2(n,m,m) * psi2(n,m,m) * (Zdist_sq(m,m,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,m,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
                    //off-diag terms
                    for(int mm=0; mm<m; mm++)
                    {
                        tmp += 2.0 * dL_dpsi2(n,m,mm) * psi2(n,m,mm) * (Zdist_sq(m,mm,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,mm,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
                    }
                }
            }
            result(q) = tmp;
        }
        """
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        N,Q = S.shape
        M = psi2.shape[-1]
        S = param_to_array(S)
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
                     type_converters=weave.converters.blitz, **self.weave_options)
        return 2.*result*self.lengthscale
--- a/GPy/kern/_src/rbf_psi_comp/init.py
+++ b/GPy/kern/_src/rbf_psi_comp/init.py
@ -0,0 +1,2 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
--- a/GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py
+++ b/GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py
@ -0,0 +1,111 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 """
 The package for the psi statistics computation
 """
 import numpy as np
 def _Z_distances(Z):
    """
    Pairwise half-sums and half-differences of the rows of Z.

    Z - MxQ

    Returns (Zhat, Zdist), both MxMxQ, where
    Zhat[i, j] = (Z[i] + Z[j]) / 2 and Zdist[i, j] = (Z[i] - Z[j]) / 2.
    """
    as_rows = Z[:, None, :] # Mx1xQ
    as_cols = Z[None, :, :] # 1xMxQ
    half_sum = (as_rows + as_cols) * 0.5
    half_diff = (as_rows - as_cols) * 0.5
    return half_sum, half_diff
 # def _psi1computations(self, Z, vp):
 #     mu, S = vp.mean, vp.variance
 #     l2 = lengthscale **2
 #     denom = S[:, None, :] / l2 + 1. # N,1,Q
 #     dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
 #     dist_sq = np.square(dist) / l2 / denom # N,M,Q
 #     exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
 #     psi1 = self.variance * np.exp(exponent) # N,M
 #     return denom, dist, dist_sq, psi1
 def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
    """
    Compute the psi1 statistic and its partial derivatives for the
    spike-and-slab variational posterior.

    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ

    Returns the tuple
    (_psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS,
     _dpsi1_dZ, _dpsi1_dlengthscale)
    with shapes
    _psi1                NxM
    _dpsi1_dvariance     NxM
    _dpsi1_dgamma        NxMxQ
    _dpsi1_dmu           NxMxQ
    _dpsi1_dS            NxMxQ
    _dpsi1_dZ            NxMxQ
    _dpsi1_dlengthscale  NxMxQ
    """
    lengthscale2 = np.square(lengthscale)

    # psi1
    _psi1_denom = S[:, None, :] / lengthscale2 + 1.  # Nx1xQ
    _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
    _psi1_dist = Z[None, :, :] - mu[:, None, :]  # NxMxQ
    _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
    _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
    # log of the "slab" (gamma) and "spike" (1-gamma) contributions per input dimension
    _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
    _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
    # log(exp(a) + exp(b)) evaluated stably: the naive form underflows to
    # log(0) = -inf when both exponents are very negative, which then turns
    # _psi1_q (and every derivative) into NaN via exp(-inf - (-inf)).
    _psi1_exponent = np.logaddexp(_psi1_exponent1, _psi1_exponent2) #NxMxQ
    _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
    _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
    _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
    # product over all input dimensions except q (leave-one-out), times variance
    _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
    _psi1 = variance * np.exp(_psi1_exp_sum) # NxM

    # derivatives
    _dpsi1_dvariance = _psi1 / variance # NxM
    _dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
    _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
    _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
    _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
    _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
    return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
 def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
    """
    Compute the psi2 statistic and its partial derivatives for the
    spike-and-slab variational posterior.

    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ

    Returns the tuple
    (_psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS,
     _dpsi2_dZ, _dpsi2_dlengthscale)
    with shapes
    _psi2                 NxMxM
    _dpsi2_dvariance      NxMxM
    _dpsi2_dgamma         NxMxMxQ
    _dpsi2_dmu            NxMxMxQ
    _dpsi2_dS             NxMxMxQ
    _dpsi2_dZ             NxMxMxQ
    _dpsi2_dlengthscale   NxMxMxQ
    """
    lengthscale2 = np.square(lengthscale)

    _psi2_Zhat, _psi2_Zdist = _Z_distances(Z)
    _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
    _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

    # psi2
    _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
    _psi2_denom_sqrt = np.sqrt(_psi2_denom)
    _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
    _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
    _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
    # log of the "slab" (gamma) and "spike" (1-gamma) contributions per input dimension
    _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
    _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
    # log(exp(a) + exp(b)) evaluated stably: the naive form underflows to
    # log(0) = -inf when both exponents are very negative, which then turns
    # _psi2_q (and every derivative) into NaN via exp(-inf - (-inf)).
    _psi2_exponent = np.logaddexp(_psi2_exponent1, _psi2_exponent2) # NxMxMxQ
    _psi2_exp_sum = _psi2_exponent.sum(axis=-1) # NxMxM
    # product over all input dimensions except q (leave-one-out), times variance^2
    _psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
    _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
    _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
    _psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M

    # derivatives
    _dpsi2_dvariance = 2. * _psi2/variance # NxMxM
    _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
    _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
    _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
    _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
    _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
    return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale
--- a/GPy/kern/_src/ssrbf.py
+++ b/GPy/kern/_src/ssrbf.py
@ -7,6 +7,7 @@ import numpy as np
 from ...util.linalg import tdot
 from ...util.config import *
 from stationary import Stationary
 from rbf_psi_comp import ssrbf_psi_comp
 class SSRBF(Stationary):
    """
@ -54,101 +55,63 @@ class SSRBF(Stationary):
    #             PSI statistics            #
    #---------------------------------------#
-    def psi0(self, Z, posterior_variational):
+    def psi0(self, Z, variational_posterior):
-        ret = np.empty(posterior_variational.mean.shape[0])
+        ret = np.empty(variational_posterior.mean.shape[0])
        ret[:] = self.variance
        return ret
-    def psi1(self, Z, posterior_variational):
+    def psi1(self, Z, variational_posterior):
-        self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
+        _psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
-        return self._psi1
+        return _psi1
-    def psi2(self, Z, posterior_variational):
+    def psi2(self, Z, variational_posterior):
-        self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
+        _psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
-        return self._psi2
+        return _psi2
-    def dL_dpsi0_dmuSgamma(self, dL_dpsi0, Z, mu, S, gamma, target_mu, target_S, target_gamma):
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        pass
+        _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
-
+        _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
    def dL_dpsi1_dmuSgamma(self, dL_dpsi1, Z, mu, S, gamma, target_mu, target_S, target_gamma):
        self._psi_computations(Z, mu, S, gamma)
        target_mu += (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1)
        target_S += (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1)
        target_gamma += (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1)
    def dL_dpsi2_dmuSgamma(self, dL_dpsi2, Z, mu, S, gamma, target_mu, target_S, target_gamma):
        """Think N,num_inducing,num_inducing,input_dim """
        self._psi_computations(Z, mu, S, gamma)
        target_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(mu.shape[0],-1,mu.shape[1]).sum(axis=1)
        target_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(S.shape[0],-1,S.shape[1]).sum(axis=1)
        target_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(gamma.shape[0],-1,gamma.shape[1]).sum(axis=1)
    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
        self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
        #contributions from psi0:
        self.variance.gradient = np.sum(dL_dpsi0)
        #from psi1
-        self.variance.gradient += np.sum(dL_dpsi1 * self._dpsi1_dvariance)
+        self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
-        self.lengthscale.gradient = (dL_dpsi1[:,:,None]*self._dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) 
+        self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) 
        #from psi2
-        self.variance.gradient += (dL_dpsi2 * self._dpsi2_dvariance).sum()
+        self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
-        self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * self._dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
+        self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)        
        #from Kmm
        self._K_computations(Z, None)
        dvardLdK = self._K_dvar * dL_dKmm
        var_len3 = self.variance / (np.square(self.lengthscale)*self.lengthscale)
        self.variance.gradient += np.sum(dvardLdK)
        self.lengthscale.gradient += (np.square(Z[:,None,:]-Z[None,:,:])*dvardLdK[:,:,None]).reshape(-1,self.input_dim).sum(axis=0)*var_len3
-        
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+        _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
-        self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
+        _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
        #psi1
-        grad = (dL_dpsi1[:, :, None] * self._dpsi1_dZ).sum(axis=0)
+        grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
        #psi2
-        grad += (dL_dpsi2[:, :, :, None] * self._dpsi2_dZ).sum(axis=0).sum(axis=1)
+        grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
        grad += self.gradients_X(dL_dKmm, Z, None)
        return grad
-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        ndata = posterior_variational.mean.shape[0]
+        ndata = variational_posterior.mean.shape[0]
-        self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
+        
        _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
        _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
        #psi1
-        grad_mu = (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1)
+        grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
-        grad_S = (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1)
+        grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
-        grad_gamma = (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1)
+        grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
        #psi2
-        grad_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
+        grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
-        grad_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
+        grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
-        grad_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
+        grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
        return grad_mu, grad_S, grad_gamma
    def gradients_X(self, dL_dK, X, X2=None):
        """
        Gradient of the objective wrt the inputs X, chained through K(X, X2).

        :param dL_dK: derivative of the objective wrt the kernel matrix,
            one row per row of X and one column per row of X2 (or of X
            when X2 is None)
        :param X: inputs to differentiate with respect to
        :param X2: second set of inputs; None means K(X, X)
        :returns: array with the same shape as X

        The squared distances are computed locally from the arguments, so
        the result does not depend on whatever self._K_dist2 was last
        cached for (or on it having been cached at all).
        """
        # identity test: `X2 == None` would compare elementwise on arrays
        symmetric = X2 is None
        other = X if symmetric else X2
        _K_dist = X[:, None, :] - other[None, :, :]
        _K_dist2 = np.square(_K_dist / self.lengthscale).sum(axis=-1)
        # the symmetric case keeps the factor of two used by the original code
        factor = -2. if symmetric else -1.
        dK_dX = self.variance * np.exp(-0.5 * _K_dist2[:, :, None]) * (factor * _K_dist / np.square(self.lengthscale))
        dL_dX = (dL_dK[:, :, None] * dK_dX).sum(axis=1)
        return dL_dX
    #---------------------------------------#
    #            Precomputations            #
@ -174,78 +137,3 @@ class SSRBF(Stationary):
            self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :])
        self._K_dvar = np.exp(-0.5 * self._K_dist2)
    #@cache_this(1)
    def _psi_computations(self, Z, mu, S, gamma):
        """
        Compute psi1, psi2 and their derivatives and store them on self.

        Z - MxQ
        mu - NxQ
        S - NxQ
        gamma - NxQ

        Every exponent is formed per input dimension as
        log(exp(exponent1) + exp(exponent2)), where exponent1 carries
        log(gamma) and exponent2 carries log(1 - gamma); gamma is therefore
        expected to lie strictly between 0 and 1, otherwise the logs are
        not finite.
        """
        # here are the "statistics" for psi1 and psi2
        # Attributes written on self (shapes as annotated on each line below):
        # _psi1                NxM
        # _dpsi1_dvariance     NxM
        # _dpsi1_dlengthscale  NxMxQ
        # _dpsi1_dZ            NxMxQ
        # _dpsi1_dgamma        NxMxQ
        # _dpsi1_dmu           NxMxQ
        # _dpsi1_dS            NxMxQ
        # _psi2                NxMxM
        # _dpsi2_dvariance     NxMxM
        # _dpsi2_dlengthscale  NxMxMxQ
        # _dpsi2_dZ            NxMxMxQ
        # _dpsi2_dgamma        NxMxMxQ
        # _dpsi2_dmu           NxMxMxQ
        # _dpsi2_dS            NxMxMxQ
        lengthscale2 = np.square(self.lengthscale)
        # pairwise midpoints and half-differences of the rows of Z, used by psi2
        _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
        _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
        _psi2_Zdist_sq = np.square(_psi2_Zdist / self.lengthscale) # M,M,Q
        _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
        # psi1
        _psi1_denom = S[:, None, :] / lengthscale2 + 1.  # Nx1xQ
        _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
        _psi1_dist = Z[None, :, :] - mu[:, None, :]  # NxMxQ
        _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
        _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
        # exponent1: term weighted by gamma; exponent2: term weighted by (1 - gamma)
        _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
        _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
        # log of the sum of both terms, computed directly (no max-shift stabilisation)
        _psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ
        _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
        _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
        _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
        # variance * exp(sum over all dimensions minus dimension q's own exponent):
        # the factor shared by every per-dimension derivative below
        _psi1_q = self.variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
        self._psi1 = self.variance * np.exp(_psi1_exp_sum) # NxM
        self._dpsi1_dvariance = self._psi1 / self.variance # NxM
        self._dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
        self._dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
        self._dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
        self._dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
        self._dpsi1_dlengthscale = 2.*self.lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
        # psi2
        _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
        _psi2_denom_sqrt = np.sqrt(_psi2_denom)
        _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
        _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
        _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
        # same two-term construction as for psi1, now over pairs of rows of Z
        _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
        _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
        _psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2))
        _psi2_exp_sum = _psi2_exponent.sum(axis=-1) # NxMxM
        _psi2_q = np.square(self.variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
        _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
        _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
        self._psi2 = np.square(self.variance) * np.exp(_psi2_exp_sum) # N,M,M
        self._dpsi2_dvariance = 2. * self._psi2/self.variance # NxMxM
        self._dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
        self._dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
        self._dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
        self._dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
        self._dpsi2_dlengthscale = 2.*self.lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -12,6 +12,35 @@ from scipy import integrate
 from ...util.caching import Cache_this
 class Stationary(Kern):
    """
    Stationary kernels (covariance functions).
    Stationary covariance functions depend only on r, where r is defined as
      r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
    The covariance function k(x, x') can then be written k(r).
    In this implementation, r is scaled by the lengthscale parameter(s):
      r = \sqrt{ \sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
    By default, there's only one lengthscale: separate lengthscales for each
    dimension can be enabled by setting ARD=True.
    To implement a stationary covariance function using this class, one need
    only define the covariance function k(r), and its derivative.
      ...
      def K_of_r(self, r):
          return foo
      def dK_dr(self, r):
          return bar
    The lengthscale(s) and variance parameters are added to the structure automatically. 
    """
    def __init__(self, input_dim, variance, lengthscale, ARD, name):
        super(Stationary, self).__init__(input_dim, name)
        self.ARD = ARD
@ -20,11 +49,11 @@ class Stationary(Kern):
                lengthscale = np.ones(1)
            else:
                lengthscale = np.asarray(lengthscale)
-                assert lengthscale.size == 1, "Only  lengthscale needed for non-ARD kernel"
+                assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"
        else:
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)
-                assert lengthscale.size in [1, input_dim], "Bad lengthscales"
+                assert lengthscale.size in [1, input_dim], "Bad number of lengthscales"
                if lengthscale.size != input_dim:
                    lengthscale = np.ones(input_dim)*lengthscale
            else:
@ -35,31 +64,32 @@ class Stationary(Kern):
        self.add_parameters(self.variance, self.lengthscale)
    def K_of_r(self, r):
-        raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class"
+        raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
    def dK_dr(self, r):
-        raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class"
+        raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class"
    @Cache_this(limit=5, ignore_args=())
    def K(self, X, X2=None):
        """
        Covariance between X and X2: the scaled distance between the inputs
        is computed first and then passed through K_of_r.
        """
        return self.K_of_r(self._scaled_dist(X, X2))
-    @Cache_this(limit=5, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=())
-    def _dist(self, X, X2):
+    def dK_dr_via_X(self, X, X2):
-        if X2 is None:
+        #a convenience function, so we can cache dK_dr
-            X2 = X
+        return self.dK_dr(self._scaled_dist(X, X2))
        return X[:, None, :] - X2[None, :, :]
    @Cache_this(limit=5, ignore_args=(0,))
    def _unscaled_dist(self, X, X2=None):
        """
-        Compute the square distance between each row of X and X2, or between
+        Compute the Euclidean distance between each row of X and X2, or between
        each pair of rows of X if X2 is None.
        """
        if X2 is None:
            Xsq = np.sum(np.square(X),1)
-            return np.sqrt(-2.*tdot(X) + (Xsq[:,None] + Xsq[None,:]))
+            r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
            util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
            return np.sqrt(r2)
        else:
            X1sq = np.sum(np.square(X),1)
            X2sq = np.sum(np.square(X2),1)
@ -70,7 +100,7 @@ class Stationary(Kern):
        """
        Efficiently compute the scaled distance, r.
-        r = \sum_{q=1}^Q (x_q - x'q)^2/l_q^2
+        r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
        Note that if there is only one lengthscale, l comes outside the sum. In
        this case we compute the unscaled distance first (in a separate
@ -84,7 +114,6 @@ class Stationary(Kern):
        else:
            return self._unscaled_dist(X, X2)/self.lengthscale
    def Kdiag(self, X):
        ret = np.empty(X.shape[0])
        ret[:] = self.variance
@ -95,20 +124,23 @@ class Stationary(Kern):
        self.lengthscale.gradient = 0.
    def update_gradients_full(self, dL_dK, X, X2=None):
        r = self._scaled_dist(X, X2)
        K = self.K_of_r(r)
-        rinv = self._inv_dist(X, X2)
+        self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
        dL_dr = self.dK_dr(r) * dL_dK
        #now the lengthscale gradient(s)
        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
        if self.ARD:
-            x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
+            #rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
-            self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)
+            #d =  X[:, None, :] - X2[None, :, :]
            #x_xl3 = np.square(d) 
            #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
            tmp = dL_dr*self._inv_dist(X, X2)
            if X2 is None: X2 = X
            self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
        else:
-            x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
+            r = self._scaled_dist(X, X2)
-            self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()
+            self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
        self.variance.gradient = np.sum(K * dL_dK)/self.variance
    def _inv_dist(self, X, X2=None):
        """
@ -116,7 +148,7 @@ class Stationary(Kern):
        diagonal, where we return zero (the distance on the diagonal is zero).
        This term appears in derivatives.
        """
-        dist = self._scaled_dist(X, X2)
+        dist = self._scaled_dist(X, X2).copy()
        if X2 is None:
            nondiag = util.diag.offdiag_view(dist)
            nondiag[:] = 1./nondiag
@ -128,10 +160,11 @@ class Stationary(Kern):
        """
        Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
        """
        r = self._scaled_dist(X, X2)
        invdist = self._inv_dist(X, X2)
-        dL_dr = self.dK_dr(r) * dL_dK
+        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
-        #The high-memory numpy way: ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
+        #The high-memory numpy way:
        #d =  X[:, None, :] - X2[None, :, :]
        #ret = np.sum((invdist*dL_dr)[:,:,None]*d,1)/self.lengthscale**2
        #if X2 is None:
            #ret *= 2.
@ -141,7 +174,7 @@ class Stationary(Kern):
            tmp *= 2.
            X2 = X
        ret = np.empty(X.shape, dtype=np.float64)
-        [np.copyto(ret[:,q], np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), 1)) for q in xrange(self.input_dim)]
+        [np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
        ret /= self.lengthscale**2
        return ret
@ -214,7 +247,7 @@ class Matern52(Stationary):
    .. math::
-       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \  \\text{ where  } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
+       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) 
       """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'):
        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name)
@ -225,7 +258,7 @@ class Matern52(Stationary):
    def dK_dr(self, r):
        """
        Derivative of the covariance with respect to the scaled distance r.
        """
        root5 = np.sqrt(5.)
        # polynomial factor left after differentiating k(r) and collecting terms
        poly = 10./3*r - 5.*r - 5.*root5/3*r**2
        return self.variance*poly*np.exp(-root5*r)
-    def Gram_matrix(self,F,F1,F2,F3,lower,upper):
+    def Gram_matrix(self, F, F1, F2, F3, lower, upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
--- a/GPy/kern/_src/sympykern.py
+++ b/GPy/kern/_src/sympykern.py
@ -76,34 +76,32 @@ class Sympykern(Kern):
            self.num_split_params = len(self._sp_theta_i)
            self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
            # Add split parameters to the model.
            for theta in self._split_theta_names:
                # TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
                setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
-                self.add_parameters(getattr(self, theta))
+                self.add_parameter(getattr(self, theta))
                #setattr(self, theta, np.ones(self.output_dim))
            self.num_shared_params = len(self._sp_theta)
            for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
                self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
            #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
        else:
            self.num_split_params = 0
            self._split_theta_names = []
            self._sp_theta = thetas
            self.num_shared_params = len(self._sp_theta)
            #self.num_params = self.num_shared_params
        # Add parameters to the model.
        for theta in self._sp_theta:
            val = 1.0
            # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
            if param is not None:
                if param.has_key(theta):
                    val = param[theta]
            setattr(self, theta.name, Param(theta.name, val, None))
            self.add_parameters(getattr(self, theta.name))
        #deal with param            
        #self._set_params(self._get_params())
        # Differentiate with respect to parameters.
        derivative_arguments = self._sp_x + self._sp_theta
@ -113,7 +111,6 @@ class Sympykern(Kern):
        self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
        self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
        # This gives the parameters for the arg list.
        self.arg_list = self._sp_x + self._sp_z + self._sp_theta
        self.diag_arg_list = self._sp_x + self._sp_theta
@ -134,7 +131,7 @@ class Sympykern(Kern):
        return spkern(self._sp_k+other._sp_k)
    def _gen_code(self):
-
+        #fn_theano = theano_function([self.arg_lists], [self._sp_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
        self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
        for key in self.derivatives.keys():
            setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -150,37 +150,6 @@ class BayesianGPLVM(SparseGP):
        return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
 class BayesianGPLVMWithMissingData(BayesianGPLVM):
    """
    BayesianGPLVM variant that stores the result of ``common_subarrays(Y)``
    on ``self.subarrays`` before running the regular BayesianGPLVM
    construction.
    """
    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
        Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
        """
        Takes the same arguments as BayesianGPLVM.__init__ and forwards all
        of them to it unchanged.
        """
        from ..util.subarray_and_sorting import common_subarrays
        self.subarrays = common_subarrays(Y)
        # The interactive ipdb breakpoint that used to sit here has been
        # removed: it halted every construction of this model.
        BayesianGPLVM.__init__(self, Y, input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, name=name, **kwargs)
    def parameters_changed(self):
        """
        Refresh the log marginal likelihood and the gradients of the
        variational mean and variance of X.
        """
        # NOTE(review): super(BayesianGPLVM, self) starts the lookup *after*
        # BayesianGPLVM, so BayesianGPLVM.parameters_changed is bypassed --
        # presumably on purpose, confirm.
        super(BayesianGPLVM, self).parameters_changed()
        self._log_marginal_likelihood -= self.KL_divergence()
        dL_dmu, dL_dS = self.dL_dmuS()
        # dL:
        self.X.mean.gradient  = dL_dmu
        self.X.variance.gradient  = dL_dS
        # dKL:
        self.X.mean.gradient -= self.X.mean
        self.X.variance.gradient -= (1. - (1. / (self.X.variance))) * 0.5
 if __name__ == '__main__':
    # Scratch driver: draws random inputs and a random binary mask; nothing
    # is done with the results (the block ends in ``pass``).
    import numpy as np
    X = np.random.randn(20,2)
    W = np.linspace(0,1,10)[None,:]
    # NOTE(review): X is (20, 2) and W is (1, 10); these shapes do not
    # broadcast, so X*W raises as written -- confirm the intended shapes.
    Y = (X*W).sum(1)
    missing = np.random.binomial(1,.1,size=Y.shape)
    pass
 def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
--- a/GPy/models/sparse_gp_regression.py
+++ b/GPy/models/sparse_gp_regression.py
@ -8,7 +8,7 @@ from .. import likelihoods
 from .. import kern
 from ..inference.latent_function_inference import VarDTC
 from ..util.misc import param_to_array
-from ..core.parameterization.variational import VariationalPosterior
+from ..core.parameterization.variational import NormalPosterior
 class SparseGPRegression(SparseGP):
    """
@ -47,7 +47,7 @@ class SparseGPRegression(SparseGP):
        likelihood = likelihoods.Gaussian()
        if not (X_variance is None):
-            X = VariationalPosterior(X,X_variance)
+            X = NormalPosterior(X,X_variance)
        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
@ -88,7 +88,7 @@ class SparseGPRegressionUncertainInput(SparseGP):
        # kern defaults to rbf (plus white for stability)
        if kernel is None:
-            kernel = kern.rbf(input_dim)  + kern.white(input_dim, variance=1e-3)
+            kernel = kern.RBF(input_dim) + kern.White(input_dim, variance=1e-3)
        # Z defaults to a subset of the data
        if Z is None:
@ -99,5 +99,5 @@ class SparseGPRegressionUncertainInput(SparseGP):
        likelihood = likelihoods.Gaussian()
-        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
+        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC())
        self.ensure_default_constraints()
--- a/GPy/models/ss_gplvm.py
+++ b/GPy/models/ss_gplvm.py
@ -58,7 +58,7 @@ class SSGPLVM(SparseGP):
        super(SSGPLVM, self).parameters_changed()
        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
-        self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
+        self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
        # update for the KL divergence
        self.variational_prior.update_gradients_KL(self.X)
--- a/GPy/plotting/matplot_dep/kernel_plots.py
+++ b/GPy/plotting/matplot_dep/kernel_plots.py
@ -106,7 +106,7 @@ def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *
            raise ValueError, "Bad limits for plotting"
        Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
-        Kx = kernel.K(Xnew, x, which_parts)
+        Kx = kernel.K(Xnew, x)
        pb.plot(Xnew, Kx, *args, **kwargs)
        pb.xlim(xmin, xmax)
        pb.xlabel("x")
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -56,10 +56,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
    if ax is None:
        fig = pb.figure(num=fignum)
        ax = fig.add_subplot(111)
-
+    
-    X, Y = param_to_array(model.X, model.Y)
+    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): 
-    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): X_variance = model.X_variance
+        X = model.X.mean
-
+        X_variance = param_to_array(model.X.variance)
    else:
        X = model.X
    X, Y = param_to_array(X, model.Y)
    if hasattr(model, 'Z'): Z = param_to_array(model.Z)
    #work out what the inputs are for plotting (1D or 2D)
@ -98,10 +101,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
        #add error bars for uncertain (if input uncertainty is being modelled)
-        #if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
+        if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
-        #    ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
+            ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
-        #                xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
+                        xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
-        #                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+                        ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
        #set the limits of the plot to some sensible values
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -8,13 +8,6 @@ import sys
 verbose = True
 try:
    import sympy
    SYMPY_AVAILABLE=True
 except ImportError:
    SYMPY_AVAILABLE=False
 class Kern_check_model(GPy.core.Model):
    """
    This is a dummy model class used as a base class for checking that the
@ -70,14 +63,11 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
        self.add_parameter(self.kernel)
    def parameters_changed(self):
        self.kernel.update_gradients_diag(self.dL_dK, self.X)
    def log_likelihood(self):
        return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
    def parameters_changed(self):
-        return self.kernel.update_gradients_diag(np.diag(self.dL_dK), self.X)
+        self.kernel.update_gradients_diag(np.diag(self.dL_dK), self.X)
 class Kern_check_dK_dX(Kern_check_model):
    """This class allows gradient checks for the gradient of a kernel with respect to X. """
@ -99,6 +89,8 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
    def parameters_changed(self):
        """
        Store the kernel's diagonal gradient with respect to X on X.gradient.
        """
        diag_grad = self.kernel.gradients_X_diag(self.dL_dK, self.X)
        self.X.gradient = diag_grad
 def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
    """
    This function runs on kernels to check the correctness of their
@ -217,11 +209,15 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
    return pass_checks
 class KernelTestsContinuous(unittest.TestCase):
    def setUp(self):
        """
        Draw two random input sets (100x2 and 110x2) and look up the kernel
        classes under test on GPy.kern.
        """
        self.X = np.random.randn(100,2)
        self.X2 = np.random.randn(110,2)
        self.kernclasses = [getattr(GPy.kern, kern_name) for kern_name in ['RBF', 'Linear']]
    def test_Matern32(self):
        """Run kern_test on a two-dimensional Matern32 kernel."""
        kernel = GPy.kern.Matern32(2)
        passed = kern_test(kernel, X=self.X, X2=self.X2, verbose=verbose)
        self.assertTrue(passed)
@ -234,6 +230,7 @@ class KernelTestsContinuous(unittest.TestCase):
 if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()
--- a/GPy/testing/observable_tests.py
+++ b/GPy/testing/observable_tests.py
@ -0,0 +1,133 @@
 '''
 Created on 27 Feb 2014
@author: maxz
 '''
 import unittest
 from GPy.core.parameterization.parameterized import Parameterized
 from GPy.core.parameterization.param import Param
 import numpy
 class ParamTestParent(Parameterized):
    """Parameterized that counts how often parameters_changed is called on it."""
    # number of parameters_changed calls seen so far
    parent_changed_count = 0
    def parameters_changed(self):
        self.parent_changed_count = self.parent_changed_count + 1
 class ParameterizedTest(Parameterized):
    """Parameterized that counts how often parameters_changed is called on it."""
    # number of parameters_changed calls seen so far
    params_changed_count = 0
    def parameters_changed(self):
        self.params_changed_count = self.params_changed_count + 1
    def _set_params(self, params, trigger_parent=True):
        # plain pass-through to the base implementation
        Parameterized._set_params(
            self,
            params,
            trigger_parent=trigger_parent,
        )
 class Test(unittest.TestCase):
    """
    Tests for observer notification on a small hierarchy: one parent holding
    two ParameterizedTest children, each of which holds Param objects.
    """
    def setUp(self):
        # hierarchy: parent -> (par, par2); par holds three Params, par2 two
        self.parent = ParamTestParent('test parent')
        self.par = ParameterizedTest('test model')
        self.par2 = ParameterizedTest('test model 2')
        self.p = Param('test parameter', numpy.random.normal(1,2,(10,3)))
        self.par.add_parameter(self.p)
        self.par.add_parameter(Param('test1', numpy.random.normal(0,1,(1,))))
        self.par.add_parameter(Param('test2', numpy.random.normal(0,1,(1,))))
        self.par2.add_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,))))
        self.par2.add_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,))))
        self.parent.add_parameter(self.par)
        self.parent.add_parameter(self.par2)
        # bookkeeping filled in by the observer callbacks below
        self._observer_triggered = None
        self._trigger_count = 0
        self._first = None
        self._second = None
    def _trigger(self, which):
        # Observer callback: remembers float(which), counts the call and
        # records whether it ran first or second.
        self._observer_triggered = float(which)
        self._trigger_count += 1
        if self._first is not None:
            self._second = self._trigger
        else:
            self._first = self._trigger
    def _trigger_priority(self, which):
        # Observer callback that only records whether it ran first or second.
        if self._first is not None:
            self._second = self._trigger_priority
        else:
            self._first = self._trigger_priority
    def test_observable(self):
        # third argument of add_observer is presumably the priority (see
        # test_priority) -- TODO confirm against add_observer
        self.par.add_observer(self, self._trigger, -1)
        self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        self.p[0,1] = 3 # trigger observers
        self.assertEqual(self._observer_triggered, 3, 'observer should have triggered')
        self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 1, 'params changed once')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        # after removing the observer, _trigger must no longer be called
        self.par.remove_observer(self)
        self.p[2,1] = 4
        self.assertEqual(self._observer_triggered, 3, 'observer should not have triggered')
        self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 2, 'params changed second')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        # re-adding the observer makes _trigger fire again (second call in total)
        self.par.add_observer(self, self._trigger, -1)
        self.p[2,1] = 4
        self.assertEqual(self._observer_triggered, 4, 'observer should have triggered')
        self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 3, 'params changed second')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        # removal by (observer, callable) pair
        self.par.remove_observer(self, self._trigger)
        self.p[0,1] = 3
        self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered')
        self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 4, 'params changed second')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
    def test_set_params(self):
        # setting parameters on the child or on the parent is expected to
        # bump both counters in step
        self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
        self.par._set_params(numpy.ones(self.par.size))
        self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
        self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
        self.parent._set_params(numpy.ones(self.parent.size) * 2)
        self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
        self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
    def test_priority_notify(self):
        self.assertEqual(self.par.params_changed_count, 0)
        self.par._notify_observers(0, None)
        self.assertEqual(self.par.params_changed_count, 1)
        self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
        # with -inf as second argument the child is expected to update while
        # the parent count stays at 1
        self.par._notify_observers(0, -numpy.inf)
        self.assertEqual(self.par.params_changed_count, 2)
        self.assertEqual(self.parent.parent_changed_count, 1)
    def test_priority(self):
        # _trigger registered with -1, _trigger_priority with 0:
        # _trigger_priority is expected to run first
        self.par.add_observer(self, self._trigger, -1)
        self.par.add_observer(self, self._trigger_priority, 0)
        self.par._notify_observers(0)
        self.assertEqual(self._first, self._trigger_priority, 'priority should be first')
        self.assertEqual(self._second, self._trigger, 'priority should be first')
        self.par.remove_observer(self)
        self._first = self._second = None
        # _trigger registered with 1 instead: now _trigger is expected to run first
        self.par.add_observer(self, self._trigger, 1)
        self.par.add_observer(self, self._trigger_priority, 0)
        self.par._notify_observers(0)
        self.assertEqual(self._first, self._trigger, 'priority should be second')
        self.assertEqual(self._second, self._trigger_priority, 'priority should be second')
 if __name__ == "__main__":
    # Run every test in this module when the file is executed directly.
    # Uncomment the next line to restrict the run to a single test:
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@ -6,6 +6,7 @@ Created on Feb 13, 2014
 import unittest
 import GPy
 import numpy as np
 from GPy.core.parameterization.parameter_core import HierarchyError
 class Test(unittest.TestCase):
@ -65,7 +66,7 @@ class Test(unittest.TestCase):
        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
    def test_add_parameter_already_in_hirarchy(self):
-        self.test1.add_parameter(self.white._parameters_[0])
+        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])        
    def test_default_constraints(self):
        self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@ -1,6 +1,14 @@
 from ..core.parameterization.parameter_core import Observable
 import itertools
 class Cacher(object):
    """
    """
    def __init__(self, operation, limit=5, ignore_args=()):
        self.limit = int(limit)
        self.ignore_args = ignore_args
@ -10,50 +18,78 @@ class Cacher(object):
        self.inputs_changed = []
    def __call__(self, *args):
        """
        A wrapper function for self.operation,
        """
        #ensure that specified arguments are ignored
        if len(self.ignore_args) != 0:
-            ca = [a for i,a in enumerate(args) if i not in self.ignore_args]
+            oa = [a for i,a in enumerate(args) if i not in self.ignore_args]
        else:
-            ca = args
+            oa = args
        # this makes sure we only add an observer once, and that None can be in args
-        cached_args = []
+        observable_args = []
-        for a in ca:
+        for a in oa:
-            if (not any(a is ai for ai in cached_args)) and a is not None:
+            if (not any(a is ai for ai in observable_args)) and a is not None:
-                cached_args.append(a)
+                observable_args.append(a)
-        if not all([isinstance(arg, Observable) for arg in cached_args]):
+
-            print cached_args
+        #make sure that all the found arguments really are observable:
-            import ipdb;ipdb.set_trace()
+        #otherwise don't cache anything, pass args straight through
        if not all([isinstance(arg, Observable) for arg in observable_args]):
            return self.operation(*args)
-        
+
-        if cached_args in self.cached_inputs:
+        # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
-            i = self.cached_inputs.index(cached_args)
+        # return self.operation(*args)
        #if the result is cached, return the cached computation
        state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
        if any(state):
            i = state.index(True)
            if self.inputs_changed[i]:
                #(elements of) the args have changed since we last computed: update
                self.cached_outputs[i] = self.operation(*args)
                self.inputs_changed[i] = False
            return self.cached_outputs[i]
        else:
            #first time we've seen these arguments: compute
            #first make sure the depth limit isn't exceeded
            if len(self.cached_inputs) == self.limit:
                args_ = self.cached_inputs.pop(0)
-                [a.remove_observer(self, self.on_cache_changed) for a in args_]
+                [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
                self.inputs_changed.pop(0)
                self.cached_outputs.pop(0)
-            self.cached_inputs.append(cached_args)
+            #compute
            self.cached_inputs.append(args)
            self.cached_outputs.append(self.operation(*args))
            self.inputs_changed.append(False)
-            [a.add_observer(self, self.on_cache_changed) for a in cached_args]
+            [a.add_observer(self, self.on_cache_changed) for a in observable_args]
-            return self.cached_outputs[-1]
+            return self.cached_outputs[-1]#Max says return.
    def on_cache_changed(self, arg):
        """
        Callback which flags cache entries as stale when one of their inputs changes.

        This method is registered as an observer on the inputs at the time
        they are cached. Every entry of self.cached_inputs which contains
        `arg` (compared by identity, not equality) gets its flag in
        self.inputs_changed set to True; all other entries keep the flag
        they already had.

        :param arg: the input object whose elements have changed
        """
        refreshed = []
        for inputs, was_changed in zip(self.cached_inputs, self.inputs_changed):
            touched = False
            for candidate in inputs:
                if candidate is arg:
                    touched = True
                    break
            # an entry that was already flagged stays flagged
            refreshed.append(touched or was_changed)
        self.inputs_changed = refreshed
    def reset(self, obj):
-        [[a.remove_observer(self, self.on_cache_changed) for a in args] for args in self.cached_inputs]
+        """
-        [[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs]
+        Totally reset the cache
        """
        [[a.remove_observer(self, self.on_cache_changed) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
        [[a.remove_observer(self, self.reset) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
        self.cached_inputs = []
        self.cached_outputs = []
        self.inputs_changed = []
 class Cache_this(object):
    """
    A decorator which can be applied to bound methods in order to cache them
    """
    def __init__(self, limit=5, ignore_args=()):
        self.limit = limit
        self.ignore_args = ignore_args
@ -64,4 +100,5 @@ class Cache_this(object):
                self.c = Cacher(f, self.limit, ignore_args=self.ignore_args)
            return self.c(*args)
        f_wrap._cacher = self
-        return f_wrap
+        f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
        return f_wrap
		`@ -0,0 +1,2 @@`
							`# Copyright (c) 2012, GPy authors (see AUTHORS.txt).`
							`# Licensed under the BSD 3-clause license (see LICENSE.txt)`