Merge branch 'params' of github.com:SheffieldML/GPy into params

2026-05-05 01:32:40 +02:00 · 2014-03-14 13:19:50 +00:00 · 2014-03-14 13:19:50 +00:00 · a56928e11a
commit a56928e11a
parent 7cd75ccbf5 f88b4c92e8
30 changed files with 361 additions and 310 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -10,7 +10,7 @@ from model import Model
 from parameterization import ObservableArray
 from .. import likelihoods
 from ..likelihoods.gaussian import Gaussian
-from ..inference.latent_function_inference import exact_gaussian_inference
+from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
 from parameterization.variational import VariationalPosterior

 class GP(Model):
@ -56,7 +56,7 @@ class GP(Model):
            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
            else:
-                inference_method = expectation_propagation
+                inference_method = expectation_propagation.EP()
                print "defaulting to ", inference_method, "for latent function inference"
        self.inference_method = inference_method

@ -94,6 +94,9 @@ class GP(Model):
            #var = Kxx - np.sum(LiKx*LiKx, 0)
            var = Kxx - np.sum(WiKx*Kx, 0)
            var = var.reshape(-1, 1)
+
+        #force mu to be a column vector
+        if len(mu.shape)==1: mu = mu[:,None]
        return mu, var

    def predict(self, Xnew, full_cov=False, Y_metadata=None):
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -301,8 +301,7 @@ class Model(Parameterized):

            denominator = (2 * np.dot(dx, gradient))
            global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
-
-            return np.abs(1. - global_ratio) < tolerance
+            return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@ -23,17 +23,16 @@ class ParameterIndexOperations(object):
        if constraints is not None:
            for t, i in constraints.iteritems():
                self.add(t, i)
-        
+
    def __getstate__(self):
-        return self._properties#, self._reverse
-        
+        return self._properties
+
    def __setstate__(self, state):
-        self._properties = state[0]
-        # self._reverse = state[1]
+        self._properties = state

    def iteritems(self):
        return self._properties.iteritems()
-    
+
    def items(self):
        return self._properties.items()

@ -42,7 +41,7 @@ class ParameterIndexOperations(object):

    def iterproperties(self):
        return self._properties.iterkeys()
-    
+
    def shift_right(self, start, size):
        for ind in self.iterindices():
            toshift = ind>=start
@ -58,29 +57,26 @@ class ParameterIndexOperations(object):
                ind[toshift] -= size
            if ind.size != 0: self._properties[v] = ind
            else: del self._properties[v]
-    
+
    def clear(self):
        self._properties.clear()
-    
+
    @property
    def size(self):
-        return reduce(lambda a,b: a+b.size, self.iterindices(), 0)    
-    
+        return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
+
    def iterindices(self):
        return self._properties.itervalues()
-    
+
    def indices(self):
        return self._properties.values()

    def properties_for(self, index):
        return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
-        
+
    def add(self, prop, indices):
-        try:
-            self._properties[prop] = combine_indices(self._properties[prop], indices)
-        except KeyError:
-            self._properties[prop] = indices
-    
+        self._properties[prop] = combine_indices(self._properties[prop], indices)
+
    def remove(self, prop, indices):
        if prop in self._properties:
            diff = remove_indices(self[prop], indices)
@ -91,22 +87,22 @@ class ParameterIndexOperations(object):
                del self._properties[prop]
            return removed.astype(int)
        return numpy.array([]).astype(int)
-    
+
    def update(self, parameter_index_view, offset=0):
        for i, v in parameter_index_view.iteritems():
            self.add(i, v+offset)

-    
+
    def copy(self):
        return ParameterIndexOperations(dict(self.iteritems()))
-    
+
    def __getitem__(self, prop):
        return self._properties[prop]
-    
+
    def __str__(self, *args, **kwargs):
        import pprint
        return pprint.pformat(dict(self._properties))
-       
+
 def combine_indices(arr1, arr2):
    return numpy.union1d(arr1, arr2)

@ -114,24 +110,22 @@ def remove_indices(arr, to_remove):
    return numpy.setdiff1d(arr, to_remove, True)

 def index_empty(index):
-    return numpy.size(index) == 0 
+    return numpy.size(index) == 0

 class ParameterIndexOperationsView(object):
    def __init__(self, param_index_operations, offset, size):
        self._param_index_ops = param_index_operations
        self._offset = offset
        self._size = size
-    
+
    def __getstate__(self):
        return [self._param_index_ops, self._offset, self._size]

-
    def __setstate__(self, state):
        self._param_index_ops = state[0]
        self._offset = state[1]
        self._size = state[2]

-
    def _filter_index(self, ind):
        return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset

@ -140,7 +134,7 @@ class ParameterIndexOperationsView(object):
        for i, ind in self._param_index_ops.iteritems():
            ind2 = self._filter_index(ind)
            if ind2.size > 0:
-                yield i, ind2 
+                yield i, ind2

    def items(self):
        return [[i,v] for i,v in self.iteritems()]
@ -151,7 +145,7 @@ class ParameterIndexOperationsView(object):

    def iterproperties(self):
        for i, _ in self.iteritems():
-            yield i 
+            yield i


    def shift_right(self, start, size):
@ -161,7 +155,7 @@ class ParameterIndexOperationsView(object):
        self._param_index_ops.shift_left(start+self._offset, size)
        self._offset -= size
        self._size -= size
-            
+
    def clear(self):
        for i, ind in self.items():
            self._param_index_ops.remove(i, ind+self._offset)
@ -198,7 +192,7 @@ class ParameterIndexOperationsView(object):
    def __getitem__(self, prop):
        ind = self._filter_index(self._param_index_ops[prop])
        return ind
-    
+
    def __str__(self, *args, **kwargs):
        import pprint
        return pprint.pformat(dict(self.iteritems()))
@ -206,8 +200,8 @@ class ParameterIndexOperationsView(object):
    def update(self, parameter_index_view, offset=0):
        for i, v in parameter_index_view.iteritems():
            self.add(i, v+offset)
-    
-    
+
+
    def copy(self):
        return ParameterIndexOperations(dict(self.iteritems()))
    pass
--- a/GPy/core/parameterization/lists_and_dicts.py
+++ b/GPy/core/parameterization/lists_and_dicts.py
@ -5,21 +5,17 @@ Created on 27 Feb 2014
 '''

 from collections import defaultdict
-class DefaultArrayDict(defaultdict):
-    def __init__(self):
+
+def intarray_default_factory():
+    import numpy as np
+    return np.int_([])
+
+class IntArrayDict(defaultdict):
+    def __init__(self, default_factory=None):
        """
        Default will be self._default, if not set otherwise
        """
-        defaultdict.__init__(self, self.default_factory)
-
-class SetDict(DefaultArrayDict):
-    def default_factory(self):
-        return set()
-
-class IntArrayDict(DefaultArrayDict):
-    def default_factory(self):
-        import numpy as np
-        return np.int_([])
+        defaultdict.__init__(self, intarray_default_factory)

 class ArrayList(list):
    """
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@ -49,9 +49,6 @@ class Param(OptimizationHandlable, ObservableArray):
        obj._realshape_ = obj.shape
        obj._realsize_ = obj.size
        obj._realndim_ = obj.ndim
-        from lists_and_dicts import SetDict
-        obj._tied_to_me_ = SetDict()
-        obj._tied_to_ = []
        obj._original_ = True
        obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64)
        return obj
@ -80,13 +77,11 @@ class Param(OptimizationHandlable, ObservableArray):
        self._parent_index_ = getattr(obj, '_parent_index_', None)
        self._default_constraint_ = getattr(obj, '_default_constraint_', None)
        self._current_slice_ = getattr(obj, '_current_slice_', None)
-        self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
-        self._tied_to_ = getattr(obj, '_tied_to_', None)
        self._realshape_ = getattr(obj, '_realshape_', None)
        self._realsize_ = getattr(obj, '_realsize_', None)
        self._realndim_ = getattr(obj, '_realndim_', None)
        self._original_ = getattr(obj, '_original_', None)
-        self._name = getattr(obj, 'name', None)
+        self._name = getattr(obj, '_name', None)
        self._gradient_array_ = getattr(obj, '_gradient_array_', None)
        self.constraints = getattr(obj, 'constraints', None)
        self.priors = getattr(obj, 'priors', None)
@ -106,10 +101,10 @@ class Param(OptimizationHandlable, ObservableArray):
    #===========================================================================
    # Pickling operations
    #===========================================================================
-    def __reduce_ex__(self):
+    def __reduce__(self):
        func, args, state = super(Param, self).__reduce__()
        return func, args, (state,
-                            (self.name,
+                            (self._name,
                             self._parent_,
                             self._parent_index_,
                             self._default_constraint_,
@ -117,16 +112,16 @@ class Param(OptimizationHandlable, ObservableArray):
                             self._realshape_,
                             self._realsize_,
                             self._realndim_,
-                             self._tied_to_me_,
-                             self._tied_to_,
+                             self.constraints,
+                             self.priors
                            )
                            )

    def __setstate__(self, state):
        super(Param, self).__setstate__(state[0])
        state = list(state[1])
-        self._tied_to_ = state.pop()
-        self._tied_to_me_ = state.pop()
+        self.priors = state.pop()
+        self.constraints = state.pop()
        self._realndim_ = state.pop()
        self._realsize_ = state.pop()
        self._realshape_ = state.pop()
@ -134,7 +129,7 @@ class Param(OptimizationHandlable, ObservableArray):
        self._default_constraint_ = state.pop()
        self._parent_index_ = state.pop()
        self._parent_ = state.pop()
-        self.name = state.pop()
+        self._name = state.pop()

    def copy(self, *args):
        constr = self.constraints.copy()
@ -180,21 +175,21 @@ class Param(OptimizationHandlable, ObservableArray):
    #===========================================================================
    # Index Operations:
    #===========================================================================
-    def _internal_offset(self):
-        internal_offset = 0
-        extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
-        for i, si in enumerate(self._current_slice_[:self._realndim_]):
-            if numpy.all(si == Ellipsis):
-                continue
-            if isinstance(si, slice):
-                a = si.indices(self._realshape_[i])[0]
-            elif isinstance(si, (list,numpy.ndarray,tuple)):
-                a = si[0]
-            else: a = si
-            if a < 0:
-                a = self._realshape_[i] + a
-            internal_offset += a * extended_realshape[i]
-        return internal_offset
+    #def _internal_offset(self):
+    #    internal_offset = 0
+    #    extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
+    #    for i, si in enumerate(self._current_slice_[:self._realndim_]):
+    #        if numpy.all(si == Ellipsis):
+    #            continue
+    #        if isinstance(si, slice):
+    #            a = si.indices(self._realshape_[i])[0]
+    #        elif isinstance(si, (list,numpy.ndarray,tuple)):
+    #            a = si[0]
+    #        else: a = si
+    #        if a < 0:
+    #            a = self._realshape_[i] + a
+    #        internal_offset += a * extended_realshape[i]
+    #    return internal_offset

    def _raveled_index(self, slice_index=None):
        # return an index array on the raveled array, which is formed by the current_slice
@ -204,6 +199,9 @@ class Param(OptimizationHandlable, ObservableArray):
        if ind.ndim < 2: ind = ind[:, None]
        return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)

+    def _raveled_index_for(self, obj):
+        return self._raveled_index()
+
    def _expand_index(self, slice_index=None):
        # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
        # it basically translates slices to their respective index arrays and turns negative indices around
@ -224,6 +222,11 @@ class Param(OptimizationHandlable, ObservableArray):
                return numpy.r_[a]
            return numpy.r_[:b]
        return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
+    #===========================================================================
+    # Constrainable
+    #===========================================================================
+    def _ensure_fixes(self):
+        self._fixes_ = numpy.ones(self._realsize_, dtype=bool)

    #===========================================================================
    # Convenience
@ -239,7 +242,6 @@ class Param(OptimizationHandlable, ObservableArray):
    #round.__doc__ = numpy.round.__doc__
    def _get_original(self, param):
        return self
-
    #===========================================================================
    # Printing -> done
    #===========================================================================
@ -266,23 +268,11 @@ class Param(OptimizationHandlable, ObservableArray):
        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
    @property
    def _ties_str(self):
-        return [t._short() for t in self._tied_to_] or ['']
+        return ['']
    def __repr__(self, *args, **kwargs):
        name = "\033[1m{x:s}\033[0;0m:\n".format(
                            x=self.hierarchy_name())
        return name + super(Param, self).__repr__(*args, **kwargs)
-    def _ties_for(self, rav_index):
-        # size = sum(p.size for p in self._tied_to_)
-        ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param)
-        for i, tied_to in enumerate(self._tied_to_):
-            for t, ind in tied_to._tied_to_me_.iteritems():
-                if t._parent_index_ == self._parent_index_:
-                    matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :])
-                    tt_rav_index = tied_to._raveled_index()
-                    ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0]
-                    if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
-                    else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
-        return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
    def _indices(self, slice_index=None):
        # get a int-array containing all indices in the first axis.
        if slice_index is None:
@ -322,8 +312,8 @@ class Param(OptimizationHandlable, ObservableArray):
        ravi = self._raveled_index(filter_)
        if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
        if prirs is None: prirs = self.priors.properties_for(ravi)
-        if ties is None: ties = self._ties_for(ravi)
-        ties = [' '.join(map(lambda x: x._short(), t)) for t in ties]
+        if ties is None: ties = [['N/A']]*self.size
+        ties = [' '.join(map(lambda x: x, t)) for t in ties]
        if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
        if lx is None: lx = self._max_len_values()
        if li is None: li = self._max_len_index(indices)
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -16,7 +16,7 @@ Observable Pattern for patameterization
 from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np

-__updated__ = '2014-03-13'
+__updated__ = '2014-03-14'

 class HierarchyError(Exception):
    """
@ -31,7 +31,71 @@ def adjust_name_for_printing(name):
        return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@",'_at_')
    return ''

-class Observable(object):
+class InterfacePickleFunctions(object):
+    def __init__(self, *a, **kw):
+        super(InterfacePickleFunctions, self).__init__()
+
+    def _getstate(self):
+        """
+        Returns the state of this class in a memento pattern.
+        The state must be a list-like structure of all the fields
+        this class needs to run.
+
+        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
+        """
+        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+    def _setstate(self, state):
+        """
+        Set the state (memento pattern) of this class to the given state.
+        Usually this is just the counterpart to _getstate, such that
+        an object is a copy of another when calling
+
+            copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
+
+        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
+        """
+        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+
+class Pickleable(object):
+    """
+    Make an object pickleable (See python doc 'pickling').
+
+    This class allows for pickling support by Memento pattern.
+    _getstate returns a memento of the class, which gets pickled.
+    _setstate(<memento>) (re-)sets the state of the class to the memento
+    """
+    def __init__(self, *a, **kw):
+        super(Pickleable, self).__init__()
+    #===========================================================================
+    # Pickling operations
+    #===========================================================================
+    def pickle(self, f, protocol=-1):
+        """
+        :param f: either filename or open file object to write to.
+                  if it is an open buffer, you have to make sure to close
+                  it properly.
+        :param protocol: pickling protocol to use, python-pickle for details.
+        """
+        import cPickle
+        if isinstance(f, str):
+            with open(f, 'w') as f:
+                cPickle.dump(self, f, protocol)
+        else:
+            cPickle.dump(self, f, protocol)
+    def __getstate__(self):
+        if self._has_get_set_state():
+            return self._getstate()
+        return self.__dict__
+    def __setstate__(self, state):
+        if self._has_get_set_state():
+            self._setstate(state)
+            # TODO: maybe parameters_changed() here?
+            return
+        self.__dict__ = state
+    def _has_get_set_state(self):
+        return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
+
+class Observable(InterfacePickleFunctions):
    """
    Observable pattern for parameterization.

@ -41,7 +105,7 @@ class Observable(object):
    """
    _updated = True
    def __init__(self, *args, **kwargs):
-        super(Observable, self).__init__()
+        super(Observable, self).__init__(*args, **kwargs)
        self._observer_callables_ = []

    def add_observer(self, observer, callble, priority=0):
@ -89,68 +153,16 @@ class Observable(object):
            ins += 1
        self._observer_callables_.insert(ins, (p, o, c))

-class Pickleable(object):
-    """
-    Make an object pickleable (See python doc 'pickling').
-
-    This class allows for pickling support by Memento pattern.
-    _getstate returns a memento of the class, which gets pickled.
-    _setstate(<memento>) (re-)sets the state of the class to the memento
-    """
-    #===========================================================================
-    # Pickling operations
-    #===========================================================================
-    def pickle(self, f, protocol=-1):
-        """
-        :param f: either filename or open file object to write to.
-                  if it is an open buffer, you have to make sure to close
-                  it properly.
-        :param protocol: pickling protocol to use, python-pickle for details.
-        """
-        import cPickle
-        if isinstance(f, str):
-            with open(f, 'w') as f:
-                cPickle.dump(self, f, protocol)
-        else:
-            cPickle.dump(self, f, protocol)
-    def __getstate__(self):
-        if self._has_get_set_state():
-            return self._getstate()
-        return self.__dict__
-    def __setstate__(self, state):
-        if self._has_get_set_state():
-            self._setstate(state)
-            # TODO: maybe parameters_changed() here?
-            return
-        self.__dict__ = state
-    def _has_get_set_state(self):
-        return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
    def _getstate(self):
-        """
-        Returns the state of this class in a memento pattern.
-        The state must be a list-like structure of all the fields
-        this class needs to run.
-
-        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
-        """
-        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+        return [self._observer_callables_]
    def _setstate(self, state):
-        """
-        Set the state (memento pattern) of this class to the given state.
-        Usually this is just the counterpart to _getstate, such that
-        an object is a copy of another when calling
-
-            copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
-
-        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
-        """
-        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+        self._observer_callables_ = state.pop()

 #===============================================================================
 # Foundation framework for parameterized and param objects:
 #===============================================================================

-class Parentable(object):
+class Parentable(Observable):
    """
    Enable an Object to have a parent.

@ -160,7 +172,7 @@ class Parentable(object):
    _parent_ = None
    _parent_index_ = None
    def __init__(self, *args, **kwargs):
-        super(Parentable, self).__init__()
+        super(Parentable, self).__init__(*args, **kwargs)

    def has_parent(self):
        """
@ -284,13 +296,6 @@ class Indexable(object):
        """
        raise NotImplementedError, "Need to be able to get the raveled Index"

-    def _internal_offset(self):
-        """
-        The offset for this parameter inside its parent.
-        This has to account for shaped parameters!
-        """
-        return 0
-
    def _offset_for(self, param):
        """
        Return the offset of the param inside this parameterized object.
@ -308,7 +313,7 @@ class Indexable(object):
        raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"


-class Constrainable(Nameable, Indexable, Observable):
+class Constrainable(Nameable, Indexable):
    """
    Make an object constrainable with Priors and Transformations.
    TODO: Mappings!!
@ -367,21 +372,26 @@ class Constrainable(Nameable, Indexable, Observable):
        self._highest_parent_._set_unfixed(unconstrained)
    unfix = unconstrain_fixed

+    def _ensure_fixes(self):
+        # Ensure that the fixes array is set:
+        # Parameterized: ones(self.size)
+        # Param: ones(self._realsize_
+        self._fixes_ = np.ones(self.size, dtype=bool)
+
    def _set_fixed(self, index):
-        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
+        self._ensure_fixes()
        self._fixes_[index] = FIXED
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED

    def _set_unfixed(self, index):
-        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
-        # rav_i = self._raveled_index_for(param)[index]
+        self._ensure_fixes()
        self._fixes_[index] = UNFIXED
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED

    def _connect_fixes(self):
        fixed_indices = self.constraints[__fixed__]
        if fixed_indices.size > 0:
-            self._fixes_ = np.ones(self.size, dtype=bool) * UNFIXED
+            self._ensure_fixes()
            self._fixes_[fixed_indices] = FIXED
        else:
            self._fixes_ = None
@ -401,6 +411,15 @@ class Constrainable(Nameable, Indexable, Observable):
        repriorized = self.unset_priors()
        self._add_to_index_operations(self.priors, repriorized, prior, warning)

+        from domains import _REAL, _POSITIVE, _NEGATIVE
+        if prior.domain is _POSITIVE:
+            self.constrain_positive(warning)
+        elif prior.domain is _NEGATIVE:
+            self.constrain_negative(warning)
+        elif prior.domain is _REAL:
+            rav_i = self._raveled_index()
+            assert all(all(c.domain is _REAL for c in con) for con in self.constraints.properties_for(rav_i))
+
    def unset_priors(self, *priors):
        """
        Un-set all priors given from this parameter handle.
@ -411,14 +430,14 @@ class Constrainable(Nameable, Indexable, Observable):
    def log_prior(self):
        """evaluate the prior"""
        if self.priors.size > 0:
-            x = self._get_params()
-            return reduce(lambda a, b: a + b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
+            x = self._param_array_
+            return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
        return 0.

    def _log_prior_gradients(self):
        """evaluate the gradients of the priors"""
        if self.priors.size > 0:
-            x = self._get_params()
+            x = self._param_array_
            ret = np.zeros(x.size)
            [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
            return ret
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -89,7 +89,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=

    likelihood = GPy.likelihoods.Bernoulli()
    laplace_inf = GPy.inference.latent_function_inference.Laplace()
-    kernel = GPy.kern.rbf(1)
+    kernel = GPy.kern.RBF(1)

    # Model definition
    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -318,7 +318,7 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
    Y /= Y.std()

    if kernel_type == 'linear':
-        kernel = GPy.kern.linear(X.shape[1], ARD=1)
+        kernel = GPy.kern.Linear(X.shape[1], ARD=1)
    elif kernel_type == 'rbf_inv':
        kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
    else:
@ -357,7 +357,7 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
    Y /= Y.std()

    if kernel_type == 'linear':
-        kernel = GPy.kern.linear(X.shape[1], ARD=1)
+        kernel = GPy.kern.Linear(X.shape[1], ARD=1)
    elif kernel_type == 'rbf_inv':
        kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
    else:
--- a/GPy/inference/latent_function_inference/init.py
+++ b/GPy/inference/latent_function_inference/init.py
@ -27,8 +27,8 @@ etc.

 from exact_gaussian_inference import ExactGaussianInference
 from laplace import Laplace
-expectation_propagation = 'foo' # TODO
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
+from expectation_propagation import EP
 from dtc import DTC
 from fitc import FITC

--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@ -1,7 +1,7 @@
 import numpy as np
-from scipy import stats
-from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
-from likelihood import likelihood
+from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
+from posterior import Posterior
+log_2_pi = np.log(2*np.pi)

 class EP(object):
    def __init__(self, epsilon=1e-6, eta=1., delta=1.):
@ -28,30 +28,30 @@ class EP(object):

        K = kern.K(X)

-        mu_tilde, tau_tilde = self.expectation_propagation()
+        mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(K, Y, likelihood, Y_metadata)

-        Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)
+        Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))

        alpha, _ = dpotrs(LW, mu_tilde, lower=1)

-        log_marginal =  0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde))
+        log_marginal =  0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??

        dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)

-        #TODO: what abot derivatives of the likelihood parameters?
+        dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters

-        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
+        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}



-    def expectation_propagation(self, K, Y, Y_metadata, likelihood)
+    def expectation_propagation(self, K, Y, likelihood, Y_metadata):

        num_data, data_dim = Y.shape
        assert data_dim == 1, "This EP methods only works for 1D outputs"


        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
-        mu = np.zeros(self.num_data)
+        mu = np.zeros(num_data)
        Sigma = K.copy()

        #Initial values - Marginal moments
@ -61,33 +61,32 @@ class EP(object):

        #initial values - Gaussian factors
        if self.old_mutilde is None:
-            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data, num_data))
+            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
        else:
            assert old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
            mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
            tau_tilde = v_tilde/mu_tilde

        #Approximation
-        epsilon_np1 = self.epsilon + 1.
-        epsilon_np2 = self.epsilon + 1.
+        tau_diff = self.epsilon + 1.
+        v_diff = self.epsilon + 1.
       	iterations = 0
-        while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon):
+        while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
            update_order = np.random.permutation(num_data)
            for i in update_order:
                #Cavity distribution parameters
                tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
                v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
                #Marginal moments
-                Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i]))
+                Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
                #Site parameters update
                delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
                delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
                tau_tilde[i] += delta_tau
                v_tilde[i] += delta_v
                #Posterior distribution parameters update
-                DSYR(Sigma, Sigma[:,i].copy(), -Delta_tau/(1.+ Delta_tau*Sigma[i,i]))
+                DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
                mu = np.dot(Sigma, v_tilde)
-                iterations += 1

            #(re) compute Sigma and mu using full Cholesky decompy
            tau_tilde_root = np.sqrt(tau_tilde)
@ -99,10 +98,14 @@ class EP(object):
            mu = np.dot(Sigma,v_tilde)

            #monitor convergence
-            epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old))
-            epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old))
+            if iterations>0:
+                tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
+                v_diff = np.mean(np.square(v_tilde-v_tilde_old))
            tau_tilde_old = tau_tilde.copy()
            v_tilde_old = v_tilde.copy()

-        return mu, Sigma, mu_tilde, tau_tilde
+            iterations += 1
+
+        mu_tilde = v_tilde/tau_tilde
+        return mu, Sigma, mu_tilde, tau_tilde, Z_hat

--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@ -9,7 +9,9 @@ from kern import CombinationKernel
 class Add(CombinationKernel):
    """
    Add given list of kernels together.
-    propagates gradients thorugh.
+    propagates gradients through.
+    
+    This kernel will take over the active dims of it's subkernels passed in.
    """
    def __init__(self, subkerns, name='add'):
        super(Add, self).__init__(subkerns, name)
--- a/GPy/kern/_src/brownian.py
+++ b/GPy/kern/_src/brownian.py
@ -17,9 +17,9 @@ class Brownian(Kern):
    :param variance:
    :type variance: float
    """
-    def __init__(self, input_dim=1, variance=1., name='Brownian'):
+    def __init__(self, input_dim=1, variance=1., active_dims=None, name='Brownian'):
        assert input_dim==1, "Brownian motion in 1D only"
-        super(Brownian, self).__init__(input_dim, name)
+        super(Brownian, self).__init__(input_dim, active_dims, name)

        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)
--- a/GPy/kern/_src/coregionalize.py
+++ b/GPy/kern/_src/coregionalize.py
@ -34,8 +34,8 @@ class Coregionalize(Kern):

    .. note: see coregionalization examples in GPy.examples.regression for some usage.
    """
-    def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, name='coregion'):
-        super(Coregionalize, self).__init__(input_dim, name=name)
+    def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'):
+        super(Coregionalize, self).__init__(input_dim, active_dims, name=name)
        self.output_dim = output_dim
        self.rank = rank
        if self.rank>output_dim:
--- a/GPy/kern/_src/independent_outputs.py
+++ b/GPy/kern/_src/independent_outputs.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


-from kern import Kern
+from kern import Kern, CombinationKernel
 import numpy as np
 import itertools

@ -32,7 +32,7 @@ def index_to_slices(index):
    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
    return ret

-class IndependentOutputs(Kern):
+class IndependentOutputs(CombinationKernel):
    """
    A kernel which can represent several independent functions.
    this kernel 'switches off' parts of the matrix where the output indexes are different.
@ -41,12 +41,12 @@ class IndependentOutputs(Kern):
    the rest of the columns of X are passed to the underlying kernel for computation (in blocks).

    """
-    def __init__(self, index_dim, kern, name='independ'):
+    def __init__(self, kern, index_dim=-1, name='independ'):
        assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces"
-        super(IndependentOutputs, self).__init__(np.r_[0:max(max(kern.active_dims)+1, index_dim+1)], name)
+        super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name)
        self.index_dim = index_dim
        self.kern = kern
-        self.add_parameters(self.kern)
+        #self.add_parameters(self.kern)

    def K(self,X ,X2=None):
        slices = index_to_slices(X[:,self.index_dim])
@ -80,19 +80,19 @@ class IndependentOutputs(Kern):
        self.kern.gradient = target

    def gradients_X(self,dL_dK, X, X2=None):
-        target = np.zeros_like(X)
+        target = np.zeros(X.shape)
        slices = index_to_slices(X[:,self.index_dim])
        if X2 is None:
            [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
        else:
-            X2,slices2 = X2[:,:self.index_dim],index_to_slices(X2[:,-1])
-            [[[np.copyto(target[s,:self.index_dim], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+            slices2 = index_to_slices(X2[:,self.index_dim])
+            [[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        slices = index_to_slices(X[:,self.index_dim])
        target = np.zeros(X.shape)
-        [[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
+        [[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
        return target

    def update_gradients_diag(self, dL_dKdiag, X):
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@ -16,26 +16,32 @@ class Kern(Parameterized):
    __metaclass__ = KernCallsViaSlicerMeta
    #===========================================================================
    _debug=False
-    def __init__(self, input_dim, name, *a, **kw):
+    def __init__(self, input_dim, active_dims, name, *a, **kw):
        """
        The base class for a kernel: a positive definite function
        which forms of a covariance function (kernel).

-        :param input_dim: the number of input dimensions to the function
-        :type input_dim: int
+        :param int input_dim: the number of input dimensions to the function
+        :param array-like|slice active_dims: list of indices on which dimensions this kernel works on

        Do not instantiate.
        """
        super(Kern, self).__init__(name=name, *a, **kw)
-        if isinstance(input_dim, int):
-            self.active_dims = np.r_[0:input_dim]
-            self.input_dim = input_dim
+        self.active_dims = active_dims if active_dims is not None else slice(0, input_dim)
+        self.input_dim = input_dim
+        assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__)
+        if isinstance(self.active_dims, slice):
+            self.active_dims = slice(self.active_dims.start or 0, self.active_dims.stop or self.input_dim, self.active_dims.step or 1)
+            active_dim_size = int(np.round((self.active_dims.stop-self.active_dims.start)/self.active_dims.step))
+        elif isinstance(self.active_dims, np.ndarray):
+            assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions of the input'.format(self.active_dims.shape)
+            active_dim_size = self.active_dims.size
        else:
-            self.active_dims = np.r_[input_dim]
-            self.input_dim = len(self.active_dims)
+            active_dim_size = len(self.active_dims)
+        assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
        self._sliced_X = 0

-    @Cache_this(limit=10)#, ignore_args = (0,))
+    @Cache_this(limit=10)
    def _slice_X(self, X):
        return X[:, self.active_dims]

@ -69,9 +75,7 @@ class Kern(Parameterized):
    def update_gradients_full(self, dL_dK, X, X2):
        """Set the gradients of all parameters when doing full (N) inference."""
        raise NotImplementedError
-    def update_gradients_diag(self, dL_dKdiag, X):
-        """Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
-        raise NotImplementedError
+
    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        """
        Set the gradients of all parameters when doing inference with
@ -193,13 +197,28 @@ class Kern(Parameterized):
        super(Kern, self)._setstate(state)

 class CombinationKernel(Kern):
-    def __init__(self, kernels, name):
+    """
+    Abstract super class for combination kernels.
+    A combination kernel combines (a list of) kernels and works on those.
+    Examples are the HierarchicalKernel or Add and Prod kernels.
+    """
+    def __init__(self, kernels, name, extra_dims=[]):
+        """
+        Abstract super class for combination kernels.
+        A combination kernel combines (a list of) kernels and works on those.
+        Examples are the HierarchicalKernel or Add and Prod kernels.
+
+        :param list kernels: List of kernels to combine (can be only one element)
+        :param str name: name of the combination kernel
+        :param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
+        """
        assert all([isinstance(k, Kern) for k in kernels])
-        # make sure the active dimensions of all underlying kernels are covered:
-        ma = reduce(lambda a,b: max(a, max(b)), (x.active_dims for x in kernels), 0)
-        input_dim = np.r_[0:ma+1]
+        active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
+        input_dim = active_dims.max()+1 + len(extra_dims)
+        active_dims = slice(active_dims.max()+1+len(extra_dims))
        # initialize the kernel with the full input_dim
-        super(CombinationKernel, self).__init__(input_dim, name)
+        super(CombinationKernel, self).__init__(input_dim, active_dims, name)
+        self.extra_dims = extra_dims
        self.add_parameters(*kernels)

    @property
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@ -34,8 +34,8 @@ class Linear(Kern):

    """

-    def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
-        super(Linear, self).__init__(input_dim, name)
+    def __init__(self, input_dim, variances=None, ARD=False, active_dims=None, name='linear'):
+        super(Linear, self).__init__(input_dim, active_dims, name)
        self.ARD = ARD
        if not ARD:
            if variances is not None:
--- a/GPy/kern/_src/mlp.py
+++ b/GPy/kern/_src/mlp.py
@ -31,8 +31,8 @@ class MLP(Kern):

    """

-    def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., name='mlp'):
-        super(MLP, self).__init__(input_dim, name)
+    def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'):
+        super(MLP, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
        self.weight_variance = Param('weight_variance', weight_variance, Logexp())
        self.bias_variance = Param('bias_variance', bias_variance, Logexp())
--- a/GPy/kern/_src/periodic.py
+++ b/GPy/kern/_src/periodic.py
@ -10,7 +10,7 @@ from ...core.parameterization.param import Param
 from ...core.parameterization.transformations import Logexp

 class Periodic(Kern):
-    def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, name):
+    def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name):
        """
        :type input_dim: int
        :param variance: the variance of the Matern kernel
@ -25,7 +25,7 @@ class Periodic(Kern):
        """

        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
-        super(Periodic, self).__init__(input_dim, name)
+        super(Periodic, self).__init__(input_dim, active_dims, name)
        self.input_dim = input_dim
        self.lower,self.upper = lower, upper
        self.n_freq = n_freq
@ -77,8 +77,8 @@ class PeriodicExponential(Periodic):
    Only defined for input_dim=1.
    """

-    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_exponential'):
-        super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name)
+    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_exponential'):
+        super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)

    def parameters_changed(self):
        self.a = [1./self.lengthscale, 1.]
@ -187,8 +187,8 @@ class PeriodicMatern32(Periodic):

    """

-    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_Matern32'):
-        super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name)
+    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern32'):
+        super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
    def parameters_changed(self):
        self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
        self.b = [1,self.lengthscale**2/3]
@ -300,8 +300,8 @@ class PeriodicMatern52(Periodic):

    """

-    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, name='periodic_Matern52'):
-        super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, name)
+    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern52'):
+        super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)

    def parameters_changed(self):
        self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@ -19,8 +19,8 @@ class RBF(Stationary):
       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)

    """
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
-        super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'):
+        super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
        self.weave_options = {}

    def K_of_r(self, r):
--- a/GPy/kern/_src/ssrbf.py
+++ b/GPy/kern/_src/ssrbf.py
@ -33,9 +33,9 @@ class SSRBF(Stationary):
    .. Note: this object implements both the ARD and 'spherical' version of the function
    """

-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=True, name='SSRBF'):
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=True, active_dims=None, name='SSRBF'):
        assert ARD==True, "Not Implemented!"
-        super(SSRBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
+        super(SSRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
        
    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
--- a/GPy/kern/_src/static.py
+++ b/GPy/kern/_src/static.py
@ -9,8 +9,8 @@ from ...core.parameterization.transformations import Logexp
 import numpy as np

 class Static(Kern):
-    def __init__(self, input_dim, variance, name):
-        super(Static, self).__init__(input_dim, name)
+    def __init__(self, input_dim, variance, active_dims, name):
+        super(Static, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)

@ -43,8 +43,8 @@ class Static(Kern):


 class White(Static):
-    def __init__(self, input_dim, variance=1., name='white'):
-        super(White, self).__init__(input_dim, variance, name)
+    def __init__(self, input_dim, variance=1., active_dims=None, name='white'):
+        super(White, self).__init__(input_dim, variance, active_dims, name)

    def K(self, X, X2=None):
        if X2 is None:
@ -66,8 +66,8 @@ class White(Static):


 class Bias(Static):
-    def __init__(self, input_dim, variance=1., name='bias'):
-        super(Bias, self).__init__(input_dim, variance, name)
+    def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
+        super(Bias, self).__init__(input_dim, variance, active_dims, name)

    def K(self, X, X2=None):
        shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
@ -90,14 +90,14 @@ class Bias(Static):
        self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()

 class Fixed(Static):
-    def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'):
+    def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'):
        """
        :param input_dim: the number of input dimensions
        :type input_dim: int
        :param variance: the variance of the kernel
        :type variance: float
        """
-        super(Bias, self).__init__(input_dim, variance, name)
+        super(Bias, self).__init__(input_dim, variance, active_dims, name)
        self.fixed_K = covariance_matrix
    def K(self, X, X2):
        return self.variance * self.fixed_K
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -41,8 +41,8 @@ class Stationary(Kern):
          
    """
    
-    def __init__(self, input_dim, variance, lengthscale, ARD, name):
-        super(Stationary, self).__init__(input_dim, name)
+    def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name):
+        super(Stationary, self).__init__(input_dim, active_dims, name)
        self.ARD = ARD
        if not ARD:
            if lengthscale is None:
@ -186,8 +186,8 @@ class Stationary(Kern):
        return np.ones(self.input_dim)/self.lengthscale

 class Exponential(Stationary):
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
-        super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
+        super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r)
@ -205,8 +205,8 @@ class Matern32(Stationary):

    """

-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
-        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
+        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
@ -249,8 +249,8 @@ class Matern52(Stationary):

       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) 
       """
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'):
-        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
+        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
@ -291,8 +291,8 @@ class Matern52(Stationary):


 class ExpQuad(Stationary):
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
-        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
+        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
@ -301,8 +301,8 @@ class ExpQuad(Stationary):
        return -r*self.K_of_r(r)

 class Cosine(Stationary):
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'):
-        super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
+        super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.cos(r)
@ -322,8 +322,8 @@ class RatQuad(Stationary):
    """


-    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, name='ExpQuad'):
-        super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
+    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='ExpQuad'):
+        super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
        self.power = Param('power', power, Logexp())
        self.add_parameters(self.power)

--- a/GPy/kern/_src/sympykern.py
+++ b/GPy/kern/_src/sympykern.py
@ -26,13 +26,13 @@ class Sympykern(Kern):
     - to handle multiple inputs, call them x_1, z_1, etc
     - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
    """
-    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
+    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None):

        if name is None:
            name='sympykern'
        if k is None:
            raise ValueError, "You must provide an argument for the covariance function."
-        super(Sympykern, self).__init__(input_dim, name)
+        super(Sympykern, self).__init__(input_dim, active_dims, name)

        self._sp_k = k

--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -5,6 +5,7 @@ import numpy as np
 from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
 import link_functions
 from likelihood import Likelihood
+from scipy import stats

 class Bernoulli(Likelihood):
    """
@ -43,7 +44,7 @@ class Bernoulli(Likelihood):
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

-    def moments_match_ep(self, data_i, tau_i, v_i):
+    def moments_match_ep(self, Y_i, tau_i, v_i):
        """
        Moments match of the marginal approximation in EP algorithm

@ -51,9 +52,9 @@ class Bernoulli(Likelihood):
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
-        if data_i == 1:
+        if Y_i == 1:
            sign = 1.
-        elif data_i == 0:
+        elif Y_i == 0:
            sign = -1
        else:
            raise ValueError("bad value for Bernouilli observation (0, 1)")
@ -76,7 +77,7 @@ class Bernoulli(Likelihood):

        return Z_hat, mu_hat, sigma2_hat

-    def predictive_mean(self, mu, variance):
+    def predictive_mean(self, mu, variance, Y_metadata=None):

        if isinstance(self.gp_link, link_functions.Probit):
            return stats.norm.cdf(mu/np.sqrt(1+variance))
@ -87,7 +88,7 @@ class Bernoulli(Likelihood):
        else:
            raise NotImplementedError

-    def predictive_variance(self, mu, variance, pred_mean):
+    def predictive_variance(self, mu, variance, pred_mean, Y_metadata=None):

        if isinstance(self.gp_link, link_functions.Heaviside):
            return 0.
@ -211,7 +212,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return d3logpdf_dlink3

-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

--- a/GPy/models/gp_classification.py
+++ b/GPy/models/gp_classification.py
@ -23,7 +23,7 @@ class GPClassification(GP):

    def __init__(self, X, Y, kernel=None):
        if kernel is None:
-            kernel = kern.rbf(X.shape[1])
+            kernel = kern.RBF(X.shape[1])

        likelihood = likelihoods.Bernoulli()

--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -118,7 +118,7 @@ class MRD(Model):
            self._log_marginal_likelihood += lml

            # likelihood gradients
-            l.update_gradients(grad_dict.pop('partial_for_likelihood'))
+            l.update_gradients(grad_dict.pop('dL_dthetaL'))

            #gradients wrt kernel
            dL_dKmm = grad_dict.pop('dL_dKmm')
--- a/GPy/testing/index_operations_tests.py
+++ b/GPy/testing/index_operations_tests.py
@ -17,24 +17,33 @@ class Test(unittest.TestCase):
        self.param_index.add(one, [3])
        self.param_index.add(two, [0,5])
        self.param_index.add(three, [2,4,7])
+        self.view = ParameterIndexOperationsView(self.param_index, 2, 6)
+
+    def test_clear(self):
+        self.param_index.clear()
+        self.assertDictEqual(self.param_index._properties, {})

    def test_remove(self):
        self.param_index.remove(three, np.r_[3:10])
        self.assertListEqual(self.param_index[three].tolist(), [2])
        self.param_index.remove(one, [1])
-        self.assertListEqual(self.param_index[one].tolist(), [3])        
+        self.assertListEqual(self.param_index[one].tolist(), [3])
+        self.assertListEqual(self.param_index.remove('not in there', []).tolist(), [])
+        self.param_index.remove(one, [9])
+        self.assertListEqual(self.param_index[one].tolist(), [3])
+        self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])

    def test_shift_left(self):
-        self.param_index.shift_left(1, 2)
+        self.view.shift_left(0, 2)
        self.assertListEqual(self.param_index[three].tolist(), [2,5])
        self.assertListEqual(self.param_index[two].tolist(), [0,3])
-        self.assertListEqual(self.param_index[one].tolist(), [1])        
+        self.assertListEqual(self.param_index[one].tolist(), [])

    def test_shift_right(self):
-        self.param_index.shift_right(5, 2)
+        self.view.shift_right(3, 2)
        self.assertListEqual(self.param_index[three].tolist(), [2,4,9])
        self.assertListEqual(self.param_index[two].tolist(), [0,7])
-        self.assertListEqual(self.param_index[one].tolist(), [3])        
+        self.assertListEqual(self.param_index[one].tolist(), [3])

    def test_index_view(self):
        #=======================================================================
@ -44,17 +53,17 @@ class Test(unittest.TestCase):
        #                   three     three          three
        # view:             [0    1    2    3    4    5    ]
        #=======================================================================
-        view = ParameterIndexOperationsView(self.param_index, 2, 6)
-        self.assertSetEqual(set(view.properties()), set([one, two, three]))
-        for v,p in zip(view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
+        self.view = ParameterIndexOperationsView(self.param_index, 2, 6)
+        self.assertSetEqual(set(self.view.properties()), set([one, two, three]))
+        for v,p in zip(self.view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
            self.assertEqual(v, p)
-        self.assertSetEqual(set(view[two]), set([3]))
+        self.assertSetEqual(set(self.view[two]), set([3]))
        self.assertSetEqual(set(self.param_index[two]), set([0, 5]))
-        view.add(two, np.array([0]))
-        self.assertSetEqual(set(view[two]), set([0,3]))
+        self.view.add(two, np.array([0]))
+        self.assertSetEqual(set(self.view[two]), set([0,3]))
        self.assertSetEqual(set(self.param_index[two]), set([0, 2, 5]))
-        view.clear()
-        for v,p in zip(view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
+        self.view.clear()
+        for v,p in zip(self.view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
            self.assertEqual(v, p)
            self.assertEqual(v, [])
        param_index = ParameterIndexOperations()
@ -62,11 +71,17 @@ class Test(unittest.TestCase):
        param_index.add(two, [0,5])
        param_index.add(three, [2,4,7])
        view2 = ParameterIndexOperationsView(param_index, 2, 6)
-        view.update(view2)
+        self.view.update(view2)
        for [i,v],[i2,v2] in zip(sorted(param_index.items()), sorted(self.param_index.items())):
            self.assertEqual(i, i2)
            self.assertTrue(np.all(v == v2))
-        
+
+    def test_misc(self):
+        for k,v in self.param_index.copy()._properties.iteritems():
+            self.assertListEqual(self.param_index[k].tolist(), v.tolist())
+        self.assertEqual(self.param_index.size, 6)
+        self.assertEqual(self.view.size, 5)
+
 if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_index_view']
    unittest.main()
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -228,12 +228,12 @@ class KernelGradientTestsContinuous(unittest.TestCase):
        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))

    def test_Prod(self):
-        k = GPy.kern.Matern32([2,3]) * GPy.kern.RBF([0,4]) + GPy.kern.Linear(self.D)
+        k = GPy.kern.Matern32(2, active_dims=[2,3]) * GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
        k.randomize()
        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))

    def test_Add(self):
-        k = GPy.kern.Matern32([2,3]) + GPy.kern.RBF([0,4]) + GPy.kern.Linear(self.D)
+        k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
        k.randomize()
        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))

@ -282,15 +282,16 @@ class KernelTestsMiscellaneous(unittest.TestCase):
    def setUp(self):
        N, D = 100, 10
        self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.ones(D)
-        self.rbf = GPy.kern.RBF(range(2))
-        self.linear = GPy.kern.Linear((3,6))
-        self.matern = GPy.kern.Matern32(np.array([2,4,7]))
+        self.rbf = GPy.kern.RBF(2, active_dims=slice(0,4,2))
+        self.linear = GPy.kern.Linear(2, active_dims=(3,9))
+        self.matern = GPy.kern.Matern32(3, active_dims=np.array([2,4,9]))
        self.sumkern = self.rbf + self.linear
        self.sumkern += self.matern
        self.sumkern.randomize()

    def test_active_dims(self):
-        self.assertListEqual(self.sumkern.active_dims.tolist(), range(8))
+        self.assertEqual(self.sumkern.input_dim, 10)
+        self.assertEqual(self.sumkern.active_dims, slice(0, 10, 1))

    def test_which_parts(self):
        self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
@ -310,11 +311,12 @@ class KernelTestsNonContinuous(unittest.TestCase):
        self.X_block[N:N+N1, D:D+D] = self.X2
        self.X_block[0:N, -1] = 1
        self.X_block[N:N+1, -1] = 2
-
-    def test_IndependantOutputs(self):
+        self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :]
+ 
+    def test_IndependentOutputs(self):
        k = GPy.kern.RBF(self.D)
-        kern = GPy.kern.IndependentOutputs(self.D+self.D,k)
-        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose))
+        kern = GPy.kern.IndependentOutputs(k, -1)
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, X2=self.X_block, verbose=verbose))

 if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@ -7,8 +7,24 @@ import unittest
 import GPy
 import numpy as np
 from GPy.core.parameterization.parameter_core import HierarchyError
+from GPy.core.parameterization.array_core import ObservableArray

-class Test(unittest.TestCase):
+class ArrayCoreTest(unittest.TestCase):
+    def setUp(self):
+        self.X = np.random.normal(1,1, size=(100,10))
+        self.obsX = ObservableArray(self.X)
+
+    def test_init(self):
+        X = ObservableArray(self.X)
+        X2 = ObservableArray(X)
+        self.assertIs(X, X2, "no new Observable array, when Observable is given")
+
+    def test_slice(self):
+        t1 = self.X[2:78]
+        t2 = self.obsX[2:78]
+        self.assertListEqual(t1.tolist(), t2.tolist(), "Slicing should be the exact same, as in ndarray")
+
+class ParameterizedTest(unittest.TestCase):

    def setUp(self):
        self.rbf = GPy.kern.RBF(1)
@ -35,7 +51,6 @@ class Test(unittest.TestCase):
        self.white.fix(warning=False)
        self.test1.remove_parameter(self.test1.param)
        self.assertTrue(self.test1._has_fixes())
-
        from GPy.core.parameterization.transformations import FIXED, UNFIXED
        self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])

@ -51,12 +66,12 @@ class Test(unittest.TestCase):
        self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
        self.assertEquals(self.white.constraints._offset, 0)
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
-        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)        
+        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)

        self.test1.add_parameter(self.white, 0)
        self.assertIs(self.test1.constraints, self.white.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
-        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)        
+        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
        self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0])
        self.assertIs(self.white._fixes_,None)
        self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)
@ -69,7 +84,7 @@ class Test(unittest.TestCase):
        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])

    def test_add_parameter_already_in_hirarchy(self):
-        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])        
+        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])

    def test_default_constraints(self):
        self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
--- a/GPy/testing/prior_tests.py
+++ b/GPy/testing/prior_tests.py
@ -15,7 +15,7 @@ class PriorTests(unittest.TestCase):
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        lognormal = GPy.priors.LogGaussian(1, 2)
-        m.set_prior('rbf', lognormal)
+        m.rbf.set_prior(lognormal)
        m.randomize()
        self.assertTrue(m.checkgrad())

@ -28,7 +28,7 @@ class PriorTests(unittest.TestCase):
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        Gamma = GPy.priors.Gamma(1, 1)
-        m.set_prior('rbf', Gamma)
+        m.rbf.set_prior(Gamma)
        m.randomize()
        self.assertTrue(m.checkgrad())

@ -41,16 +41,9 @@ class PriorTests(unittest.TestCase):
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        gaussian = GPy.priors.Gaussian(1, 1)
-        success = False
-
        # setting a Gaussian prior on non-negative parameters
        # should raise an assertionerror.
-        try:
-            m.set_prior('rbf', gaussian)
-        except AssertionError:
-            success = True
-
-        self.assertTrue(success)
+        self.assertRaises(AssertionError, m.rbf.set_prior, gaussian)


 if __name__ == "__main__":