Merge branch 'params' of github.com:SheffieldML/GPy into params

2026-06-05 14:55:15 +02:00 · 2014-02-17 15:19:50 +00:00 · 2014-02-17 15:19:50 +00:00 · c7a3060c5a
commit c7a3060c5a
parent 91d8890a13 e96d40b4cc
38 changed files with 1131 additions and 1053 deletions
--- a/GPy/init.py
+++ b/GPy/init.py
@ -4,6 +4,7 @@ import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 import core
 from core.parameterization import transformations, priors
 import models
 import mappings
 import inference
@ -14,7 +15,6 @@ import testing
 from numpy.testing import Tester
 from nose.tools import nottest
 import kern
 from core import priors
 import plotting
@nottest
--- a/GPy/core/init.py
+++ b/GPy/core/init.py
@ -2,7 +2,6 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 from model import *
 from parameterization import priors
 from parameterization.parameterized import *
 from gp import GP
 from sparse_gp import SparseGP
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -26,7 +26,7 @@ class GP(Model):
    """
-    def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp'):
+    def __init__(self, X, Y, kernel, likelihood, inference_method=None, Y_metadata=None, name='gp'):
        super(GP, self).__init__(name)
        assert X.ndim == 2
@ -38,6 +38,11 @@ class GP(Model):
        assert Y.shape[0] == self.num_data
        _, self.output_dim = self.Y.shape
        if Y_metadata is not None:
            self.Y_metadata = ObservableArray(Y_metadata)
        else:
            self.Y_metadata = None
        assert isinstance(kernel, kern.kern)
        self.kern = kernel
@ -55,10 +60,11 @@ class GP(Model):
        self.add_parameter(self.kern)
        self.add_parameter(self.likelihood)
-        self.parameters_changed()
+        if self.__class__ is GP:
            self.parameters_changed()
    def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y)
+        self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
        self._dL_dK = grad_dict['dL_dK']
    def log_likelihood(self):
@ -111,7 +117,7 @@ class GP(Model):
           This is to allow for different normalizations of the output dimensions.
        """
-        # normalize X values
+        #predict the latent function values
        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
        # now push through likelihood
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -17,12 +17,10 @@ import itertools
 # import numdifftools as ndt
 class Model(Parameterized):
-    _fail_count = 0 # Count of failed optimization steps (see objective)
+    _fail_count = 0  # Count of failed optimization steps (see objective)
-    _allowed_failures = 10 # number of allowed failures
+    _allowed_failures = 10  # number of allowed failures
    def __init__(self, name):
-        super(Model, self).__init__(name)#Parameterized.__init__(self)
+        super(Model, self).__init__(name)  # Parameterized.__init__(self)
        self.priors = []
        self._priors = ParameterIndexOperations()
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'scg'
@ -30,10 +28,10 @@ class Model(Parameterized):
    def log_likelihood(self):
        raise NotImplementedError, "this needs to be implemented to use the model class"
    def _log_likelihood_gradients(self):
-        #def dK_d(self, param, dL_dK, X, X2)
+        # def dK_d(self, param, dL_dK, X, X2)
        g = np.zeros(self.size)
        try:
-            #[g.__setitem__(s, self.gradient_mapping[p]().flat) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
+            # [g.__setitem__(s, self.gradient_mapping[p]().flat) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
            [p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
        except ValueError:
            raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name)
@ -67,66 +65,66 @@ class Model(Parameterized):
        self.priors = state.pop()
        Parameterized._setstate(self, state)
-    def set_prior(self, regexp, what):
+#     def set_prior(self, regexp, what):
-        """
+#         """
-
+# 
-        Sets priors on the model parameters.
+#         Sets priors on the model parameters.
-
+# 
-        **Notes**
+#         **Notes**
-
+# 
-        Asserts that the prior is suitable for the constraint. If the
+#         Asserts that the prior is suitable for the constraint. If the
-        wrong constraint is in place, an error is raised.  If no
+#         wrong constraint is in place, an error is raised.  If no
-        constraint is in place, one is added (warning printed).
+#         constraint is in place, one is added (warning printed).
-
+# 
-        For tied parameters, the prior will only be "counted" once, thus
+#         For tied parameters, the prior will only be "counted" once, thus
-        a prior object is only inserted on the first tied index
+#         a prior object is only inserted on the first tied index
-
+# 
-        :param regexp: regular expression of parameters on which priors need to be set.
+#         :param regexp: regular expression of parameters on which priors need to be set.
-        :type param: string, regexp, or integer array
+#         :type param: string, regexp, or integer array
-        :param what: prior to set on parameter.
+#         :param what: prior to set on parameter.
-        :type what: GPy.core.Prior type
+#         :type what: GPy.core.Prior type
-
+# 
-        """
+#         """
-        if self.priors is None:
+#         if self.priors is None:
-            self.priors = [None for i in range(self._get_params().size)]
+#             self.priors = [None for i in range(self._get_params().size)]
-
+# 
-        which = self.grep_param_names(regexp)
+#         which = self.grep_param_names(regexp)
-
+# 
-        # check tied situation
+#         # check tied situation
-        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
+#         tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
-        if len(tie_partial_matches):
+#         if len(tie_partial_matches):
-            raise ValueError, "cannot place prior across partial ties"
+#             raise ValueError, "cannot place prior across partial ties"
-        tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
+#         tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
-        if len(tie_matches) > 1:
+#         if len(tie_matches) > 1:
-            raise ValueError, "cannot place prior across multiple ties"
+#             raise ValueError, "cannot place prior across multiple ties"
-        elif len(tie_matches) == 1:
+#         elif len(tie_matches) == 1:
-            which = which[:1] # just place a prior object on the first parameter
+#             which = which[:1]  # just place a prior object on the first parameter
-
+# 
-
+# 
-        # check constraints are okay
+#         # check constraints are okay
-
+# 
-        if what.domain is _POSITIVE:
+#         if what.domain is _POSITIVE:
-            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is _POSITIVE]
+#             constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is _POSITIVE]
-            if len(constrained_positive_indices):
+#             if len(constrained_positive_indices):
-                constrained_positive_indices = np.hstack(constrained_positive_indices)
+#                 constrained_positive_indices = np.hstack(constrained_positive_indices)
-            else:
+#             else:
-                constrained_positive_indices = np.zeros(shape=(0,))
+#                 constrained_positive_indices = np.zeros(shape=(0,))
-            bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
+#             bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
-            assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
+#             assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
-            unconst = np.setdiff1d(which, constrained_positive_indices)
+#             unconst = np.setdiff1d(which, constrained_positive_indices)
-            if len(unconst):
+#             if len(unconst):
-                print "Warning: constraining parameters to be positive:"
+#                 print "Warning: constraining parameters to be positive:"
-                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
+#                 print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
-                print '\n'
+#                 print '\n'
-                self.constrain_positive(unconst)
+#                 self.constrain_positive(unconst)
-        elif what.domain is _REAL:
+#         elif what.domain is _REAL:
-            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
+#             assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
-        else:
+#         else:
-            raise ValueError, "prior not recognised"
+#             raise ValueError, "prior not recognised"
-
+# 
-        # store the prior in a local list
+#         # store the prior in a local list
-        for w in which:
+#         for w in which:
-            self.priors[w] = what
+#             self.priors[w] = what
    def get_gradient(self, name, return_names=False):
        """
@ -146,36 +144,19 @@ class Model(Parameterized):
        else:
            raise AttributeError, "no parameter matches %s" % name
    def log_prior(self):
        """evaluate the prior"""
        if self.priors is not None:
            return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
        else:
            return 0.
    def _log_prior_gradients(self):
        """evaluate the gradients of the priors"""
        if self.priors is None:
            return 0.
        x = self._get_params()
        ret = np.zeros(x.size)
        [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
        return ret
    def randomize(self):
        """
        Randomize the model.
        Make this draw from the prior if one exists, else draw from N(0,1)
        """
        # first take care of all parameters (from N(0,1))
-        #x = self._get_params_transformed()
+        # x = self._get_params_transformed()
        x = np.random.randn(self.size_transformed)
        x = self._untransform_params(x)
        # now draw from prior where possible
-        if self.priors is not None and len(self.priors):
+        [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
            [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
        self._set_params(x)
-        #self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        # self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
@ -220,8 +201,8 @@ class Model(Parameterized):
                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                    jobs.append(job)
-                pool.close() # signal that no more data coming in
+                pool.close()  # signal that no more data coming in
-                pool.join() # wait for all the tasks to complete
+                pool.join()  # wait for all the tasks to complete
            except KeyboardInterrupt:
                print "Ctrl+c received, terminating and joining pool."
                pool.terminate()
@ -378,7 +359,7 @@ class Model(Parameterized):
    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
        # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
-        sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
+        sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)  # @UndefinedVariable
        sgd.run()
        self.optimization_runs.append(sgd)
@ -403,17 +384,34 @@ class Model(Parameterized):
        x = self._get_params_transformed().copy()
        if not verbose:
            # make sure only to test the selected parameters
            if target_param is None:
                transformed_index = range(len(x))
            else:
                transformed_index = self._raveled_index_for(target_param)
                if self._has_fixes():
                    indices = np.r_[:self.size]
                    which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
                    transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
                if transformed_index.size == 0:
                    print "No free parameters to check"
                    return
            # just check the global ratio
-            dx = step * np.sign(np.random.uniform(-1, 1, x.size))
+            dx = np.zeros_like(x)
-
+            dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
            # evaulate around the point x
            f1 = self.objective_function(x + dx)
            f2 = self.objective_function(x - dx)
            gradient = self.objective_function_gradients(x)
            dx = dx[transformed_index]
            gradient = gradient[transformed_index]
            numerical_gradient = (f1 - f2) / (2 * dx)
-            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient == 0, 1e-32, gradient)))
            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
        else:
            # check the gradient of each parameter individually, and do some pretty printing
@ -433,43 +431,50 @@ class Model(Parameterized):
            separator = '-' * len(header_string[0])
            print '\n'.join([header_string[0], separator])
            if target_param is None:
-                param_list = range(len(x))
+                param_index = range(len(x))
                transformed_index = param_index
            else:
-                param_list = self._raveled_index_for(target_param)
+                param_index = self._raveled_index_for(target_param)
                if self._has_fixes():
-                    param_list = np.intersect1d(param_list, np.r_[:self.size][self._fixes_], True)
+                    indices = np.r_[:self.size]
-
+                    which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
-                if param_list.size == 0:
+                    param_index = param_index[which[0]]
                    transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
                    #print param_index, transformed_index
                else:
                    transformed_index = param_index
                if param_index.size == 0:
                    print "No free parameters to check"
                    return
            gradient = self.objective_function_gradients(x)
-            np.where(gradient==0, 1e-312, gradient)
+            np.where(gradient == 0, 1e-312, gradient)
            ret = True
-            for i, ind in enumerate(param_list):
+            for nind, xind in itertools.izip(param_index, transformed_index):
                xx = x.copy()
-                xx[ind] += step
+                xx[xind] += step
                f1 = self.objective_function(xx)
-                xx[ind] -= 2.*step
+                xx[xind] -= 2.*step
                f2 = self.objective_function(xx)
                numerical_gradient = (f1 - f2) / (2 * step)
-                ratio = (f1 - f2) / (2 * step * gradient[ind])
+                ratio = (f1 - f2) / (2 * step * gradient[xind])
-                difference = np.abs((f1 - f2) / 2 / step - gradient[ind])
+                difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
-                    formatted_name = "\033[92m {0} \033[0m".format(names[ind])
+                    formatted_name = "\033[92m {0} \033[0m".format(names[nind])
                    ret &= True
                else:
-                    formatted_name = "\033[91m {0} \033[0m".format(names[ind])
+                    formatted_name = "\033[91m {0} \033[0m".format(names[nind])
                    ret &= False
                r = '%.6f' % float(ratio)
                d = '%.6f' % float(difference)
-                g = '%.6f' % gradient[ind]
+                g = '%.6f' % gradient[xind]
                ng = '%.6f' % float(numerical_gradient)
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                print grad_string
            self._set_params_transformed(x)
            return ret
    def input_sensitivity(self):
@ -517,10 +522,10 @@ class Model(Parameterized):
        alpha = 0
        stop = False
-        #Handle **kwargs
+        # Handle **kwargs
        ep_args = {}
        for arg in kwargs.keys():
-            if arg in ('epsilon','power_ep'):
+            if arg in ('epsilon', 'power_ep'):
                ep_args[arg] = kwargs[arg]
                del kwargs[arg]
@ -528,7 +533,7 @@ class Model(Parameterized):
            last_approximation = self.likelihood.copy()
            last_params = self._get_params()
            if len(ep_args) == 2:
-                self.update_likelihood_approximation(epsilon=ep_args['epsilon'],power_ep=ep_args['power_ep'])
+                self.update_likelihood_approximation(epsilon=ep_args['epsilon'], power_ep=ep_args['power_ep'])
            elif len(ep_args) == 1:
                if  ep_args.keys()[0] == 'epsilon':
                    self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
@ -540,8 +545,8 @@ class Model(Parameterized):
            ll_change = new_ll - last_ll
            if ll_change < 0:
-                self.likelihood = last_approximation # restore previous likelihood approximation
+                self.likelihood = last_approximation  # restore previous likelihood approximation
-                self._set_params(last_params) # restore model parameters
+                self._set_params(last_params)  # restore model parameters
                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                stop = True
            else:
--- a/GPy/core/parameterization/array_core.py
+++ b/GPy/core/parameterization/array_core.py
@ -4,32 +4,22 @@
 __updated__ = '2013-12-16'
 import numpy as np
-from parameter_core import Observable
+from parameter_core import Observable, Parameterizable
 class ListArray(np.ndarray):
    """
    ndarray which can be stored in lists and checked if it is in.
    WARNING: This overrides the functionality of x==y!!!
    Use numpy.equal(x,y) for element-wise equality testing.
    """
    def __new__(cls, input_array):
        obj = np.asanyarray(input_array).view(cls)
        return obj
    #def __eq__(self, other):
    #    return other is self
 class ParamList(list):
-
+    """
    List to store ndarray-likes in.
    It will look for 'is' instead of calling __eq__ on each element.
    """
    def __contains__(self, other):
        for el in self:
            if el is other:
                return True
        return False
-    
+
    pass
-class ObservableArray(ListArray, Observable):
+class ObservableArray(np.ndarray, Observable):
    """
    An ndarray which reports changes to its observers.
    The observers can add themselves with a callable, which
@ -38,26 +28,43 @@ class ObservableArray(ListArray, Observable):
    """
    __array_priority__ = -1 # Never give back ObservableArray
    def __new__(cls, input_array):
        obj = np.atleast_1d(input_array).view(cls)
        cls.__name__ = "ObservableArray\n     "
        obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
        obj._observers_ = {}
        return obj
    def __array_finalize__(self, obj):
        # see InfoArray.__array_finalize__ for comments
        if obj is None: return
        self._observers_ = getattr(obj, '_observers_', None)
-    
+
    def _s_not_empty(self, s):
        # this checks whether there is something picked by this slice.
        return True
        # TODO:  disarmed, for performance increase,
        if not isinstance(s, (list,tuple,np.ndarray)):
            return True
        if isinstance(s, (list,tuple)):
            return len(s)!=0
        if isinstance(s, np.ndarray):
            if s.dtype is bool:
                return np.all(s)
            else:
                return s.size != 0
    def __setitem__(self, s, val, update=True):
-        super(ObservableArray, self).__setitem__(s, val)
+        if self._s_not_empty(s):
-        if update:
+            super(ObservableArray, self).__setitem__(s, val)
-            self._notify_observers()
+            if update:
                self._notify_observers()
    def __getslice__(self, start, stop):
        return self.__getitem__(slice(start, stop))
    def __setslice__(self, start, stop, val):
-        return self.__setitem__(slice(start, stop), val)  
+        return self.__setitem__(slice(start, stop), val)
    def __copy__(self, *args):
-        return ObservableArray(self.base.base.copy(*args))
+        return ObservableArray(self.view(np.ndarray).copy())
    def copy(self, *args):
        return self.__copy__(*args)
@ -65,32 +72,27 @@ class ObservableArray(ListArray, Observable):
        r =  np.ndarray.__ror__(self, *args, **kwargs)
        self._notify_observers()
        return r
    def __ilshift__(self, *args, **kwargs):
        r = np.ndarray.__ilshift__(self, *args, **kwargs)
        self._notify_observers()
        return r
    def __irshift__(self, *args, **kwargs):
        r = np.ndarray.__irshift__(self, *args, **kwargs)
        self._notify_observers()
        return r
    def __rrshift__(self, *args, **kwargs):
        r = np.ndarray.__rrshift__(self, *args, **kwargs)
        self._notify_observers()
        return r
    def __ixor__(self, *args, **kwargs):
        r = np.ndarray.__ixor__(self, *args, **kwargs)
        self._notify_observers()
        return r
    def __rxor__(self, *args, **kwargs):
        r = np.ndarray.__rxor__(self, *args, **kwargs)
        self._notify_observers()
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@ -57,9 +57,12 @@ class ParameterIndexOperations(object):
    You can give an offset to set an offset for the given indices in the
    index array, for multi-param handling.
    '''
-    def __init__(self):
+    _offset = 0
-        self._properties = ParamDict()
+    def __init__(self, constraints=None):
-        #self._reverse = collections.defaultdict(list)
+        self._properties = IntArrayDict()
        if constraints is not None:
            for t, i in constraints.iteritems():
                self.add(t, i)
    def __getstate__(self):
        return self._properties#, self._reverse
@ -71,16 +74,19 @@ class ParameterIndexOperations(object):
    def iteritems(self):
        return self._properties.iteritems()
    def items(self):
        return self._properties.items()
    def properties(self):
        return self._properties.keys()
-    def iter_properties(self):
+    def iterproperties(self):
        return self._properties.iterkeys()
    def shift(self, start, size):
        for ind in self.iterindices():
            toshift = ind>=start
-            if len(toshift) > 0:
+            if toshift.size > 0:
                ind[toshift] += size
    def clear(self):
@ -96,12 +102,12 @@ class ParameterIndexOperations(object):
        return self._properties.values()
    def properties_for(self, index):
-        return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index)
+        return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
    def add(self, prop, indices):
        try:
            self._properties[prop] = combine_indices(self._properties[prop], indices)
-        except KeyError: 
+        except KeyError:
            self._properties[prop] = indices
    def remove(self, prop, indices):
@ -114,8 +120,21 @@ class ParameterIndexOperations(object):
                del self._properties[prop]
            return removed.astype(int)
        return numpy.array([]).astype(int)
    def update(self, parameter_index_view, offset=0):
        for i, v in parameter_index_view.iteritems():
            self.add(i, v+offset)
    def copy(self):
        return ParameterIndexOperations(dict(self.iteritems()))
    def __getitem__(self, prop):
        return self._properties[prop]
    def __str__(self, *args, **kwargs):
        import pprint
        return pprint.pformat(dict(self._properties))
 def combine_indices(arr1, arr2):
    return numpy.union1d(arr1, arr2)
@ -126,5 +145,97 @@ def remove_indices(arr, to_remove):
 def index_empty(index):
    return numpy.size(index) == 0 
 class ParameterIndexOperationsView(object):
    def __init__(self, param_index_operations, offset, size):
        self._param_index_ops = param_index_operations
        self._offset = offset
        self._size = size
    def __getstate__(self):
        return [self._param_index_ops, self._offset, self._size]
    def __setstate__(self, state):
        self._param_index_ops = state[0]
        self._offset = state[1]
        self._size = state[2]
    def _filter_index(self, ind):
        return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset
    def iteritems(self):
        for i, ind in self._param_index_ops.iteritems():
            ind2 = self._filter_index(ind)
            if ind2.size > 0:
                yield i, ind2 
    def items(self):
        return [[i,v] for i,v in self.iteritems()]
    def properties(self):
        return [i for i in self.iterproperties()]
    def iterproperties(self):
        for i, _ in self.iteritems():
            yield i 
    def shift(self, start, size):
        raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations'
    def clear(self):
        for i, ind in self.items():
            self._param_index_ops.remove(i, ind+self._offset)
    def size(self):
        return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
    def iterindices(self):
        for _, ind in self.iteritems():
            yield ind
    def indices(self):
        return [ind for ind in self.iterindices()]
    def properties_for(self, index):
        return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
    def add(self, prop, indices):
        self._param_index_ops.add(prop, indices+self._offset)
    def remove(self, prop, indices):
        removed = self._param_index_ops.remove(prop, indices+self._offset)
        if removed.size > 0:
            return removed - self._size + 1
        return removed
    def __getitem__(self, prop):
        ind = self._filter_index(self._param_index_ops[prop])
        if ind.size > 0:
            return ind
        raise KeyError, prop
    def __str__(self, *args, **kwargs):
        import pprint
        return pprint.pformat(dict(self.iteritems()))
    def update(self, parameter_index_view, offset=0):
        for i, v in parameter_index_view.iteritems():
            self.add(i, v+offset)
    def copy(self):
        return ParameterIndexOperations(dict(self.iteritems()))
    pass
--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@ -3,24 +3,19 @@
 import itertools
 import numpy
-from parameter_core import Constrainable, Gradcheckable, adjust_name_for_printing
+from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing
 from array_core import ObservableArray, ParamList
 ###### printing
 __constraints_name__ = "Constraint"
 __index_name__ = "Index"
 __tie_name__ = "Tied to"
 __priors_name__ = "Prior"
 __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all
 __print_threshold__ = 5
 ######
-class Float(numpy.float64, Constrainable):
+class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable):
    def __init__(self, f, base):
        super(Float,self).__init__(f)
        self._base = base
 class Param(ObservableArray, Constrainable, Gradcheckable):
    """
    Parameter object for GPy models.
@ -46,8 +41,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
    """
    __array_priority__ = -1 # Never give back Param
    _fixes_ = None
    _parameters_ = []
    def __new__(cls, name, input_array, default_constraint=None):
        obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
        cls.__name__ = "Param"
        obj._current_slice_ = (slice(obj.shape[0]),)
        obj._realshape_ = obj.shape
        obj._realsize_ = obj.size
@ -62,7 +59,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
    def __init__(self, name, input_array, default_constraint=None):
        super(Param, self).__init__(name=name, default_constraint=default_constraint)
-
+    
    def __array_finalize__(self, obj):
        # see InfoArray.__array_finalize__ for comments
        if obj is None: return
@ -80,9 +77,11 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
        self._original_ = getattr(obj, '_original_', None)
        self._name = getattr(obj, 'name', None)
        self.gradient = getattr(obj, 'gradient', None)
        self.constraints = getattr(obj, 'constraints', None)
        self.priors = getattr(obj, 'priors', None)
-    def __array_wrap__(self, out_arr, context=None):
+    #def __array_wrap__(self, out_arr, context=None):
-        return out_arr.view(numpy.ndarray)
+    #    return out_arr.view(numpy.ndarray)
    #===========================================================================
    # Pickling operations
    #===========================================================================
@ -121,157 +120,17 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
    #===========================================================================
    # get/set parameters
    #===========================================================================
    def _set_params(self, param, update=True):
        self.flat = param
-        self._notify_tied_parameters()
+        #self._notify_tied_parameters()
        self._notify_observers()
    def _get_params(self):
        return self.flat
-#     @property
+
 #     def name(self):
 #         """
 #         Name of this parameter.
 #         This can be a callable without parameters. The callable will be called
 #         every time the name property is accessed.
 #         """
 #         if callable(self.name):
 #             return self.name()
 #         return self.name
 #     @name.setter
 #     def name(self, new_name):
 #         from_name = self.name
 #         self.name = new_name
 #         self._direct_parent_._name_changed(self, from_name)
    @property
    def _parameters_(self):
        return []
    def _collect_gradient(self, target):
        target[:] = self.gradient.flat
    #===========================================================================
    # Tying operations -> bugged, TODO
    #===========================================================================
    def tie_to(self, param):
        """
        :param param: the parameter object to tie this parameter to. 
                      Can be ParamConcatenation (retrieved by regexp search)
        Tie this parameter to the given parameter.
        Broadcasting is not allowed, but you can tie a whole dimension to
        one parameter:  self[:,0].tie_to(other), where other is a one-value
        parameter.
        Note: For now only one parameter can have ties, so all of a parameter
              will be removed, when re-tieing!
        """
        #Note: this method will tie to the parameter which is the last in
        #      the chain of ties. Thus, if you tie to a tied parameter,
        #      this tie will be created to the parameter the param is tied
        #      to.
        assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param, param.__class__)
        param = numpy.atleast_1d(param)
        if param.size != 1:
            raise NotImplementedError, "Broadcast tying is not implemented yet"
        try:
            if self._original_:
                self[:] = param
            else: # this happens when indexing created a copy of the array
                self._direct_parent_._get_original(self)[self._current_slice_] = param
        except ValueError:
            raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape))
        if param is self:
            raise RuntimeError, 'Cyclic tieing is not allowed'
 #         if len(param._tied_to_) > 0:
 #             if (self._direct_parent_._get_original(self) is param._direct_parent_._get_original(param)
 #                 and len(set(self._raveled_index())&set(param._tied_to_[0]._raveled_index()))!=0):
 #                 raise RuntimeError, 'Cyclic tieing is not allowed'
 #             self.tie_to(param._tied_to_[0])
 #             return
        if not param in self._direct_parent_._get_original(self)._tied_to_:
            self._direct_parent_._get_original(self)._tied_to_ += [param]
        param._add_tie_listener(self)
        self._highest_parent_._set_fixed(self)
        cs = self._highest_parent_._constraints_for(param, param._raveled_index())
        for cs in self._highest_parent_._constraints_for(param, param._raveled_index()):
            [self.constrain(c, warning=False) for c in cs]
 #         for t in self._tied_to_me_.keys():
 #             if t is not self:
 #                 t.untie(self)
 #                 t.tie_to(param)
    def untie(self, *ties):
        """
        remove all ties.
        """
        [t._direct_parent_._get_original(t)._remove_tie_listener(self) for t in self._tied_to_]
        new_ties = []
        for t in self._direct_parent_._get_original(self)._tied_to_:
            for tied in t._tied_to_me_.keys():
                if t._parent_index_ is tied._parent_index_:
                    new_ties.append(tied)
        self._direct_parent_._get_original(self)._tied_to_ = new_ties
        self._direct_parent_._get_original(self)._highest_parent_._set_unfixed(self)
 #         self._direct_parent_._remove_tie(self, *params)
    def _notify_tied_parameters(self):
        for tied, ind in self._tied_to_me_.iteritems():
            tied._on_tied_parameter_changed(self.base, list(ind))
    def _add_tie_listener(self, tied_to_me):
        for t in self._tied_to_me_.keys():
            if tied_to_me._parent_index_ is t._parent_index_:
                t_rav_i = t._raveled_index()
                tr_rav_i = tied_to_me._raveled_index()
                new_index = list(set(t_rav_i) | set(tr_rav_i))
                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
                self._tied_to_me_[tmp] = self._tied_to_me_[t] | set(self._raveled_index())
                del self._tied_to_me_[t]
                return
        self._tied_to_me_[tied_to_me] = set(self._raveled_index())
    def _remove_tie_listener(self, to_remove):
        for t in self._tied_to_me_.keys():
            if t._parent_index_ == to_remove._parent_index_:
                t_rav_i = t._raveled_index()
                tr_rav_i = to_remove._raveled_index()
                import ipdb;ipdb.set_trace()
                new_index = list(set(t_rav_i) - set(tr_rav_i))
                if new_index:
                    tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
                    self._tied_to_me_[tmp] = self._tied_to_me_[t]
                    del self._tied_to_me_[t]
                    if len(self._tied_to_me_[tmp]) == 0:
                        del self._tied_to_me_[tmp]
                else:
                    del self._tied_to_me_[t]
    def _on_tied_parameter_changed(self, val, ind):
        if not self._updated_:  # not fast_array_equal(self, val[ind]):
            val = numpy.atleast_1d(val)
            self._updated_ = True
            if self._original_:
                self.__setitem__(slice(None), val[ind], update=False)
            else:  # this happens when indexing created a copy of the array
                self._direct_parent_._get_original(self).__setitem__(self._current_slice_, val[ind], update=False)
            self._notify_tied_parameters()
            self._updated_ = False
    #===========================================================================
    # Prior Operations
    #===========================================================================
    def set_prior(self, prior):
        """
        :param prior: prior to be set for this parameter
        Set prior for this parameter.
        """
        if not hasattr(self._highest_parent_, '_set_prior'):
            raise AttributeError("Parent of type {} does not support priors".format(self._highest_parent_.__class__))
        self._highest_parent_._set_prior(self, prior)
    def unset_prior(self, *priors):
        """
        :param priors: priors to remove from this parameter
        Remove all priors from this parameter
        """
        self._highest_parent_._remove_prior(self, *priors)
    #===========================================================================
    # Array operations -> done
    #===========================================================================
@ -286,9 +145,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
        return new_arr
    def __setitem__(self, s, val, update=True):
        super(Param, self).__setitem__(s, val, update=update)
-        self._notify_tied_parameters()
+        #self._notify_tied_parameters()
-        if update:
+        if update and self._s_not_empty(s):
-            self._highest_parent_.parameters_changed()
+            self._notify_parameters_changed()
    #===========================================================================
    # Index Operations:
    #===========================================================================
@ -334,21 +194,21 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
                return numpy.r_[a]
            return numpy.r_[:b]
        return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
    #===========================================================================
-    # Convienience
+    # Convenience
    #===========================================================================
    @property
    def is_fixed(self):
        return self._highest_parent_._is_fixed(self)
-    def round(self, decimals=0, out=None):
+    #def round(self, decimals=0, out=None):
-        view = super(Param, self).round(decimals, out).view(Param)
+    #    view = super(Param, self).round(decimals, out).view(Param)
-        view.__array_finalize__(self)
+    #    view.__array_finalize__(self)
-        return view
+    #    return view
-    def _has_fixes(self):
+    #round.__doc__ = numpy.round.__doc__
        return False
    round.__doc__ = numpy.round.__doc__
    def _get_original(self, param):
        return self
    #===========================================================================
    # Printing -> done
    #===========================================================================
@ -356,7 +216,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
    def _description_str(self):
        if self.size <= 1: return ["%f" % self]
        else: return [str(self.shape)]
-    def _parameter_names(self, add_name):
+    def parameter_names(self, add_name=False):
        return [self.name]
    @property
    def flattened_parameters(self):
@ -366,7 +226,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
        return [self.shape]
    @property
    def _constraints_str(self):
-        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self._highest_parent_._constraints_iter_items(self)))]
+        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
    @property
    def _priors_str(self):
        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
    @property
    def _ties_str(self):
        return [t._short() for t in self._tied_to_] or ['']
@ -391,14 +254,15 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
                    if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
                    else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
        return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
    def _constraints_for(self, rav_index):
        return self._highest_parent_._constraints_for(self, rav_index)
    def _indices(self, slice_index=None):
        # get a int-array containing all indices in the first axis.
        if slice_index is None:
            slice_index = self._current_slice_
        if isinstance(slice_index, (tuple, list)):
            clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
            for i in range(self._realndim_-len(clean_curr_slice)):
                i+=len(clean_curr_slice)
                clean_curr_slice += range(self._realshape_[i]) 
            if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
                and len(set(map(len, clean_curr_slice))) <= 1):
                return numpy.fromiter(itertools.izip(*clean_curr_slice),
@ -407,6 +271,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
        return numpy.fromiter(itertools.product(*expanded_index),
                 dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_))
    def _max_len_names(self, gen, header):
        gen = map(lambda x: " ".join(map(str, x)), gen)
        return reduce(lambda a, b:max(a, len(b)), gen, len(header))
    def _max_len_values(self):
        return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical))
@ -421,21 +286,26 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
        if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
        else: indstr = ','.join(map(str, ind))
        return name + '[' + indstr + ']'
-    def __str__(self, constr_matrix=None, indices=None, ties=None, lc=None, lx=None, li=None, lt=None):
+    def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
        filter_ = self._current_slice_
        vals = self.flat
        if indices is None: indices = self._indices(filter_)
        ravi = self._raveled_index(filter_)
-        if constr_matrix is None: constr_matrix = self._constraints_for(ravi)
+        if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
        if prirs is None: prirs = self.priors.properties_for(ravi)
        if ties is None: ties = self._ties_for(ravi)
        ties = [' '.join(map(lambda x: x._short(), t)) for t in ties]
        if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
        if lx is None: lx = self._max_len_values()
        if li is None: li = self._max_len_index(indices)
        if lt is None: lt = self._max_len_names(ties, __tie_name__)
-        header = "  {i:^{2}s}  |  \033[1m{x:^{1}s}\033[0;0m  |  {c:^{0}s}  |  {t:^{3}s}".format(lc, lx, li, lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__)  # nice header for printing
+        if lp is None: lp = self._max_len_names(prirs, __tie_name__)
        sep = '-'
        header_format = "  {i:{5}^{2}s}  |  \033[1m{x:{5}^{1}s}\033[0;0m  |  {c:{5}^{0}s}  |  {p:{5}^{4}s}  |  {t:{5}^{3}s}"
        if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp)  # nice header for printing
        else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__)  # nice header for printing
        if not ties: ties = itertools.cycle([''])
-        return "\n".join([header] + ["  {i!s:^{3}s}  |  {x: >{1}.{2}g}  |  {c:^{0}s}  |  {t:^{4}s}  ".format(lc, lx, __precision__, li, lt, x=x, c=" ".join(map(str, c)), t=(t or ''), i=i) for i, x, c, t in itertools.izip(indices, vals, constr_matrix, ties)])  # return all the constraints with right indices
+        return "\n".join([header] + ["  {i!s:^{3}s}  |  {x: >{1}.{2}g}  |  {c:^{0}s}  |  {p:^{5}s}  |  {t:^{4}s}  ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)])  # return all the constraints with right indices
        # except: return super(Param, self).__str__()
 class ParamConcatenation(object):
@ -467,17 +337,17 @@ class ParamConcatenation(object):
    def __setitem__(self, s, val, update=True):
        ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
        vals = self._vals(); vals[s] = val; del val
-        [numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters()
+        [numpy.place(p, ind[ps], vals[ps])# and p._notify_tied_parameters()
-        for p, ps in zip(self.params, self._param_slices_)]
+         for p, ps in zip(self.params, self._param_slices_)]
        if update:
-            self.params[0]._highest_parent_.parameters_changed()
+            self.params[0]._notify_parameters_changed()
    def _vals(self):
        return numpy.hstack([p._get_params() for p in self.params])
    #===========================================================================
    # parameter operations:
    #===========================================================================
    def update_all_params(self):
-        self.params[0]._highest_parent_.parameters_changed()
+        self.params[0]._notify_parameters_changed()
    def constrain(self, constraint, warning=True):
        [param.constrain(constraint, update=False) for param in self.params]
@ -531,7 +401,7 @@ class ParamConcatenation(object):
    def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
        return self.params[0]._highest_parent_._checkgrad(self, verbose, step, tolerance)
    #checkgrad.__doc__ = Gradcheckable.checkgrad.__doc__
-    
+
    __lt__ = lambda self, val: self._vals() < val
    __le__ = lambda self, val: self._vals() <= val
    __eq__ = lambda self, val: self._vals() == val
@ -541,53 +411,20 @@ class ParamConcatenation(object):
    def __str__(self, *args, **kwargs):
        def f(p):
            ind = p._raveled_index()
-            return p._constraints_for(ind), p._ties_for(ind)
+            return p.constraints.properties_for(ind), p._ties_for(ind), p.priors.properties_for(ind)
        params = self.params
-        constr_matrices, ties_matrices = zip(*map(f, params))
+        constr_matrices, ties_matrices, prior_matrices = zip(*map(f, params))
        indices = [p._indices() for p in params]
        lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in itertools.izip(params, constr_matrices)])
        lx = max([p._max_len_values() for p in params])
        li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)])
        lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)])
-        strings = [p.__str__(cm, i, tm, lc, lx, li, lt) for p, cm, i, tm in itertools.izip(params,constr_matrices,indices,ties_matrices)]
+        lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in itertools.izip(params, prior_matrices)])
        strings = []
        start = True
        for p, cm, i, tm, pm in itertools.izip(params,constr_matrices,indices,ties_matrices,prior_matrices):
            strings.append(p.__str__(constr_matrix=cm, indices=i, prirs=pm, ties=tm, lc=lc, lx=lx, li=li, lp=lp, lt=lt, only_name=(1-start)))
            start = False
        return "\n".join(strings)
        return "\n{}\n".format(" -"+"- | -".join(['-'*l for l in [li,lx,lc,lt]])).join(strings)
    def __repr__(self):
-        return "\n".join(map(repr,self.params))
+        return "\n".join(map(repr,self.params))
 if __name__ == '__main__':
    from GPy.core.parameterized import Parameterized
    from GPy.core.parameter import Param
    #X = numpy.random.randn(2,3,1,5,2,4,3)
    X = numpy.random.randn(3,2)
    print "random done"
    p = Param("q_mean", X)
    p1 = Param("q_variance", numpy.random.rand(*p.shape))
    p2 = Param("Y", numpy.random.randn(p.shape[0], 1))
    p3 = Param("variance", numpy.random.rand())
    p4 = Param("lengthscale", numpy.random.rand(2))
    m = Parameterized()
    rbf = Parameterized(name='rbf')
    rbf.add_parameter(p3,p4)
    m.add_parameter(p,p1,rbf)
    print "setting params"
    #print m.q_v[3:5,[1,4,5]]
    print "constraining variance"
    #m[".*variance"].constrain_positive()
    #print "constraining rbf"
    #m.rbf_l.constrain_positive()
    #m.q_variance[1,[0,5,11,19,2]].tie_to(m.rbf_v)
    #m.rbf_v.tie_to(m.rbf_l[0])
    #m.rbf_l[0].tie_to(m.rbf_l[1])
    #m.q_v.tie_to(m.rbf_v)
 #     m.rbf_l.tie_to(m.rbf_va)
    # pt = numpy.array(params._get_params_transformed())
    # ptr = numpy.random.randn(*pt.shape)
 #     params.X.tie_to(params.rbf_v)
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -1,7 +1,7 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from transformations import Logexp, NegativeLogexp, Logistic
+from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 __updated__ = '2013-12-16'
@ -19,8 +19,7 @@ class Observable(object):
        del self._observers_[observer]
    def _notify_observers(self):
        [callble(self) for callble in self._observers_.itervalues()]
-
+    
 class Pickleable(object):
    def _getstate(self):
        """
@ -52,10 +51,17 @@ class Parentable(object):
        super(Parentable,self).__init__()
        self._direct_parent_ = direct_parent
        self._parent_index_ = parent_index
-
+        
    def has_parent(self):
        return self._direct_parent_ is not None
    def _notify_parent_change(self):
        for p in self._parameters_:
            p._parent_changed(self)
    def _parent_changed(self):
        raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent"
    @property
    def _highest_parent_(self):
        if self._direct_parent_ is None:
@ -78,6 +84,50 @@ class Nameable(Parentable):
        if self.has_parent():
            self._direct_parent_._name_changed(self, from_name)    
 class Parameterizable(Parentable):
    def __init__(self, *args, **kwargs):
        super(Parameterizable, self).__init__(*args, **kwargs)
        from GPy.core.parameterization.array_core import ParamList
        _parameters_ = ParamList()
    def parameter_names(self, add_name=False):
        if add_name:
            return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
        return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]
    def _collect_gradient(self, target):
        import itertools
        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
    def _get_params(self):
        import numpy as np
        # don't overwrite this anymore!
        if not self.size:
            return np.empty(shape=(0,), dtype=np.float64)
        return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
    def _set_params(self, params, update=True):
        # don't overwrite this anymore!
        import itertools
        [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
        self.parameters_changed()
    def parameters_changed(self):
        """
        This method gets called when parameters have changed.
        Another way of listening to param changes is to
        add self as a listener to the param, such that
        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
        """
        pass
    def _notify_parameters_changed(self):
        self.parameters_changed()
        if self.has_parent():
            self._direct_parent_._notify_parameters_changed()
 class Gradcheckable(Parentable):
    #===========================================================================
    # Gradchecking
@ -89,11 +139,34 @@ class Gradcheckable(Parentable):
    def _checkgrad(self, param):
        raise NotImplementedError, "Need log likelihood to check gradient against"
-
+class Indexable(object):
-class Constrainable(Nameable):
+    def _raveled_index(self):
        raise NotImplementedError, "Need to be able to get the raveled Index"
    def _internal_offset(self):
        return 0
    def _offset_for(self, param):
        raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"
    def _raveled_index_for(self, param):
        """
        get the raveled index for a param
        that is an int array, containing the indexes for the flattened
        param inside this parameterized logic.
        """
        raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"        
 class Constrainable(Nameable, Indexable, Parameterizable):
    def __init__(self, name, default_constraint=None):
        super(Constrainable,self).__init__(name)
        self._default_constraint_ = default_constraint
        from index_operations import ParameterIndexOperations
        self.constraints = ParameterIndexOperations()
        self.priors = ParameterIndexOperations()
        if self._default_constraint_ is not None:
            self.constrain(self._default_constraint_)
    #===========================================================================
    # Fixing Parameters:
    #===========================================================================
@ -105,17 +178,75 @@ class Constrainable(Nameable):
        """
        if value is not None:
            self[:] = value
-        self._highest_parent_._fix(self,warning)
+        self.constrain(__fixed__, warning=warning)
        rav_i = self._highest_parent_._raveled_index_for(self)
        self._highest_parent_._set_fixed(rav_i)
    fix = constrain_fixed
    def unconstrain_fixed(self):
        """
        This parameter will no longer be fixed.
        """
-        self._highest_parent_._unfix(self)
+        unconstrained = self.unconstrain(__fixed__)
        self._highest_parent_._set_unfixed(unconstrained)    
    unfix = unconstrain_fixed
    def _set_fixed(self, index):
        import numpy as np
        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
        self._fixes_[index] = FIXED
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED
    def _set_unfixed(self, index):
        import numpy as np
        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
        #rav_i = self._raveled_index_for(param)[index]
        self._fixes_[index] = UNFIXED
        if np.all(self._fixes_): self._fixes_ = None  # ==UNFIXED
    def _connect_fixes(self):
        import numpy as np
        fixed_indices = self.constraints[__fixed__]
        if fixed_indices.size > 0:
            self._fixes_ = np.ones(self.size, dtype=bool) * UNFIXED
            self._fixes_[fixed_indices] = FIXED
        else:
            self._fixes_ = None
    def _has_fixes(self):
        return hasattr(self, "_fixes_") and self._fixes_ is not None
    #===========================================================================
    # Prior Operations
    #===========================================================================
    def set_prior(self, prior, warning=True, update=True):
        repriorized = self.unset_priors()
        self._add_to_index_operations(self.priors, repriorized, prior, warning, update)
    def unset_priors(self, *priors):
        return self._remove_from_index_operations(self.priors, priors)
    def log_prior(self):
        """evaluate the prior"""
        if self.priors.size > 0:
            x = self._get_params()
            return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
        return 0.
    def _log_prior_gradients(self):
        """evaluate the gradients of the priors"""
        import numpy as np
        if self.priors.size > 0:
            x = self._get_params()
            ret = np.zeros(x.size)
            [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
            return ret
        return 0.
    #===========================================================================
    # Constrain operations -> done
    #===========================================================================
    def constrain(self, transform, warning=True, update=True):
        """
        :param transform: the :py:class:`GPy.core.transformations.Transformation`
@ -125,16 +256,20 @@ class Constrainable(Nameable):
        Constrain the parameter to the given
        :py:class:`GPy.core.transformations.Transformation`.
        """
-        if self.has_parent():
+        if isinstance(transform, Transformation):
-            self._highest_parent_._add_constrain(self, transform, warning)
+            self._set_params(transform.initialize(self._get_params()), update=False)
-            if update:
+        reconstrained = self.unconstrain()
-                self._highest_parent_.parameters_changed()
+        self._add_to_index_operations(self.constraints, reconstrained, transform, warning, update)
        else:
            for p in self._parameters_:
                self._add_constrain(p, transform, warning)
            if update:
                self.parameters_changed()
    def unconstrain(self, *transforms):
        """
        :param transforms: The transformations to unconstrain from.
        remove all :py:class:`GPy.core.transformations.Transformation`
        transformats of this parameter object.
        """
        return self._remove_from_index_operations(self.constraints, transforms)
    def constrain_positive(self, warning=True, update=True):
        """
        :param warning: print a warning if re-constraining parameters.
@ -160,19 +295,6 @@ class Constrainable(Nameable):
        """
        self.constrain(Logistic(lower, upper), warning=warning, update=update)
    def unconstrain(self, *transforms):
        """
        :param transforms: The transformations to unconstrain from.
        remove all :py:class:`GPy.core.transformations.Transformation`
        transformats of this parameter object.
        """
        if self.has_parent():
            self._highest_parent_._remove_constrain(self, *transforms)
        else:
            for p in self._parameters_:
                self._remove_constrain(p, *transforms)
    def unconstrain_positive(self):
        """
        Remove positive constraint of this parameter.
@ -192,3 +314,35 @@ class Constrainable(Nameable):
        Remove (lower, upper) bounded constrain from this parameter/
        """
        self.unconstrain(Logistic(lower, upper))
    def _parent_changed(self, parent):
        from index_operations import ParameterIndexOperationsView
        self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size)
        self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size)
        self._fixes_ = None
        for p in self._parameters_:
            p._parent_changed(parent)
    def _add_to_index_operations(self, which, reconstrained, transform, warning, update):
        if warning and reconstrained.size > 0:
            print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
        which.add(transform, self._raveled_index())
        if update:
            self._notify_parameters_changed()
    def _remove_from_index_operations(self, which, transforms):
        if len(transforms) == 0:
            transforms = which.properties()
        import numpy as np
        removed = np.empty((0, ), dtype=int)
        for t in transforms:
            unconstrained = which.remove(t, self._raveled_index())
            removed = np.union1d(removed, unconstrained)
            if t is __fixed__:
                self._highest_parent_._set_unfixed(unconstrained)
        return removed
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -9,21 +9,9 @@ import itertools
 from re import compile, _pattern_type
 from param import ParamConcatenation, Param
 from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
-from index_operations import ParameterIndexOperations,\
+from transformations import __fixed__, FIXED, UNFIXED
    index_empty
 from array_core import ParamList
 #===============================================================================
 # Printing:
 __fixed__ = "fixed"
 #===============================================================================
 #===============================================================================
 # constants
 FIXED = False
 UNFIXED = True
 #===============================================================================
 class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
    """
    Parameterized class
@ -69,7 +57,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
    def __init__(self, name=None):
        super(Parameterized, self).__init__(name=name)
        self._in_init_ = True
        self._constraints_ = None#ParameterIndexOperations()
        self._parameters_ = ParamList()
        self.size = sum(p.size for p in self._parameters_)
        if not self._has_fixes():
@ -79,45 +66,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        self._added_names_ = set()
        del self._in_init_
    @property
    def constraints(self):
        if self._constraints_ is None:
            self._constraints_ = ParameterIndexOperations()
        return self._constraints_
    #===========================================================================
    # Parameter connection for model creation:
    #===========================================================================
 #     def set_as_parameter(self, name, array, gradient, index=None, gradient_parent=None):
 #         """
 #         :param name:     name of the param (in print and plots), can be callable without parameters
 #         :type name:      str, callable
 #         :param array:    array which the param consists of
 #         :type array:     array-like
 #         :param gradient: gradient method of the param
 #         :type gradient:  callable
 #         :param index:    (optional) index of the param when printing
 #
 #         (:param gradient_parent:  connect these parameters to this class, but tell
 #                         updates to highest_parent, this is needed when parameterized classes
 #                         contain parameterized classes, but want to access the parameters
 #                         of their children)
 #
 #
 #         Set array (e.g. self.X) as param with name and gradient.
 #         I.e: self.set_as_parameter('curvature', self.lengthscale, self.dK_dlengthscale)
 #
 #         Note: the order in which parameters are added can be adjusted by
 #               giving an index, of where to put this param in printing
 #         """
 #         if index is None:
 #             self._parameters_.append(Param(name, array, gradient))
 #         else:
 #             self._parameters_.insert(index, Param(name, array, gradient))
 #         self._connect_parameters(gradient_parent=gradient_parent)
    def _has_fixes(self):
        return hasattr(self, "_fixes_") and self._fixes_ is not None
    def add_parameter(self, param, index=None):
        """
        :param parameters:  the parameters to add
@ -128,58 +76,31 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        Add all parameters to this param class, you can insert parameters
        at any given index using the :func:`list.insert` syntax
        """
        # if param.has_parent():
        #    raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
        if param in self._parameters_ and index is not None:
-            # make sure fixes and constraints are indexed right
+            self.remove_parameter(param)
-            if self._has_fixes():
+            self.add_parameter(param, index)
                param_slice = slice(self._offset_for(param),self._offset_for(param)+param.size)
                dest_index = sum((p.size for p in self._parameters_[:index]))
                dest_slice = slice(dest_index,dest_index+param.size)
                fixes_param = self._fixes_[param_slice].copy()
                self._fixes_[param_slice] = self._fixes_[dest_slice]
                self._fixes_[dest_slice] = fixes_param
            del self._parameters_[param._parent_index_]
            self._parameters_.insert(index, param)
        elif param not in self._parameters_:
            # make sure the size is set
            if not hasattr(self, 'size'):
                self.size = sum(p.size for p in self._parameters_)
            if index is None:
                self.constraints.update(param.constraints, self.size)
                self.priors.update(param.priors, self.size)
                self._parameters_.append(param)
                # make sure fixes and constraints are indexed right
                if param._has_fixes(): fixes_param = param._fixes_.copy()
                else: fixes_param = numpy.ones(param.size, dtype=bool)
                if self._has_fixes(): self._fixes_ = np.r_[self._fixes_, fixes_param]
                elif param._has_fixes(): self._fixes_ = np.r_[np.ones(self.size, dtype=bool), fixes_param]
            else:
                start = sum(p.size for p in self._parameters_[:index])
                self.constraints.shift(start, param.size)
                self.priors.shift(start, param.size)
                self.constraints.update(param.constraints, start)
                self.priors.update(param.priors, start)
                self._parameters_.insert(index, param)
                # make sure fixes and constraints are indexed right
                if param._has_fixes(): fixes_param = param._fixes_.copy()
                else: fixes_param = numpy.ones(param.size, dtype=bool)
                ins = sum((p.size for p in self._parameters_[:index]))
                if self._has_fixes(): self._fixes_ = np.r_[self._fixes_[:ins], fixes_param, self._fixes[ins:]]
                elif not np.all(fixes_param):
                    self._fixes_ = np.ones(self.size+param.size, dtype=bool)
                    self._fixes_[ins:ins+param.size] = fixes_param
            self.size += param.size
        else:
            raise RuntimeError, """Parameter exists already added and no copy made"""
        self._connect_parameters()
-        # make sure the constraints are pulled over:
+        self._notify_parent_change()
-        if hasattr(param, "_constraints_") and param._constraints_ is not None:
+        self._connect_fixes()
-            for t, ind in param._constraints_.iteritems():
+
                self.constraints.add(t, ind+self._offset_for(param))
            param._constraints_.clear()
        if param._default_constraint_ is not None:
            self._add_constrain(param, param._default_constraint_, False)
        if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
            self._fixes_= None
    def add_parameters(self, *parameters):
        """
@ -188,44 +109,44 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        """
        [self.add_parameter(p) for p in parameters]
-    def remove_parameter(self, *names_params_indices):
+    def remove_parameter(self, param):
        """
-        :param names_params_indices: mix of parameter_names, param objects, or indices
+        :param param: param object to remove from being a parameter of this parameterized object.
            to remove from being a param of this parameterized object.
            note: if it is a string object it will not (!) be regexp-matched
                  automatically.
        """
-        self._parameters_ = ParamList([p for p in self._parameters_
+        if not param in self._parameters_:
-                        if not (p._parent_index_ in names_params_indices
+            raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
-                                or p.name in names_params_indices
+        del self._parameters_[param._parent_index_]
-                                or p in names_params_indices)])
+        self.size -= param.size
        constr = param.constraints.copy()
        param.constraints.clear()
        param.constraints = constr
        param._direct_parent_ = None
        param._parent_index_ = None
        param._connect_fixes()
        param._notify_parent_change()
        pname = adjust_name_for_printing(param.name)
        if pname in self._added_names_:
            del self.__dict__[pname]
        self._connect_parameters()
-
+        #self._notify_parent_change()
-    def parameters_changed(self):
+        self._connect_fixes()
        """
        This method gets called when parameters have changed.
        Another way of listening to param changes is to
        add self as a listener to the param, such that
        updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer``
        """
        # will be called as soon as parameters have changed
        pass
    def _connect_parameters(self):
        # connect parameterlist to this parameterized object
        # This just sets up the right connection for the params objects
        # to be used as parameters
        # it also sets the constraints for each parameter to the constraints 
        # of their respective parents 
        if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
            # no parameters for this class
            return
        sizes = [0]
        self._param_slices_ = []
-        for i,p in enumerate(self._parameters_):
+        for i, p in enumerate(self._parameters_):
            p._direct_parent_ = self
            p._parent_index_ = i
            not_unique = []
-            sizes.append(p.size+sizes[-1])
+            sizes.append(p.size + sizes[-1])
            self._param_slices_.append(slice(sizes[-2], sizes[-1]))
            pname = adjust_name_for_printing(p.name)
            # and makes sure to not delete programmatically added parameters
@ -237,7 +158,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
            elif not (pname in not_unique):
                self.__dict__[pname] = p
                self._added_names_.add(pname)
-            
+
    #===========================================================================
    # Pickling operations
    #===========================================================================
@ -255,16 +176,16 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
            cPickle.dump(self, f, protocol)
    def copy(self):
        """Returns a (deep) copy of the current model """
-        #dc = dict()
+        # dc = dict()
-        #for k, v in self.__dict__.iteritems():
+        # for k, v in self.__dict__.iteritems():
-            #if k not in ['_highest_parent_', '_direct_parent_']:
+            # if k not in ['_highest_parent_', '_direct_parent_']:
-                #dc[k] = copy.deepcopy(v)
+                # dc[k] = copy.deepcopy(v)
-        #dc = copy.deepcopy(self.__dict__)
+        # dc = copy.deepcopy(self.__dict__)
-        #dc['_highest_parent_'] = None
+        # dc['_highest_parent_'] = None
-        #dc['_direct_parent_'] = None
+        # dc['_direct_parent_'] = None
-        #s = self.__class__.new()
+        # s = self.__class__.new()
-        #s.__dict__ = dc
+        # s.__dict__ = dc
        return copy.deepcopy(self)
    def __getstate__(self):
        if self._has_get_set_state():
@ -272,8 +193,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        return self.__dict__
    def __setstate__(self, state):
        if self._has_get_set_state():
-            self._setstate(state) # set state
+            self._setstate(state)  # set state
-            #self._set_params(self._get_params()) # restore all values
+            # self._set_params(self._get_params()) # restore all values
            return
        self.__dict__ = state
    def _has_get_set_state(self):
@ -289,7 +210,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        """
        return [
                self._fixes_,
-                self._constraints_,
+                self.priors,
                self.constraints,
                self._parameters_,
                self._name,
                self._added_names_,
@ -299,9 +221,10 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        self._added_names_ = state.pop()
        self._name = state.pop()
        self._parameters_ = state.pop()
-        self._connect_parameters()
+        self.constraints = state.pop()
-        self._constraints_ = state.pop()
+        self.priors = state.pop()
        self._fixes_ = state.pop()
        self._connect_parameters()
        self.parameters_changed()
    #===========================================================================
    # Gradient control
@ -310,9 +233,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        if self.has_parent():
            return g
        x = self._get_params()
-        [numpy.put(g, i, g[i]*c.gradfactor(x[i])) for c,i in self.constraints.iteritems() if c != __fixed__]
+        [numpy.put(g, i, g[i] * c.gradfactor(x[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
        for p in self.flattened_parameters:
-            for t,i in p._tied_to_me_.iteritems():
+            for t, i in p._tied_to_me_.iteritems():
                g[self._offset_for(p) + numpy.array(list(i))] += g[self._raveled_index_for(t)]
        if self._has_fixes(): return g[self._fixes_]
        return g
@ -320,27 +243,17 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
    # Optimization handles:
    #===========================================================================
    def _get_param_names(self):
-        n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
+        n = numpy.array([p.name_hirarchical + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
        return n
    def _get_param_names_transformed(self):
        n = self._get_param_names()
        if self._has_fixes():
            return n[self._fixes_]
        return n
    def _get_params(self):
        # don't overwrite this anymore!
        if not self.size:
            return np.empty(shape=(0,), dtype=np.float64)
        return numpy.hstack([x._get_params() for x in self._parameters_ if x.size>0])
    def _set_params(self, params, update=True):
        # don't overwrite this anymore!
        [p._set_params(params[s], update=update) for p,s in itertools.izip(self._parameters_,self._param_slices_)]
        self.parameters_changed()
    def _get_params_transformed(self):
        # transformed parameters (apply transformation rules)
        p = self._get_params()
-        [numpy.put(p, ind, c.finv(p[ind])) for c,ind in self.constraints.iteritems() if c != __fixed__]
+        [numpy.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
        if self._has_fixes():
            return p[self._fixes_]
        return p
@ -350,7 +263,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
    def _untransform_params(self, p):
        p = p.copy()
        if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
-        [numpy.put(p, ind, c.f(p[ind])) for c,ind in self.constraints.iteritems() if c != __fixed__]
+        [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
        return p
    def _name_changed(self, param, old_name):
        if hasattr(self, old_name) and old_name in self._added_names_:
@ -361,11 +274,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
            self._added_names_.add(pname)
            self.__dict__[pname] = param
    #===========================================================================
-    # Index Handling
+    # Indexable Handling
    #===========================================================================
    def _backtranslate_index(self, param, ind):
        # translate an index in parameterized indexing into the index of param
-        ind = ind-self._offset_for(param)
+        ind = ind - self._offset_for(param)
        ind = ind[ind >= 0]
        internal_offset = param._internal_offset()
        ind = ind[ind < param.size + internal_offset]
@ -377,7 +290,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
                return self._param_slices_[param._direct_parent_._get_original(param)._parent_index_].start
            return self._offset_for(param._direct_parent_) + param._direct_parent_._offset_for(param)
        return 0
-
+    
    def _raveled_index_for(self, param):
        """
        get the raveled index for a param
@ -387,7 +300,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        if isinstance(param, ParamConcatenation):
            return numpy.hstack((self._raveled_index_for(p) for p in param.params))
        return param._raveled_index() + self._offset_for(param)
-
+    
    def _raveled_index(self):
        """
        get the raveled index for this object,
@ -397,36 +310,10 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
    #===========================================================================
    # Fixing parameters:
    #===========================================================================
    def _set_fixed(self, param_or_index):
        if not self._has_fixes(): self._fixes_ = numpy.ones(self.size, dtype=bool)
        try:
            param_or_index = self._raveled_index_for(param_or_index)
        except AttributeError:
            pass
        self._fixes_[param_or_index] = FIXED
        if numpy.all(self._fixes_): self._fixes_ = None # ==UNFIXED
    def _set_unfixed(self, param_or_index):
        if not self._has_fixes(): self._fixes_ = numpy.ones(self.size, dtype=bool)
        try:
            param_or_index = self._raveled_index_for(param_or_index)
        except AttributeError:
            pass
        self._fixes_[param_or_index] = UNFIXED
        for constr, ind in self.constraints.iteritems():
            if constr is __fixed__:
                self._fixes_[ind] = FIXED
        if numpy.all(self._fixes_): self._fixes_ = None # ==UNFIXED
    def _fixes_for(self, param):
        if self._has_fixes():
            return self._fixes_[self._raveled_index_for(param)]
        return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
    def _fix(self, param, warning=True):
        f = self._add_constrain(param, __fixed__, warning)
        self._set_fixed(f)
    def _unfix(self, param):
        if self._has_fixes():
            f = self._remove_constrain(param, __fixed__)
            self._set_unfixed(f)
    #===========================================================================
    # Convenience for fixed, tied checking of param:
    #===========================================================================
@ -437,7 +324,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        if not self._has_fixes():
            return False
        return not self._fixes_[self._raveled_index_for(param)].any()
-        #return not self._fixes_[self._offset_for(param): self._offset_for(param)+param._realsize_].any()
+        # return not self._fixes_[self._offset_for(param): self._offset_for(param)+param._realsize_].any()
    @property
    def is_fixed(self):
        for p in self._parameters_:
@ -453,57 +340,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "."
        return ''
    #===========================================================================
    # Constraint Handling:
    #===========================================================================
    def _add_constrain(self, param, transform, warning=True):
        rav_i = self._raveled_index_for(param)
        reconstrained = self._remove_constrain(param, index=rav_i) # remove constraints before
        # if removing constraints before adding new is not wanted, just delete the above line!
        self.constraints.add(transform, rav_i)
        param = self._get_original(param)
        if not (transform == __fixed__):
            param._set_params(transform.initialize(param._get_params()), update=False)
        if warning and any(reconstrained):
            # if you want to print the whole params object, which was reconstrained use:
            # m = str(param[self._backtranslate_index(param, reconstrained)])
            print "Warning: re-constraining parameters:\n{}".format(param._short())
        return rav_i
    def _remove_constrain(self, param, *transforms, **kwargs):
        if not transforms:
            transforms = self.constraints.properties()
        removed_indices = numpy.array([]).astype(int)
        if "index" in kwargs: index = kwargs['index']
        else: index = self._raveled_index_for(param)
        for constr in transforms:
            removed = self.constraints.remove(constr, index)
            if constr is __fixed__:
                self._set_unfixed(removed)
            removed_indices = numpy.union1d(removed_indices, removed)
        return removed_indices
    # convienience for iterating over items
    def _constraints_iter_items(self, param):
        for constr, ind in self.constraints.iteritems():
            ind = self._backtranslate_index(param, ind)
            if not index_empty(ind):
                yield constr, ind
    def _constraints_iter(self, param):
        for constr, _ in self._constraints_iter_items(param):
            yield constr
    def _contraints_iter_indices(self, param):
        # iterate through all constraints belonging to param
        for _, ind in self._constraints_iter_items(param):
            yield ind
    def _constraint_indices(self, param, constraint):
        # indices in model range for param and constraint
        return self._backtranslate_index(param, self.constraints[constraint]) + self._offset_for(param)
    def _constraints_for(self, param, rav_index):
        # constraint for param given its internal rav_index
        return self.constraints.properties_for(rav_index+self._offset_for(param))
    def _constraints_for_collect(self, param, rav_index):
        # constraint for param given its internal rav_index
        cs = self._constraints_for(param, rav_index)
        return set(itertools.chain(*cs))
    #===========================================================================
    # Get/set parameters:
    #===========================================================================
    def grep_param_names(self, regexp):
@ -539,7 +375,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
 #     def __getattribute__(self, name):
 #         #try:
 #             return object.__getattribute__(self, name)
-        #except AttributeError:
+        # except AttributeError:
        #    _, a, tb = sys.exc_info()
        #    try:
        #        return self.__getitem__(name)
@ -560,13 +396,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
        else:
            return adjust_name_for_printing(self.name)
    def _parameter_names(self, add_name=False):
        if add_name:
            return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x._parameter_names(add_name=True)]
        return [xi for x in self._parameters_ for xi in x._parameter_names(add_name=True)]
    parameter_names = property(_parameter_names, doc="Names for all parameters handled by this parameterization object -- will add hirarchy name entries for printing")
    def _collect_gradient(self, target):
        [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
    @property
    def flattened_parameters(self):
        return [xi for x in self._parameters_ for xi in x.flattened_parameters]
@ -585,6 +414,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
    def _constraints_str(self):
        return [cs for p in self._parameters_ for cs in p._constraints_str]
    @property
    def _priors_str(self):
        return [cs for p in self._parameters_ for cs in p._priors_str]
    @property
    def _description_str(self):
        return [xi for x in self._parameters_ for xi in x._description_str]
    @property
@ -592,22 +424,25 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
        return [','.join(x._ties_str) for x in self.flattened_parameters]
    def __str__(self, header=True):
-        name  = adjust_name_for_printing(self.name) + "."
+        name = adjust_name_for_printing(self.name) + "."
-        constrs = self._constraints_str; ts = self._ties_str
+        constrs = self._constraints_str; 
-        desc = self._description_str; names = self.parameter_names
+        ts = self._ties_str
        prirs = self._priors_str
        desc = self._description_str; names = self.parameter_names()
        nl = max([len(str(x)) for x in names + [name]])
        sl = max([len(str(x)) for x in desc + ["Value"]])
-        cl = max([len(str(x)) if x else 0 for x in constrs  + ["Constraint"]])
+        cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
        tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
-        format_spec = "  \033[1m{{name:<{0}s}}\033[0;0m  |  {{desc:^{1}s}}  |  {{const:^{2}s}}  |  {{t:^{3}s}}".format(nl, sl, cl, tl)
+        pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
        format_spec = "  \033[1m{{name:<{0}s}}\033[0;0m  |  {{desc:^{1}s}}  |  {{const:^{2}s}}  |  {{pri:^{3}s}}  |  {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
        to_print = []
-        for n, d, c, t in itertools.izip(names, desc, constrs, ts):
+        for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
-            to_print.append(format_spec.format(name=n, desc=d, const=c, t=t))
+            to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
-        #to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self._parameters_, constrs, ts)]
+        # to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self._parameters_, constrs, ts)]
-        sep = '-'*(nl+sl+cl+tl+8*2+3)
+        sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
        if header:
-            header = "  {{0:<{0}s}}  |  {{1:^{1}s}}  |  {{2:^{2}s}}  |  {{3:^{3}s}}".format(nl, sl, cl, tl).format(name, "Value", "Constraint", "Tied to")
+            header = "  {{0:<{0}s}}  |  {{1:^{1}s}}  |  {{2:^{2}s}}  |  {{3:^{3}s}}  |  {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to")
-            #header += '\n' + sep
+            # header += '\n' + sep
            to_print.insert(0, header)
        return '\n'.format(sep).join(to_print)
    pass
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -9,17 +9,20 @@ from domains import _REAL, _POSITIVE
 import warnings
 import weakref
-class Prior:
+class Prior(object):
    domain = None
    def pdf(self, x):
        return np.exp(self.lnpdf(x))
    def plot(self):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import priors_plots
+        from ...plotting.matplot_dep import priors_plots
        priors_plots.univariate_plot(self)
    def __repr__(self, *args, **kwargs):
        return self.__str__()
 class Gaussian(Prior):
    """
@ -150,6 +153,7 @@ class MultivariateGaussian:
        return np.random.multivariate_normal(self.mu, self.var, n)
    def plot(self):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import priors_plots
        priors_plots.multivariate_plot(self)
--- a/GPy/core/parameterization/transformations.py
+++ b/GPy/core/parameterization/transformations.py
@ -6,8 +6,17 @@ import numpy as np
 from domains import _POSITIVE,_NEGATIVE, _BOUNDED
 import sys
 import weakref
 _lim_val = -np.log(sys.float_info.epsilon)
 #===============================================================================
 # Fixing constants
 __fixed__ = "fixed"
 FIXED = False
 UNFIXED = True
 #===============================================================================
 class Transformation(object):
    domain = None
    _instance = None
@ -27,11 +36,14 @@ class Transformation(object):
        raise NotImplementedError
    def __str__(self):
        raise NotImplementedError
    def __repr__(self):
        return self.__class__.__name__
 class Logexp(Transformation):
    domain = _POSITIVE
    def f(self, x):
-        return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
+        return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
        #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
    def finv(self, f):
        return np.where(f>_lim_val, f, np.log(np.exp(f) - 1.))
    def gradfactor(self, f):
@ -56,7 +68,7 @@ class NegativeLogexp(Transformation):
        return -self.logexp.initialize(f)  # np.abs(f)
    def __str__(self):
        return '-ve'
-
+    
 class LogexpClipped(Logexp):
    max_bound = 1e100
    min_bound = 1e-10
@ -94,7 +106,6 @@ class LogexpClipped(Logexp):
    def __str__(self):
        return '+ve_c'
 class Exponent(Transformation):
    # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
    domain = _POSITIVE
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
-from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
+from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, dtrtrs, dpotrs, dpotri
 from gp import GP
 from parameterization.param import Param
 from ..inference.latent_function_inference import varDTC
@ -38,9 +38,9 @@ class SparseGP(GP):
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian):
                inference_method = varDTC.VarDTC()
-        else:
+            else:
-            #inference_method = ??
+                #inference_method = ??
-            raise NotImplementedError, "what to do what to do?"
+                raise NotImplementedError, "what to do what to do?"
            print "defaulting to ", inference_method, "for latent function inference"
        self.Z = Param('inducing inputs', Z)
@ -51,19 +51,23 @@ class SparseGP(GP):
        self.X_variance = X_variance
        GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
        self.add_parameter(self.Z, index=0)
        self.parameters_changed()
    def _update_gradients_Z(self, add=False):
    #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
        if not self.Z.is_fixed:
            if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
            else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
            if self.X_variance is None:
                self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
            else:
                self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
                self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
    def parameters_changed(self):
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-
+        self._update_gradients_Z(add=False)
        #The derivative of the bound wrt the inducing inputs Z
        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
        if self.X_variance is None:
            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
        else:
            self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
            self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
        """
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -87,18 +87,22 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0
-    bern_noise_model = GPy.likelihoods.bernoulli()
+    likelihood = GPy.likelihoods.Bernoulli()
-    laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model)
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    kernel = GPy.kern.rbf(1)
    # Model definition
-    m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
+    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
    print m
    # Optimize
    if optimize:
        #m.update_likelihood_approximation()
        # Parameters optimization:
-        m.optimize('bfgs', messages=1)
+        try:
            m.optimize('scg', messages=1)
        except Exception as e:
            return m
        #m.pseudo_EM()
    # Plot
--- a/GPy/examples/non_gaussian.py
+++ b/GPy/examples/non_gaussian.py
@ -42,38 +42,35 @@ def student_t_approx(optimize=True, plot=True):
    kernel4 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
    #Gaussian GP model on clean data
-    m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
+    #m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
-    # optimize
+    ## optimize
-    m1.ensure_default_constraints()
+    #m1['white'].constrain_fixed(1e-5)
-    m1['white'] = 1e-5
+    #m1.randomize()
    m1['white'].constrain_fixed('white')
    m1.randomize()
-    #Gaussian GP model on corrupt data
+    ##Gaussian GP model on corrupt data
-    m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
+    #m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
-    m2.ensure_default_constraints()
+    #m1['white'].constrain_fixed(1e-5)
-    m1['white'] = 1e-5
+    #m2.randomize()
    m1['white'].constrain_fixed('white')
    m2.randomize()
    #Student t GP model on clean data
    t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
-    laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
    m3.ensure_default_constraints()
    m3['t_noise'].constrain_bounded(1e-6, 10.)
-    m3['white'] = 1e-5
+    m3['white'].constrain_fixed(1e-5)
    m3['white'].constrain_fixed()
    m3.randomize()
    debug = True
    print m3
    if debug:
        m3.optimize(messages=1)
        return m3
    #Student t GP model on corrupt data
    t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
-    laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
    m4.ensure_default_constraints()
    m4['t_noise'].constrain_bounded(1e-6, 10.)
-    m4['white'] = 1e-5
+    m4['white'].constrain_fixed(1e-5)
    m4['white'].constrain_fixed()
    m4.randomize()
    if optimize:
@ -156,7 +153,6 @@ def boston_example(optimize=True, plot=True):
        #Gaussian GP
        print "Gauss GP"
        mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
        mgp.ensure_default_constraints()
        mgp.constrain_fixed('white', 1e-5)
        mgp['rbf_len'] = rbf_len
        mgp['noise'] = noise
@ -174,7 +170,6 @@ def boston_example(optimize=True, plot=True):
        g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
        g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
        mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
        mg.ensure_default_constraints()
        mg.constrain_positive('noise_variance')
        mg.constrain_fixed('white', 1e-5)
        mg['rbf_len'] = rbf_len
@ -194,7 +189,6 @@ def boston_example(optimize=True, plot=True):
            t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
            stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
            mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
            mstu_t.ensure_default_constraints()
            mstu_t.constrain_fixed('white', 1e-5)
            mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
            mstu_t['rbf_len'] = rbf_len
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -473,7 +473,6 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
    Z = np.random.uniform(-3., 3., (7, 1))
    k = GPy.kern.rbf(1)
    import ipdb;ipdb.set_trace()
    # create simple GP Model - no input uncertainty on this one
    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
--- a/GPy/inference/latent_function_inference/init.py
+++ b/GPy/inference/latent_function_inference/init.py
@ -9,12 +9,12 @@ prior over a finite set of points f. This prior is
 where K is the kernel matrix.
 We also have a likelihood (see GPy.likelihoods) which defines how the data are
-related to the latent function: p(y | f).  If the likelihood is also a Gaussian, 
+related to the latent function: p(y | f).  If the likelihood is also a Gaussian,
-the inference over f is tractable (see exact_gaussian_inference.py). 
+the inference over f is tractable (see exact_gaussian_inference.py).
 If the likelihood object is something other than Gaussian, then exact inference
 is not tractable. We then resort to a Laplace approximation (laplace.py) or
-expectation propagation (ep.py). 
+expectation propagation (ep.py).
 The inference methods return a "Posterior" instance, which is a simple
 structure which contains a summary of the posterior. The model classes can then
@ -24,5 +24,8 @@ etc.
 """
 from exact_gaussian_inference import ExactGaussianInference
-from laplace import LaplaceInference
+from laplace import Laplace
 expectation_propagation = 'foo' # TODO
 from varDTC import VarDTC
 from dtc import DTC
 from fitc import FITC
--- a/GPy/inference/latent_function_inference/dtc.py
+++ b/GPy/inference/latent_function_inference/dtc.py
@ -22,6 +22,10 @@ class DTC(object):
    def inference(self, kern, X, X_variance, Z, likelihood, Y):
        assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
        #TODO: MAX! fix this!
        from ...util.misc import param_to_array
        Y = param_to_array(Y)
        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape
@ -40,9 +44,8 @@ class DTC(object):
        Kmmi, L, Li, _ = pdinv(Kmm)
        # Compute A
-        LiUT, _ = dtrtrs(L, U.T*np.sqrt(beta), lower=1)
+        LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
-        A_I = tdot(LiUT)
+        A = tdot(LiUTbeta) + np.eye(num_inducing)
        A = A_I + np.eye(num_inducing)
        # factor A
        LA = jitchol(A)
@ -52,44 +55,37 @@ class DTC(object):
        b, _ = dtrtrs(LA, tmp*beta, lower=1)
        tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
        v, _ = dtrtrs(L, tmp, lower=1, trans=1)
-        tmp = tdrtrs(LA, Li, lower=1, trans=0)
+        tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
        P = tdot(tmp.T)
        #compute log marginal
        log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                       -np.sum(np.log(np.diag(LA)))*output_dim + \
                       0.5*num_data*output_dim*np.log(beta) + \
-                       -0.5*beta*np.sum(np.square(Y)) + 
+                       -0.5*beta*np.sum(np.square(Y)) + \
                       0.5*np.sum(np.square(b))
        # Compute dL_dKmm
-        tmp, _ = dtrtrs(L, A_I, lower=1, trans=1)
+        vvT_P = tdot(v.reshape(-1,1)) + P
-        dL_dK, _ = dtrtrs(L, tmp.T, lower=1, trans=0)
+        dL_dK = 0.5*(Kmmi - vvT_P)
        tmp, _ = dtrtrs(LA, tmp.T. lower=1, trans=1)
        dL_dK -= tdot(tmp.T)
        dL_dK *= output_dim
        dL_dK -= tdot(v)
        dL_dK /=2.
        # Compute dL_dU
        vvT_P = tdot(v.reshape(-1,1)) + P
        vY = np.dot(v.reshape(-1,1),Y.T)
-        dL_dU = vY + np.dot(vvT_P, U.T)
+        dL_dU = vY - np.dot(vvT_P, U.T)
        dL_dU *= beta
        #compute dL_dR
        Uv = np.dot(U, v)
-        dL_dR = 0.5*(np.sum(U*np.dot(P, U.T), 1) - beta * np.sum(np.square(Y, 1)) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1)
+        dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2
                )*beta**2
-        grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU}
+        grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T}
        #update gradients
        kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
        likelihood.update_gradients(dL_dR)
        #construct a posterior object
-        post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
+        post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
        return post, log_marginal, grad_dict
--- a/GPy/inference/latent_function_inference/fitc.py
+++ b/GPy/inference/latent_function_inference/fitc.py
@ -1,225 +1,95 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-class VarDTC(object):
+from posterior import Posterior
 from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
 import numpy as np
 log_2_pi = np.log(2*np.pi)
 class FITC(object):
    """
    An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
    The function self.inference returns a Posterior object, which summarizes
    the posterior.
    """
    def __init__(self):
        self.const_jitter = 1e-6
    def inference(self, kern, X, X_variance, Z, likelihood, Y):
        assert X_variance is None, "cannot use X_variance with FITC. Try varDTC."
        #TODO: MAX! fix this!
        from ...util.misc import param_to_array
        Y = param_to_array(Y)
        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape
-        #make sure we're not using variational uncertain inputs
+        #make sure the noise is not hetero
-        assert = X_variance is None, "variational inducing inputs only for use with varDTC inference"
+        sigma_n = np.squeeze(likelihood.variance)
-        #Note: we can;t do the variational thing after making the GITC conditional approximation because K~ appears in the log determinant.
+        if sigma_n.size <1:
            raise NotImplementedError, "no hetero noise with this implementation of FITC"
        #see whether we've got a different noise variance for each datum
        sigma2 = np.squeeze(likelihood.variance)
        het_noise = False
        if sigma2.size <1:
            het_noise = True
        # kernel computations, using BGPLVM notation
        Kmm = kern.K(Z)
-        Kdiag = kern.Kdiag(X)
+        Knn = kern.Kdiag(X)
-        Kmn = kern.K(X, Z)
+        Knm = kern.K(X, Z)
        U = Knm
-        #factor Kmm
+        #factor Kmm 
-        Lm = jitchol(Kmm)
+        Kmmi, L, Li, _ = pdinv(Kmm)
        V = dtrtrs(Lm, Kmn, lower=1)
-        #compute effective noise
+        #compute beta_star, the effective noise precision
-        g_sn2 = sigma2 + Kdiag - np.sum(V*V, 0)
+        LiUT = np.dot(Li, U.T)
        sigma_star = Knn + sigma_n - np.sum(np.square(LiUT),0)
        beta_star = 1./sigma_star
-        #compute and factor B
+        # Compute and factor A
-        tmp = Kmn / np.sqrt(g_snd)
+        A = tdot(LiUT*np.sqrt(beta_star)) + np.eye(num_inducing)
-        tmp, _ = dtrtrs(Lm, tmp, lower=1)
+        LA = jitchol(A)
        A = tdot(tmp)
        B = np.eye(num_inducing) + A
        Bi, Lb, LBi, log_det_B = pdinv(B)
-        #compute posterior parameters
+        # back substutue to get b, P, v
-        tmp, _ = dtrtrs()
+        URiy = np.dot(U.T*beta_star,Y)
-        woodbury_vec = 
+        tmp, _ = dtrtrs(L, URiy, lower=1)
        b, _ = dtrtrs(LA, tmp, lower=1)
        tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
        v, _ = dtrtrs(L, tmp, lower=1, trans=1)
        tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
        P = tdot(tmp.T)
-        
+        #compute log marginal
        log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                       -np.sum(np.log(np.diag(LA)))*output_dim + \
                       0.5*output_dim*np.sum(np.log(beta_star)) + \
                       -0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
                       0.5*np.sum(np.square(b))
        #compute dL_dR
        Uv = np.dot(U, v)
        dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta_star**2
-        psi1V = np.dot(self.psi1, self.V_star)
+        # Compute dL_dKmm
-        Lmi_psi1V, _ = dtrtrs(Lm, self.psi1V, lower=1, trans=0)
+        vvT_P = tdot(v.reshape(-1,1)) + P
-        LBi_Lmi_psi1V, _ = dtrtrs(LB, Lmi_psi1V, lower=1, trans=0)
+        dL_dK = 0.5*(Kmmi - vvT_P)
        KiU = np.dot(Kmmi, U.T)
        dL_dK += np.dot(KiU*dL_dR, KiU.T) 
-        Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
+        # Compute dL_dU
-        b_psi1_Ki = self.beta_star * Kmmipsi1.T
+        vY = np.dot(v.reshape(-1,1),Y.T)
-        Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
+        dL_dU = vY - np.dot(vvT_P, U.T)
-        Kmmi = np.dot(self.Lmi.T,self.Lmi)
+        dL_dU *= beta_star
-        LBiLmi = np.dot(self.LBi,self.Lmi)
+        dL_dU -= 2.*KiU*dL_dR
        LBL_inv = np.dot(LBiLmi.T,LBiLmi)
        VVT = np.outer(self.V_star,self.V_star)
        VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
        Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
        psi1beta = self.psi1*self.beta_star.T
        H = self.Kmm + mdot(self.psi1,psi1beta.T)
        LH = jitchol(H)
        LHi = chol_inv(LH)
        Hi = np.dot(LHi.T,LHi)
-        betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
+        grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T}
        alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
        gamma_1 = mdot(VVT,self.psi1.T,Hi)
        pHip = mdot(self.psi1.T,Hi,self.psi1)
        gamma_2 = mdot(self.beta_star*pHip,self.V_star)
        gamma_3 = self.V_star * gamma_2
-        self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
+        #update gradients
-        self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y
+        kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
-        self._dL_dpsi0 += .5 *alpha #dC_dpsi0
+        likelihood.update_gradients(dL_dR)
        self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
-        self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
+        #construct a posterior object
-        self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
+        post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
        self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
-        self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
+        return post, log_marginal, grad_dict
        self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
        self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
        self._dpsi1_dtheta = 0
        self._dpsi1_dX = 0
        self._dKmm_dtheta = 0
        self._dKmm_dX = 0
        self._dpsi1_dX_jkj = 0
        self._dpsi1_dtheta_jkj = 0
        for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
            K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
            _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
            _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
            self._dpsi1_dtheta += self.kern._param_grad_helper(_dpsi1,self.X[i:i+1,:],self.Z)
            self._dKmm_dtheta += self.kern._param_grad_helper(_dKmm,self.Z)
            self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
            self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
        # the partial derivative vector for the likelihood
        if self.likelihood.num_params == 0:
            # save computation here.
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
            raise NotImplementedError, "heteroscedatic derivates not implemented."
        else:
            # likelihood is not heterscedatic
            dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
            Lmi_psi1 = mdot(self.Lmi,self.psi1)
            LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
            aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
            aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
            aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
            dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
            dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) *  Lmi_psi1 * dbstar_dnoise.T)
            dD_dnoise_1 =  mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
            alpha = mdot(LBiLmipsi1,self.V_star)
            alpha_ = mdot(LBiLmipsi1.T,alpha)
            dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise )
            dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
            dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
            dD_dnoise = dD_dnoise_1 + dD_dnoise_2
            self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
    def log_likelihood(self):
        """ Compute the (lower bound on the) log marginal likelihood """
        A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
        C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
        return A + C + D
    def _log_likelihood_gradients(self):
        pass
        return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
    def dL_dtheta(self):
        dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
        dL_dtheta += self.kern._param_grad_helper(self._dL_dpsi1,self.X,self.Z)
        dL_dtheta += self.kern._param_grad_helper(self._dL_dKmm,X=self.Z)
        dL_dtheta += self._dKmm_dtheta
        dL_dtheta += self._dpsi1_dtheta
        return dL_dtheta
    def dL_dZ(self):
        dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X)
        dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z)
        dL_dZ += self._dpsi1_dX
        dL_dZ += self._dKmm_dX
        return dL_dZ
    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
        assert X_variance_new is None, "FITC model is not defined for handling uncertain inputs."
        if self.likelihood.is_heteroscedastic:
            Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
            self.Diag = self.Diag0 * Iplus_Dprod_i
            self.P = Iplus_Dprod_i[:,None] * self.psi1.T
            self.RPT0 = np.dot(self.Lmi,self.psi1)
            self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
            self.R,info = dtrtrs(self.L,self.Lmi,lower=1)
            self.RPT = np.dot(self.R,self.P.T)
            self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
            self.w = self.Diag * self.likelihood.v_tilde
            self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
            self.mu = self.w + np.dot(self.P,self.Gamma)
            """
            Make a prediction for the generalized FITC model
            Arguments
            ---------
            X : Input prediction data - Nx1 numpy array (floats)
            """
            # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
            # Ci = I + (RPT0)Di(RPT0).T
            # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
            #   = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
            #   = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
            #   = I - V.T * V
            U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
            V,info = dtrtrs(U,self.RPT0.T,lower=1)
            C = np.eye(self.num_inducing) - np.dot(V.T,V)
            mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
            #self.C = C
            #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
            #self.mu_u = mu_u
            #self.U = U
            # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
            mu_H = np.dot(mu_u,self.mu)
            self.mu_H = mu_H
            Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
            # q(f_star|y) = N(f_star|mu_star,sigma2_star)
            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
            KR0T = np.dot(Kx.T,self.Lmi.T)
            mu_star = np.dot(KR0T,mu_H)
            if full_cov:
                Kxx = self.kern.K(Xnew,which_parts=which_parts)
                var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T))
            else:
                Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
                var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None]
            return mu_star[:,None],var
        else:
            raise NotImplementedError, "Heteroscedastic case not implemented."
            """
            Kx = self.kern.K(self.Z, Xnew)
            mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
            if full_cov:
                Kxx = self.kern.K(Xnew)
                var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
            else:
                Kxx = self.kern.Kdiag(Xnew)
                var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
            return mu,var[:,None]
            """
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@ -17,7 +17,7 @@ from posterior import Posterior
 import warnings
 from scipy import optimize
-class LaplaceInference(object):
+class Laplace(object):
    def __init__(self):
        """
@ -52,6 +52,7 @@ class LaplaceInference(object):
        f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
        self.f_hat = f_hat
        #Compute hessian and other variables at mode
        log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
@ -92,12 +93,11 @@ class LaplaceInference(object):
        iteration = 0
        while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
            W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
            W_f = W*f
            grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
            W_f = W*f
            b = W_f + grad # R+W p46 line 6.
            #W12BiW12Kb, B_logdet = self._compute_B_statistics(K, W.copy(), np.dot(K, b), likelihood.log_concave)
            W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
            W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
@ -216,7 +216,9 @@ class LaplaceInference(object):
        """
        if not log_concave:
            #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
-            W[W < 1e-6] = 1e-6  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+            # W[W<1e-6] = 1e-6
            # NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!!
            W.__setitem__(W < 1e-6, 1e-6, update=False)  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                # To cause the posterior to become less certain than the prior and likelihood,
                                # This is a property only held by non-log-concave likelihoods
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
-from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify
+from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify, jitchol, dtrtri
 class Posterior(object):
    """
@ -80,8 +80,8 @@ class Posterior(object):
    @property
    def covariance(self):
        if self._covariance is None:
-            LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
+            #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
-            self._covariance = self._K - tdot(LiK.T)
+            self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
        return self._covariance
    @property
@ -93,20 +93,30 @@ class Posterior(object):
    @property
    def woodbury_chol(self):
        if self._woodbury_chol is None:
-            #try computing woodbury chol from cov
+            #compute woodbury chol from 
            if self._woodbury_inv is not None:
                _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
                #Li = jitchol(self._woodbury_inv)
                #self._woodbury_chol, _ = dtrtri(Li)
                #W, _, _, _, = pdinv(self._woodbury_inv)
                #symmetrify(W)
                #self._woodbury_chol = jitchol(W)
            #try computing woodbury chol from cov
            elif self._covariance is not None:
                raise NotImplementedError, "TODO: check code here"
                B = self._K - self._covariance
                tmp, _ = dpotrs(self.K_chol, B)
                self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
                _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
            else:
                raise ValueError, "insufficient information to compute posterior"
        return self._woodbury_chol
    @property
    def woodbury_inv(self):
        if self._woodbury_inv is None:
-            self._woodbury_inv, _ = dpotri(self.woodbury_chol)
+            self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
            #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
            symmetrify(self._woodbury_inv)
        return self._woodbury_inv
--- a/GPy/inference/latent_function_inference/varDTC.py
+++ b/GPy/inference/latent_function_inference/varDTC.py
@ -34,7 +34,7 @@ class VarDTC(object):
        Note that L may have fewer columns than Y.
        """
        N, D = Y.shape
-        if (N>D):
+        if (N>=D):
            return param_to_array(Y)
        else:
            return jitchol(tdot(Y))
@ -91,7 +91,7 @@ class VarDTC(object):
                tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
            else:
                tmp = psi1 * (np.sqrt(beta))
-            tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
+            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp)
        # factor B
@ -100,14 +100,16 @@ class VarDTC(object):
        LB = jitchol(B)
        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
-        self.YYTfactor = self.get_YYTfactor(Y)
+        #self.YYTfactor = self.get_YYTfactor(Y)
-        VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
+        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        VVT_factor = beta*param_to_array(Y)
        trYYT = self.get_trYYT(Y)
        psi1Vf = np.dot(psi1.T, VVT_factor)
        # back substutue C into psi1Vf
-        tmp, info1 = dtrtrs(Lm, np.asfortranarray(psi1Vf), lower=1, trans=0)
+        tmp, info1 = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
-        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, np.asfortranarray(tmp), lower=1, trans=0)
+        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)
@ -152,8 +154,8 @@ class VarDTC(object):
                raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
            else:
                LBi = chol_inv(LB)
-                Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
+                Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
-                _LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+                _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
                partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
                partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
@ -195,12 +197,14 @@ class VarDTC(object):
        if VVT_factor.shape[1] == Y.shape[1]:
            woodbury_vector = Cpsi1Vf # == Cpsi1V
        else:
            print 'foobar'
            psi1V = np.dot(Y.T*beta, psi1).T
-            tmp, _ = dtrtrs(Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
-        Bi, _ = dpotri(LB, lower=0)
+        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = dpotri(LB, lower=1)[0]
        woodbury_inv = backsub_both_sides(Lm, np.eye(num_inducing) - Bi)
--- a/GPy/inference/optimization/optimization.py
+++ b/GPy/inference/optimization/optimization.py
@ -59,8 +59,9 @@ class Optimizer():
        """
        See GPy.plotting.matplot_dep.inference_plots
        """
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import inference_plots
+        from ...plotting.matplot_dep import inference_plots
        inference_plots.plot_optimizer(self)
--- a/GPy/kern/parts/bias.py
+++ b/GPy/kern/parts/bias.py
@ -14,7 +14,8 @@ class Bias(Kernpart):
        :type variance: float
        """
        super(Bias, self).__init__(input_dim, name)
-        self.variance = Param("variance", variance)
+        from ...core.parameterization.transformations import Logexp
        self.variance = Param("variance", variance, Logexp())
        self.add_parameter(self.variance)
    def K(self,X,X2,target):
--- a/GPy/kern/parts/linear.py
+++ b/GPy/kern/parts/linear.py
@ -61,7 +61,7 @@ class Linear(Kernpart):
    def update_gradients_full(self, dL_dK, X):
        #self.variances.gradient[:] = 0
-        self._param_grad_helper(dL_dK, X, self.variances.gradient)
+        self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
        tmp = dL_dKdiag[:, None] * X ** 2
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@ -53,7 +53,7 @@ class RBF(Kernpart):
        self.variance = Param('variance', variance, Logexp())
-        self.lengthscale = Param('lengthscale', lengthscale)
+        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
        self.lengthscale.add_observer(self, self.update_lengthscale)
        self.update_lengthscale(self.lengthscale)
@ -264,7 +264,7 @@ class RBF(Kernpart):
            }
            """
            num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
-            X, dvardLdK = param_to_array(X, dvardLdK)
+            X, dvardLdK, var_len3 = param_to_array(X, dvardLdK, var_len3)
            weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
        else:
            code = """
@ -281,7 +281,7 @@ class RBF(Kernpart):
            }
            """
            num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
-            X, X2, dvardLdK = param_to_array(X, X2, dvardLdK)
+            X, X2, dvardLdK, var_len3 = param_to_array(X, X2, dvardLdK, var_len3)
            weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
        return target
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -116,7 +116,8 @@ class Bernoulli(Likelihood):
            Each y_i must be in {0, 1}
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        #objective = (link_f**y) * ((1.-link_f)**(1.-y))
        objective = np.where(y, link_f, 1.-link_f)
        return np.exp(np.sum(np.log(objective)))
    def logpdf_link(self, link_f, y, extra_data=None):
@ -136,7 +137,9 @@ class Bernoulli(Likelihood):
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        #objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
        state = np.seterr(divide='ignore')
        objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
        np.seterr(**state)
        return np.sum(objective)
    def dlogpdf_dlink(self, link_f, y, extra_data=None):
@ -155,7 +158,10 @@ class Bernoulli(Likelihood):
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        grad = (y/link_f) - (1.-y)/(1-link_f)
+        #grad = (y/link_f) - (1.-y)/(1-link_f)
        state = np.seterr(divide='ignore')
        grad = np.where(y, 1./link_f, -1./(1-link_f))
        np.seterr(**state)
        return grad
    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
@ -180,7 +186,10 @@ class Bernoulli(Likelihood):
            (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
        state = np.seterr(divide='ignore')
        d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
        np.seterr(**state)
        return d2logpdf_dlink2
    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
@ -199,7 +208,10 @@ class Bernoulli(Likelihood):
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
-        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
        state = np.seterr(divide='ignore')
        d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
        np.seterr(**state)
        return d3logpdf_dlink3
    def samples(self, gp):
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -88,12 +88,9 @@ class Gaussian(Likelihood):
        return mu, var, low, up
    def predictive_mean(self, mu, sigma):
        #new_sigma2 = self.predictive_variance(mu, sigma)
        #return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
        return mu
    def predictive_variance(self, mu, sigma, predictive_mean=None):
        #what on earth was the sum of precisions doing here? JH
        return self.variance + sigma**2
    def pdf_link(self, link_f, y, extra_data=None):
@ -305,3 +302,11 @@ class Gaussian(Likelihood):
        gp = gp.flatten()
        Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
        return Ysim.reshape(orig_shape)
    def log_predictive_density(self, y_test, mu_star, var_star):
        """
        assumes independence
        """
        v = var_star + self.variance
        return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -23,7 +23,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
    """
    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
-                 Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
+                 Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
        if X == None:
            X = self.initialise_latent(init, input_dim, Y)
        self.init = init
@ -37,7 +37,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
        if kernel is None:
            kernel = kern.rbf(input_dim) # + kern.white(input_dim)
-
+        
        if likelihood is None:
            likelihood = Gaussian()
        self.q = Normal(X, X_variance)
        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
        self.add_parameter(self.q, index=0)
@ -70,9 +72,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
        return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
    def parameters_changed(self):
-        super(BayesianGPLVM, self).parameters_changed()
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._log_marginal_likelihood -= self.KL_divergence()
+        self._update_gradients_Z(add=False)
        self._log_marginal_likelihood -= self.KL_divergence()
        dL_dmu, dL_dS = self.dL_dmuS()
        # dL:
@ -159,6 +162,38 @@ class BayesianGPLVM(SparseGP, GPLVM):
        return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
 class BayesianGPLVMWithMissingData(BayesianGPLVM):
    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, 
        Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
        from ..util.subarray_and_sorting import common_subarrays
        self.subarrays = common_subarrays(Y)
        import ipdb;ipdb.set_trace()
        BayesianGPLVM.__init__(self, Y, input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, name=name, **kwargs)
    def parameters_changed(self):
        super(BayesianGPLVM, self).parameters_changed()
        self._log_marginal_likelihood -= self.KL_divergence()
        dL_dmu, dL_dS = self.dL_dmuS()
        # dL:
        self.q.mean.gradient  = dL_dmu
        self.q.variance.gradient  = dL_dS  
        # dKL:
        self.q.mean.gradient -= self.X
        self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
 if __name__ == '__main__':
    import numpy as np
    X = np.random.randn(20,2)
    W = np.linspace(0,1,10)[None,:]
    Y = (X*W).sum(1)
    missing = np.random.binomial(1,.1,size=Y.shape)
    pass
 def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
    objective function for fitting the latent variables for test points
--- a/GPy/models/sparse_gp_regression.py
+++ b/GPy/models/sparse_gp_regression.py
@ -6,6 +6,7 @@ import numpy as np
 from ..core import SparseGP
 from .. import likelihoods
 from .. import kern
 from ..inference.latent_function_inference import VarDTC
 class SparseGPRegression(SparseGP):
    """
@ -43,8 +44,7 @@ class SparseGPRegression(SparseGP):
        likelihood = likelihoods.Gaussian()
-        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
+        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC())
        #self.ensure_default_constraints()
    def _getstate(self):
        return SparseGP._getstate(self)
--- a/GPy/plotting/matplot_dep/inference_plots.py
+++ b/GPy/plotting/matplot_dep/inference_plots.py
@ -2,6 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import pylab as pb
 import sys
 #import numpy as np
 #import Tango
 #from base_plots import gpplot, x_frame1D, x_frame2D
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -94,14 +94,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
                ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
                #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
-        #add inducing inputs (if a sparse model is used)
+        
        if hasattr(model,"Z"):
            #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
            Zu = param_to_array(model.Z[:,free_dims])
            z_height = ax.get_ylim()[0]
            ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
        #add error bars for uncertain (if input uncertainty is being modelled)
        if hasattr(model,"has_uncertain_inputs"):
            ax.errorbar(model.X[which_data, free_dims], model.likelihood.data[which_data, 0],
@ -115,6 +108,15 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)
        #add inducing inputs (if a sparse model is used)
        if hasattr(model,"Z"):
            #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
            Zu = param_to_array(model.Z[:,free_dims])
            z_height = ax.get_ylim()[0]
            ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
    #2D plotting
    elif len(free_dims) == 2:
@ -132,7 +134,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
            m, _ = model._raw_predict(Xgrid, which_parts=which_parts)
            Y = model.likelihood.Y
        else:
-            m, _, _, _ = model.predict(Xgrid, which_parts=which_parts,sampling=False)
+            m, _, _, _ = model.predict(Xgrid, which_parts=which_parts)
            Y = model.likelihood.data
        for d in which_data_ycols:
            m_d = m[:,d].reshape(resolution, resolution).T
--- a/GPy/plotting/matplot_dep/visualize.py
+++ b/GPy/plotting/matplot_dep/visualize.py
@ -4,7 +4,6 @@ import GPy
 import numpy as np
 import matplotlib as mpl
 import time
 import Image
 try:
    import visual
    visual_available = True
@ -324,6 +323,7 @@ class image_show(matplotlib_show):
        else:
            self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
        if not self.palette == []: # applying using an image palette (e.g. if the image has been quantized)
            from PIL import Image
            self.vals = Image.fromarray(self.vals.astype('uint8'))
            self.vals.putpalette(self.palette) # palette is a list, must be loaded before calling this function
--- a/GPy/testing/index_operations_tests.py
+++ b/GPy/testing/index_operations_tests.py
@ -0,0 +1,60 @@
 '''
 Created on 12 Feb 2014
@author: maxz
 '''
 import unittest
 import numpy as np
 from GPy.core.parameterization.index_operations import ParameterIndexOperations,\
    ParameterIndexOperationsView
 one, two, three = 'one', 'two', 'three'
 class Test(unittest.TestCase):
    def setUp(self):
        self.param_index = ParameterIndexOperations()
        self.param_index.add(one, [3])
        self.param_index.add(two, [0,5])
        self.param_index.add(three, [2,4,7])
    def test_remove(self):
        self.param_index.remove(three, np.r_[3:10])
        self.assertListEqual(self.param_index[three].tolist(), [2])
        self.param_index.remove(one, [1])
        self.assertListEqual(self.param_index[one].tolist(), [3])        
    def test_index_view(self):
        #=======================================================================
        #          0    1    2    3    4    5    6    7    8    9
        #                        one
        #         two                      two
        #                   three     three          three
        # view:             [0    1    2    3    4    5    ]
        #=======================================================================
        view = ParameterIndexOperationsView(self.param_index, 2, 6)
        self.assertSetEqual(set(view.properties()), set([one, two, three]))
        for v,p in zip(view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
            self.assertEqual(v, p)
        self.assertSetEqual(set(view[two]), set([3]))
        self.assertSetEqual(set(self.param_index[two]), set([0, 5]))
        view.add(two, np.array([0]))
        self.assertSetEqual(set(view[two]), set([0,3]))
        self.assertSetEqual(set(self.param_index[two]), set([0, 2, 5]))
        view.clear()
        for v,p in zip(view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
            self.assertEqual(v, p)
            self.assertEqual(v, [])
        param_index = ParameterIndexOperations()
        param_index.add(one, [3])
        param_index.add(two, [0,5])
        param_index.add(three, [2,4,7])
        view2 = ParameterIndexOperationsView(param_index, 2, 6)
        view.update(view2)
        for [i,v],[i2,v2] in zip(sorted(param_index.items()), sorted(self.param_index.items())):
            self.assertEqual(i, i2)
            self.assertTrue(np.all(v == v2))
 if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_index_view']
    unittest.main()
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@ -1,10 +1,10 @@
 import numpy as np
 import unittest
 import GPy
-from GPy.models import GradientChecker
+from ..models import GradientChecker
 import functools
 import inspect
-from GPy.likelihoods import link_functions
+from ..likelihoods import link_functions
 from ..core.parameterization import Param
 from functools import partial
 #np.random.seed(300)
@ -516,9 +516,8 @@ class TestNoiseModels(object):
        Y = Y/Y.max()
        white_var = 1e-6
        kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
-        laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_likelihood = GPy.inference.latent_function_inference.Laplace()
        m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
        m.ensure_default_constraints()
        m['white'].constrain_fixed(white_var)
        #Set constraints
@ -541,6 +540,10 @@ class TestNoiseModels(object):
            #import ipdb; ipdb.set_trace()
            #NOTE this test appears to be stochastic for some likelihoods (student t?)
            # appears to all be working in test mode right now...
        if not m.checkgrad():
            import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
        assert m.checkgrad(step=step)
    ###########
@ -555,7 +558,6 @@ class TestNoiseModels(object):
        kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
        ep_inf = GPy.inference.latent_function_inference.EP()
        m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
        m.ensure_default_constraints()
        m['white'].constrain_fixed(white_var)
        for param_num in range(len(param_names)):
@ -644,13 +646,11 @@ class LaplaceTests(unittest.TestCase):
        m1['variance'] = initial_var_guess
        m1['variance'].constrain_bounded(1e-4, 10)
        m1['rbf'].constrain_bounded(1e-4, 10)
        m1.ensure_default_constraints()
        m1.randomize()
        gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
-        laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
+        laplace_inf = GPy.inference.latent_function_inference.Laplace()
        m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
        m2.ensure_default_constraints()
        m2['white'].constrain_fixed(1e-6)
        m2['rbf'].constrain_bounded(1e-4, 10)
        m2['variance'].constrain_bounded(1e-4, 10)
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@ -0,0 +1,93 @@
 '''
 Created on Feb 13, 2014
@author: maxzwiessele
 '''
 import unittest
 import GPy
 import numpy as np
 class Test(unittest.TestCase):
    def setUp(self):
        self.rbf = GPy.kern.rbf(1)
        self.white = GPy.kern.white(1)
        from GPy.core.parameterization import Param
        from GPy.core.parameterization.transformations import Logistic
        self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
        self.test1 = GPy.core.Parameterized("test model")
        self.test1.add_parameter(self.white)
        self.test1.add_parameter(self.rbf, 0)
        self.test1.add_parameter(self.param)
    def test_add_parameter(self):
        self.assertEquals(self.rbf._parent_index_, 0)
        self.assertEquals(self.white._parent_index_, 1)
        pass
    def test_fixes(self):
        self.white.fix(warning=False)
        self.test1.remove_parameter(self.test1.param)
        self.assertTrue(self.test1._has_fixes())
        from GPy.core.parameterization.transformations import FIXED, UNFIXED
        self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])
        self.test1.add_parameter(self.white, 0)
        self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
    def test_remove_parameter(self):
        from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__
        self.white.fix()
        self.test1.remove_parameter(self.white)
        self.assertIs(self.test1._fixes_,None)
        self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
        self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
        self.assertEquals(self.white.white.constraints._offset, 0)
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)        
        self.test1.add_parameter(self.white, 0)
        self.assertIs(self.test1.constraints, self.white.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)        
        self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0])
        self.assertIs(self.white._fixes_,None)
        self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)
        self.test1.remove_parameter(self.white)
        self.assertIs(self.test1._fixes_,None)
        self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
        self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)        
    def test_add_parameter_already_in_hirarchy(self):
        self.test1.add_parameter(self.white._parameters_[0])
    def test_default_constraints(self):
        self.assertIs(self.rbf.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
        self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
        from GPy.core.parameterization.transformations import Logexp
        kern = self.rbf+self.white
        self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3))
    def test_constraints(self):
        self.rbf.constrain(GPy.transformations.Square(), False)
        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2))
        self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2])
        self.test1.remove_parameter(self.rbf)
        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])
    def test_constraints_views(self):
        self.assertEqual(self.white.constraints._offset, 2)
        self.assertEqual(self.rbf.constraints._offset, 0)
        self.assertEqual(self.param.constraints._offset, 3)
 if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_add_parameter']
    unittest.main()
--- a/GPy/util/init.py
+++ b/GPy/util/init.py
@ -10,6 +10,7 @@ import datasets
 import mocap
 import decorators
 import classification
 import subarray_and_sorting
 import caching
 try:
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@ -29,7 +29,7 @@ class Cacher(object):
            return self.cached_outputs[-1]
    def on_cache_changed(self, X):
-        print id(X)
+        #print id(X)
        i = self.cached_inputs.index(X)
        self.inputs_changed[i] = True
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -17,7 +17,8 @@ import os
 from config import *
 if np.all(np.float64((scipy.__version__).split('.')[:2]) >= np.array([0, 12])):
-    import scipy.linalg.lapack as lapack
+    #import scipy.linalg.lapack.clapack as lapack
    from scipy.linalg import lapack
 else:
    from scipy.linalg.lapack import flapack as lapack
@ -41,42 +42,103 @@ else:
        _blas_available = False
        warnings.warn("warning: caught this exception:" + str(e))
-def dtrtri(L, lower=0):
+def force_F_ordered_symmetric(A):
    """
-    Wrapper for lapack dtrtrs function
+    return a F ordered version of A, assuming A is symmetric
    """
    if A.flags['F_CONTIGUOUS']:
        return A
    if A.flags['C_CONTIGUOUS']:
        return A.T
    else:
        return np.asfortranarray(A)
 def force_F_ordered(A):
    """
    return a F ordered version of A, assuming A is triangular
    """
    if A.flags['F_CONTIGUOUS']:
        return A
    print "why are your arrays not F order?"
    return np.asfortranarray(A)
 def jitchol(A, maxtries=5):
    A = force_F_ordered_symmetric(A)
    L, info = lapack.dpotrf(A, lower=1)
    if info == 0:
        return L
    else:
        if maxtries==0:
            raise linalg.LinAlgError, "not positive definite, even with jitter."
        diagA = np.diag(A)
        if np.any(diagA <= 0.):
            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
        jitter = diagA.mean() * 1e-6
        return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
 #def jitchol(A, maxtries=5):
 #    A = np.ascontiguousarray(A)
 #    L, info = lapack.dpotrf(A, lower=1)
 #    if info == 0:
 #        return L
 #    else:
 #        diagA = np.diag(A)
 #        if np.any(diagA <= 0.):
 #            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
 #        jitter = diagA.mean() * 1e-6
 #        while maxtries > 0 and np.isfinite(jitter):
 #            print 'Warning: adding jitter of {:.10e}'.format(jitter)
 #            try:
 #                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
 #            except:
 #                jitter *= 10
 #            finally:
 #                maxtries -= 1
 #        raise linalg.LinAlgError, "not positive definite, even with jitter."
 #
 def dtrtri(L, lower=1):
    """
    Wrapper for lapack dtrtri function
    Inverse of L
    :param L: Triangular Matrix L
    :param lower: is matrix lower (true) or upper (false)
    :returns: Li, info
    """
    L = force_F_ordered(L)
    return lapack.dtrtri(L, lower=lower)
-def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
+def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
    """
    Wrapper for lapack dtrtrs function
-    :param A: Matrix A
+    :param A: Matrix A(triangular)
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns:
    """
    A = force_F_ordered(A)
    #Note: B does not seem to need to be F ordered!
    return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
-def dpotrs(A, B, lower=0):
+def dpotrs(A, B, lower=1):
    """
    Wrapper for lapack dpotrs function
    :param A: Matrix A
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
    :returns:
    """
    A = force_F_ordered(A)
    return lapack.dpotrs(A, B, lower=lower)
-def dpotri(A, lower=0):
+def dpotri(A, lower=1):
    """
    Wrapper for lapack dpotri function
@ -85,7 +147,12 @@ def dpotri(A, lower=0):
    :returns: A inverse
    """
-    return lapack.dpotri(A, lower=lower)
+    assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
    A = force_F_ordered(A)
    R, info = lapack.dpotri(A, lower=0)
    symmetrify(R)
    return R, info
 def pddet(A):
    """
@ -133,60 +200,8 @@ def _mdot_r(a, b):
            b = b[0]
    return np.dot(a, b)
 def jitchol(A, maxtries=5):
    A = np.asfortranarray(A)
    L, info = lapack.dpotrf(A, lower=1)
    if info == 0:
        return L
    else:
        diagA = np.diag(A)
        if np.any(diagA <= 0.):
            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
        jitter = diagA.mean() * 1e-6
        while maxtries > 0 and np.isfinite(jitter):
            print 'Warning: adding jitter of {:.10e}'.format(jitter)
            try:
                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
            except:
                jitter *= 10
            finally:
                maxtries -= 1
        raise linalg.LinAlgError, "not positive definite, even with jitter."
 def jitchol_old(A, maxtries=5):
    """
    :param A: An almost pd square matrix
    :rval L: the Cholesky decomposition of A
    .. note:
      Adds jitter to K, to enforce positive-definiteness
      if stuff breaks, please check:
      np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
    """
    try:
        return linalg.cholesky(A, lower=True)
    except linalg.LinAlgError:
        diagA = np.diag(A)
        if np.any(diagA < 0.):
            raise linalg.LinAlgError, "not pd: negative diagonal elements"
        jitter = diagA.mean() * 1e-6
        for i in range(1, maxtries + 1):
            print '\rWarning: adding jitter of {:.10e}                        '.format(jitter),
            try:
                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
            except:
                jitter *= 10
        raise linalg.LinAlgError, "not positive definite, even with jitter."
 def pdinv(A, *args):
    """
    :param A: A DxD pd numpy array
    :rval Ai: the inverse of A
@ -201,7 +216,7 @@ def pdinv(A, *args):
    """
    L = jitchol(A, *args)
    logdet = 2.*np.sum(np.log(np.diag(L)))
-    Li = chol_inv(L)
+    Li = dtrtri(L)
    Ai, _ = lapack.dpotri(L)
    # Ai = np.tril(Ai) + np.tril(Ai,-1).T
    symmetrify(Ai)
@ -209,7 +224,7 @@ def pdinv(A, *args):
    return Ai, L, Li, logdet
-def chol_inv(L):
+def dtrtri(L):
    """
    Inverts a Cholesky lower triangular matrix
@ -218,7 +233,8 @@ def chol_inv(L):
    """
-    return lapack.dtrtri(L, lower=True)[0]
+    L = force_F_ordered(L)
    return lapack.dtrtri(L, lower=1)[0]
 def multiple_pdinv(A):
@ -234,7 +250,7 @@ def multiple_pdinv(A):
    N = A.shape[-1]
    chols = [jitchol(A[:, :, i]) for i in range(N)]
    halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
-    invs = [lapack.dpotri(L[0], True)[0] for L in chols]
+    invs = [dpotri(L[0], True)[0] for L in chols]
    invs = [np.triu(I) + np.triu(I, 1).T for I in invs]
    return np.dstack(invs), np.array(halflogdets)
@ -425,7 +441,8 @@ def tdot_blas(mat, out=None):
    symmetrify(out, upper=True)
-    return out
+
    return np.ascontiguousarray(out)
 def tdot(*args, **kwargs):
    if _blas_available:
@ -557,24 +574,24 @@ def cholupdate(L, x):
 def backsub_both_sides(L, X, transpose='left'):
    """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
    if transpose == 'left':
-        tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
+        tmp, _ = dtrtrs(L, X, lower=1, trans=1)
-        return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
+        return dtrtrs(L, tmp.T, lower=1, trans=1)[0].T
    else:
-        tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
+        tmp, _ = dtrtrs(L, X, lower=1, trans=0)
-        return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T
+        return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
 def PCA(Y, input_dim):
    """
-Principal component analysis: maximum likelihood solution by SVD
+    Principal component analysis: maximum likelihood solution by SVD
-:param Y: NxD np.array of data
+    :param Y: NxD np.array of data
-:param input_dim: int, dimension of projection
+    :param input_dim: int, dimension of projection
-:rval X: - Nxinput_dim np.array of dimensionality reduced data
+    :rval X: - Nxinput_dim np.array of dimensionality reduced data
-:rval W: - input_dimxD mapping from X to Y
+    :rval W: - input_dimxD mapping from X to Y
-"""
+    """
    if not np.allclose(Y.mean(axis=0), 0.0):
        print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"