Merging changed files.

2026-05-15 06:52:39 +02:00 · 2013-09-21 12:15:58 +01:00 · 2013-09-21 12:15:58 +01:00 · 94ddfa7973
commit 94ddfa7973
parent d9f895603b 7af2d62ee6
45 changed files with 1176 additions and 478 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -176,7 +176,7 @@ class GP(GPBase):

        .. Note:: For multiple output models only
        """
-        assert hasattr(self,'multioutput')
+        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
        index = np.ones_like(Xnew)*output
        Xnew = np.hstack((Xnew,index))

@ -204,8 +204,7 @@ class GP(GPBase):

        .. Note:: For multiple output models only
        """
-        assert hasattr(self,'multioutput')
-
+        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
        # creates an index column and appends it to _Xnew
        index = np.ones_like(_Xnew)*output
        _Xnew = np.hstack((_Xnew,index))
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@ -59,28 +59,28 @@ class GPBase(Model):

    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None,output=None):
        """
-            Plot the GP's view of the world, where the data is normalized and the
-              - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-              - In two dimsensions, a contour-plot shows the mean predicted function
-              - Not implemented in higher dimensions
+        Plot the GP's view of the world, where the data is normalized and the
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimensions, a contour-plot shows the mean predicted function
+          - Not implemented in higher dimensions

-            :param samples: the number of a posteriori samples to plot
-            :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-            :param which_data: which if the training data to plot (default all)
-            :type which_data: 'all' or a slice object to slice self.X, self.Y
-            :param which_parts: which of the kernel functions to plot (additively)
-            :type which_parts: 'all', or list of bools
-            :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-            :type resolution: int
-            :param full_cov:
-            :type full_cov: bool
-                    :param fignum: figure to plot on.
-            :type fignum: figure number
-            :param ax: axes to plot on.
-            :type ax: axes handle
+        :param samples: the number of a posteriori samples to plot
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+        :param which_data: which of the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param which_parts: which of the kernel functions to plot (additively)
+        :type which_parts: 'all', or list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param full_cov:
+        :type full_cov: bool
+        :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on.
+        :type ax: axes handle

-            :param output: which output to plot (for multiple output models only)
-            :type output: integer (first output is 0)
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
        """
        if which_data == 'all':
            which_data = slice(None)
@ -89,69 +89,81 @@ class GPBase(Model):
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)

-        if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
-            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-            if samples == 0:
-                m, v = self._raw_predict(Xnew, which_parts=which_parts)
-                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+        if not hasattr(self,'multioutput'):
+
+            if self.X.shape[1] == 1:
+                Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+                if samples == 0:
+                    m, v = self._raw_predict(Xnew, which_parts=which_parts)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+                    ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+                else:
+                    m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
+                    v = v.reshape(m.size,-1) if len(v.shape)==3 else v
+                    Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+                    for i in range(samples):
+                        ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+
                ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+                ax.set_xlim(xmin, xmax)
+                ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_ylim(ymin, ymax)
+
+                if hasattr(self,'Z'):
+                    Zu = self.Z * self._Xscale + self._Xoffset
+                    ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+            elif self.X.shape[1] == 2:
+                resolution = resolution or 50
+                Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
+                m, v = self._raw_predict(Xnew, which_parts=which_parts)
+                m = m.reshape(resolution, resolution).T
+                ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+                ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
+                ax.set_xlim(xmin[0], xmax[0])
+                ax.set_ylim(xmin[1], xmax[1])
+
            else:
-                m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
-                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
-                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
-                for i in range(samples):
-                    ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
-            ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
-            ax.set_xlim(xmin, xmax)
-            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
-            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-            ax.set_ylim(ymin, ymax)
-
-        elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
-            resolution = resolution or 50
-            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
-            m, v = self._raw_predict(Xnew, which_parts=which_parts)
-            m = m.reshape(resolution, resolution).T
-            ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
-            ax.set_xlim(xmin[0], xmax[0])
-            ax.set_ylim(xmin[1], xmax[1])
-
-
-        elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
-            output -= 1
-            assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
-            Xu = self.X[self.X[:,-1]==output ,0:1]
-            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
-
-            if samples == 0:
-                m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
-                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
-                ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
-            else:
-                m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
-                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
-                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
-                for i in range(samples):
-                    ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
-            ax.set_xlim(xmin, xmax)
-            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
-            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-            ax.set_ylim(ymin, ymax)
-
-            if hasattr(self,'Z'):
-                Zu = self.Z[self.Z[:,-1]==output,:]
-                Zu = self.Z * self._Xscale + self._Xoffset
-                Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
-                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
-
-        elif self.X.shape[1] == 3 and hasattr(self,'multioutput'):
-            raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
-            output -= 1
-            assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
-
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
        else:
-            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+            assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+
+            if self.X.shape[1] == 2:
+                assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
+                Xu = self.X[self.X[:,-1]==output ,0:1]
+                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+
+                if samples == 0:
+                    m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+                    ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
+                else:
+                    m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
+                    v = v.reshape(m.size,-1) if len(v.shape)==3 else v
+                    Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+                    for i in range(samples):
+                        ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+                ax.set_xlim(xmin, xmax)
+                ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_ylim(ymin, ymax)
+
+            elif self.X.shape[1] == 3:
+                raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+                assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+        if hasattr(self,'Z'):
+            Zu = self.Z[self.Z[:,-1]==output,:]
+            Zu = self.Z * self._Xscale + self._Xoffset
+            Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
+            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+

    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
        """
@ -203,7 +215,7 @@ class GPBase(Model):
            if plotdims == 1:
                resolution = resolution or 200

-                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now

                fixed_dims = np.array([i for i,v in fixed_inputs])
                freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -31,8 +31,8 @@ class Model(Parameterized):
    def getstate(self):
        """
        Get the current state of the class.
-        
        Inherited from Parameterized, so add those parameters to the state
+
        :return: list of states from the model.

        """
@ -46,7 +46,7 @@ class Model(Parameterized):
        call Parameterized with the rest of the state

        :param state: the state of the model.
-        :type state: list as returned from getstate.        
+        :type state: list as returned from getstate.
        """
        self.preferred_optimizer = state.pop()
        self.sampling_runs = state.pop()
@ -397,17 +397,20 @@ class Model(Parameterized):
            return np.nan
        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld

-    def __str__(self, names=None):
-        if names is None:
-            names = self._get_print_names()
-        s = Parameterized.__str__(self, names=names).split('\n')
+    def __str__(self):
+        s = Parameterized.__str__(self).split('\n')
+        #def __str__(self, names=None):
+        #    if names is None:
+        #        names = self._get_print_names()
+        #s = Parameterized.__str__(self, names=names).split('\n')
        # add priors to the string
        if self.priors is not None:
            strs = [str(p) if p is not None else '' for p in self.priors]
        else:
-            strs = [''] * len(self._get_param_names())
-        name_indices = self.grep_param_names("|".join(names))
-        strs = np.array(strs)[name_indices]
+            strs = [''] * len(self._get_params())
+       #         strs = [''] * len(self._get_param_names())
+       #     name_indices = self.grep_param_names("|".join(names))
+       #     strs = np.array(strs)[name_indices]
        width = np.array(max([len(p) for p in strs] + [5])) + 4

        log_like = self.log_likelihood()
--- a/GPy/core/parameterized.py
+++ b/GPy/core/parameterized.py
@ -27,9 +27,9 @@ class Parameterized(object):

    def _get_param_names(self):
        raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
-    def _get_print_names(self):
-        """ Override for which names to print out, when using print m """
-        return self._get_param_names()
+    #def _get_print_names(self):
+    #    """ Override for which names to print out, when using print m """
+    #    return self._get_param_names()

    def pickle(self, filename, protocol=None):
        if protocol is None:
@ -63,10 +63,10 @@ class Parameterized(object):
        """
        Get the current state of the class,
        here just all the indices, rest can get recomputed
-        
        For inheriting from Parameterized:
-        Allways append the state of the inherited object 
-        and call down to the inherited object in setstate!! 
+
+        Always append the state of the inherited object
+        and call down to the inherited object in setstate!!
        """
        return [self.tied_indices,
                self.fixed_indices,
@ -336,26 +336,30 @@ class Parameterized(object):
        n = [nn for i, nn in enumerate(n) if not i in remove]
        return n

-    @property
-    def all(self):
-        return self.__str__(self._get_param_names())
+    #@property
+    #def all(self):
+    #    return self.__str__(self._get_param_names())


-    def __str__(self, names=None, nw=30):
+    #def __str__(self, names=None, nw=30):
+    def __str__(self, nw=30):
        """
        Return a string describing the parameter names and their ties and constraints
        """
-        if names is None:
-            names = self._get_print_names()
-        name_indices = self.grep_param_names("|".join(names))
+        names = self._get_param_names()
+        #if names is None:
+        #    names = self._get_print_names()
+        #name_indices = self.grep_param_names("|".join(names))
        N = len(names)

        if not N:
            return "This object has no free parameters."
        header = ['Name', 'Value', 'Constraints', 'Ties']
-        values = self._get_params()[name_indices] # map(str,self._get_params())
+        values = self._get_params() # map(str,self._get_params())
+        #values = self._get_params()[name_indices] # map(str,self._get_params())
        # sort out the constraints
-        constraints = [''] * len(self._get_param_names())
+        constraints = [''] * len(names)
+        #constraints = [''] * len(self._get_param_names())
        for i, t in zip(self.constrained_indices, self.constraints):
            for ii in i:
                constraints[ii] = t.__str__()
@ -368,7 +372,10 @@ class Parameterized(object):
            for j in tie:
                ties[j] = '(' + str(i) + ')'

-        values = ['%.4f' % float(v) for v in values]
+        if values.size == 1:
+            values = ['%.4f' %float(values)]
+        else:
+            values = ['%.4f' % float(v) for v in values]
        max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
        max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
        max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
@ -383,3 +390,77 @@ class Parameterized(object):


        return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
+
+    def grep_model(self,regexp):
+        """
+        Print a table (name, value, constraints, ties) of the parameters matching regexp.
+
+        :param regexp: pattern passed to self.grep_param_names to select the parameters.
+        :return: the string "Match not found." when nothing matches; otherwise None (the table is printed).
+        """
+        regexp_indices = self.grep_param_names(regexp)
+        all_names = self._get_param_names()
+        names = [all_names[pj] for pj in regexp_indices]
+        if not names:
+            return "Match not found."
+
+        # matched.index(ii) maps a global parameter index to its row in the printed table
+        matched = regexp_indices.tolist()
+        header = ['Name', 'Value', 'Constraints', 'Ties']
+        all_values = self._get_params()
+        values = np.array([all_values[pj] for pj in regexp_indices])
+
+        constraints = [''] * len(names)
+        _constrained_indices,aux = self._pick_elements(regexp_indices,self.constrained_indices)
+        _constraints = [self.constraints[pj] for pj in aux]
+        for i, t in zip(_constrained_indices, _constraints):
+            for ii in i:
+                constraints[matched.index(ii)] = t.__str__()
+
+        _fixed_indices,aux = self._pick_elements(regexp_indices,self.fixed_indices)
+        for i in _fixed_indices:
+            for ii in i:
+                # index by table row: the global index ii can fall outside (or on the wrong row of) the table
+                constraints[matched.index(ii)] = 'Fixed'
+
+        _tied_indices,aux = self._pick_elements(regexp_indices,self.tied_indices)
+        ties = [''] * len(names)
+        for i,ti in zip(_tied_indices,aux):
+            for ii in i:
+                ties[matched.index(ii)] = '(' + str(ti) + ')'
+
+        if values.size == 1:
+            values = ['%.4f' %float(values)]
+        else:
+            values = ['%.4f' % float(v) for v in values]
+
+        # each column is as wide as its longest entry (or its header) plus 4 characters of padding
+        max_names = max([len(n) for n in names] + [len(header[0])])
+        max_values = max([len(v) for v in values] + [len(header[1])])
+        max_constraint = max([len(c) for c in constraints] + [len(header[2])])
+        max_ties = max([len(t) for t in ties] + [len(header[3])])
+        cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
+
+        header_string = '|'.join(["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))])
+        param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
+        print header_string
+        print '-' * len(header_string)
+        for string in param_string:
+            print string
+
+    def _pick_elements(self,regexp_ind,array_list):
+        """
+        Restrict each index array in array_list to the entries also present in regexp_ind.
+
+        Arrays with no surviving entry are dropped. Returns the pair (kept, positions):
+        the filtered arrays and, for each of them, its position in array_list.
+        """
+        kept = []
+        positions = []
+        for position, candidates in enumerate(array_list):
+            hits = [entry for entry in candidates if entry in regexp_ind]
+            # an array survives only if at least one of its entries matched
+            if hits:
+                kept.append(np.array(hits))
+                positions.append(position)
+        return kept, positions
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -165,13 +165,17 @@ class SparseGP(GPBase):
                raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"

            else:
+
+                LBi = chol_inv(self.LB)
                Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
                _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
-                _Bi_Lmi_psi1, _ = dtrtrs(self.LB.T, np.asfortranarray(_LBi_Lmi_psi1), lower=1, trans=0)
+

                self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
                self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
-                self.partial_for_likelihood += 0.5*np.sum(_Bi_Lmi_psi1*Lmi_psi1,0)[:,None]*self.likelihood.precision**2 #NOTE this term has numerical issues
+
+                self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
+
                self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
                self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2

@ -208,8 +212,8 @@ class SparseGP(GPBase):
        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
            + self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def _get_print_names(self):
-        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
+    #def _get_print_names(self):
+    #    return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

    def update_likelihood_approximation(self):
        """
@ -254,7 +258,7 @@ class SparseGP(GPBase):
        """
        The derivative of the bound wrt the inducing inputs Z
        """
-        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z) 
+        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
@ -288,7 +292,7 @@ class SparseGP(GPBase):
                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
                var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
        else:
-            # assert which_p.Tarts=='all', "swithching out parts of variational kernels is not implemented"
+            # assert which_parts=='all', "switching out parts of variational kernels is not implemented"
            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
            mu = np.dot(Kx, self.Cpsi1V)
            if full_cov:
@ -344,38 +348,45 @@ class SparseGP(GPBase):
            which_data = slice(None)

        GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)
-        if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
-            if self.has_uncertain_inputs:
-                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
-                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
-                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
-                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
-            Zu = self.Z * self._Xscale + self._Xoffset
-            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)

-        elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
-            Zu = self.Z * self._Xscale + self._Xoffset
-            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+        if not hasattr(self,'multioutput'):

-        elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
-            Xu = self.X[self.X[:,-1]==output,:]
-            if self.has_uncertain_inputs:
-                Xu = self.X * self._Xscale + self._Xoffset  # NOTE self.X are the normalized values now
+            if self.X.shape[1] == 1:
+                if self.has_uncertain_inputs:
+                    Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                    ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+                Zu = self.Z * self._Xscale + self._Xoffset
+                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)

-                Xu = self.X[self.X[:,-1]==output ,0:1] #??
-
-                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
-                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
-                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
-
-            Zu = self.Z[self.Z[:,-1]==output,:]
-            Zu = self.Z * self._Xscale + self._Xoffset
-            Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
-            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
-            #ax.set_ylim(ax.get_ylim()[0],)
+            elif self.X.shape[1] == 2:
+                Zu = self.Z * self._Xscale + self._Xoffset
+                ax.plot(Zu[:, 0], Zu[:, 1], 'wo')

        else:
-            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+            pass
+            """
+            if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+                Xu = self.X[self.X[:,-1]==output,:]
+                if self.has_uncertain_inputs:
+                    Xu = self.X * self._Xscale + self._Xoffset  # NOTE self.X are the normalized values now
+
+                    Xu = self.X[self.X[:,-1]==output ,0:1] #??
+
+                    ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+
+                Zu = self.Z[self.Z[:,-1]==output,:]
+                Zu = self.Z * self._Xscale + self._Xoffset
+                Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
+                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+                #ax.set_ylim(ax.get_ylim()[0],)
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+            """

    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
        """
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -166,3 +166,35 @@ def FITC_crescent_data(num_inducing=10, seed=default_seed):
    print(m)
    m.plot()
    return m
+
+
+def toy_heaviside(seed=default_seed):
+    """
+    Simple 1D classification example using a Heaviside GP transformation.
+
+    :param seed: seed value for data generation (defaults to default_seed).
+    :type seed: int
+    :return: the GPClassification model after optimization.
+    """
+    dataset = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
+    labels = dataset['Y'][:, 0:1]
+    # Recode the -1 labels as 0
+    labels[labels.flatten() == -1] = 0
+
+    # Binomial noise model built on the Heaviside transformation, wrapped in an EP likelihood
+    link = GPy.likelihoods.noise_models.gp_transformations.Heaviside()
+    ep_likelihood = GPy.likelihoods.EP(labels,GPy.likelihoods.binomial(link))
+    m = GPy.models.GPClassification(dataset['X'], likelihood=ep_likelihood)
+
+    # Update the likelihood approximation, then optimize the parameters
+    m.update_likelihood_approximation()
+    m.optimize()
+
+    # plot_f output goes to the top axes, plot output to the bottom axes
+    _, (top_ax, bottom_ax) = pb.subplots(2,1)
+    m.plot_f(ax=top_ax)
+    m.plot(ax=bottom_ax)
+    print(m)
+
+    return m
+
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -9,9 +9,9 @@ import pylab as pb
 import numpy as np
 import GPy

-def coregionalisation_toy2(max_iters=100):
+def coregionalization_toy2(max_iters=100):
    """
-    A simple demonstration of coregionalisation on two sinusoidal functions.
+    A simple demonstration of coregionalization on two sinusoidal functions.
    """
    X1 = np.random.rand(50, 1) * 8
    X2 = np.random.rand(30, 1) * 5
@ -40,9 +40,9 @@ def coregionalisation_toy2(max_iters=100):
    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
    return m

-def coregionalisation_toy(max_iters=100):
+def coregionalization_toy(max_iters=100):
    """
-    A simple demonstration of coregionalisation on two sinusoidal functions.
+    A simple demonstration of coregionalization on two sinusoidal functions.
    """
    X1 = np.random.rand(50, 1) * 8
    X2 = np.random.rand(30, 1) * 5
@ -63,9 +63,9 @@ def coregionalisation_toy(max_iters=100):
    axes[1].set_title('Output 1')
    return m

-def coregionalisation_sparse(max_iters=100):
+def coregionalization_sparse(max_iters=100):
    """
-    A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
+    A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
    """
    X1 = np.random.rand(500, 1) * 8
    X2 = np.random.rand(300, 1) * 5
@ -75,41 +75,18 @ def coregionalisation_sparse(max_iters=100):
    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
    Y = np.vstack((Y1, Y2))

-    num_inducing = 40
-    Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
-    Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
-
    k1 = GPy.kern.rbf(1)

    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=20)
-    #k2 = GPy.kern.coregionalize(2, 2)
-    #k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
-    #m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
-    m.constrain_fixed('.*rbf_var', 1.)
-
-    #m.constrain_fixed('iip')
-    #m.constrain_bounded('noise_variance', 1e-3, 1e-1)
-#     m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
-    m.optimize(max_iters=max_iters)
+    m.constrain_fixed('.*rbf_var',1.)
+    m.optimize(messages=1, max_iters=max_iters)  # keep honouring the caller's iteration cap
+    #m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')

    fig, axes = pb.subplots(2,1)
    m.plot(output=0,ax=axes[0])
    m.plot(output=1,ax=axes[1])
    axes[0].set_title('Output 0')
    axes[1].set_title('Output 1')
-    # plotting:
-    #pb.figure()
-    #Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    #Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    #mean, var, low, up = m.predict(Xtest1)
-    #GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    #mean, var, low, up = m.predict(Xtest2)
-    #GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    #pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    #pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
-    #y = pb.ylim()[0]
-    #pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
-    #pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
    return m

 def epomeo_gpx(max_iters=100):
@ -136,7 +113,7 @@ def epomeo_gpx(max_iters=100):

    k1 = GPy.kern.rbf(1)
    k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
-    k = k1**k2 
+    k = k1**k2

    m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
    m.constrain_fixed('.*rbf_var', 1.)
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@ -373,7 +373,7 @@ def symmetric(k):
    k_.parts = [symmetric.Symmetric(p) for p in k.parts]
    return k_

-def coregionalize(output_dim,W_columns=1, W=None, kappa=None):
+def coregionalize(output_dim,rank=1, W=None, kappa=None):
    """
    Coregionlization matrix B, of the form:
    .. math::
@ -387,16 +387,16 @@ def coregionalize(output_dim,W_columns=1, W=None, kappa=None):

    :param output_dim: the number of outputs to corregionalize
    :type output_dim: int
-    :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
-    :type W_colunns: int
-    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
-    :type W: numpy array of dimensionality (num_outpus, W_columns)
+    :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
+    :type rank: int
+    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
+    :type W: numpy array of dimensionality (output_dim, rank)
    :param kappa: a vector which allows the outputs to behave independently
    :type kappa: numpy array of dimensionality  (output_dim,)
    :rtype: kernel object

    """
-    p = parts.coregionalize.Coregionalize(output_dim,W_columns,W,kappa)
+    p = parts.coregionalize.Coregionalize(output_dim,rank,W,kappa)
    return kern(1,[p])


@ -456,7 +456,7 @@ def hierarchical(k):
    _parts = [parts.hierarchical.Hierarchical(k.parts)]
    return kern(k.input_dim+len(k.parts),_parts)

-def build_lcm(input_dim, output_dim, kernel_list = [], W_columns=1,W=None,kappa=None):
+def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None):
    """
    Builds a kernel of a linear coregionalization model

@ -464,8 +464,8 @@ def build_lcm(input_dim, output_dim, kernel_list = [], W_columns=1,W=None,kappa=
    :output_dim: Number of outputs
    :kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different corregionalization matrix
    :type kernel_list: list of GPy kernels
-    :param W_columns: number tuples of the corregionalization parameters 'coregion_W'
-    :type W_columns: integer
+    :param rank: number of columns of the coregionalization matrix W (parameters 'coregion_W')
+    :type rank: integer

    ..Note the kernels dimensionality is overwritten to fit input_dim
    """
@ -475,11 +475,11 @@ def build_lcm(input_dim, output_dim, kernel_list = [], W_columns=1,W=None,kappa=
            k.input_dim = input_dim
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

-    k_coreg = coregionalize(output_dim,W_columns,W,kappa)
+    k_coreg = coregionalize(output_dim,rank,W,kappa)
    kernel = kernel_list[0]**k_coreg.copy()

    for k in kernel_list[1:]:
-        k_coreg = coregionalize(output_dim,W_columns,W,kappa)
+        k_coreg = coregionalize(output_dim,rank,W,kappa)
        kernel += k**k_coreg.copy()

    return kernel
--- a/GPy/kern/parts/init.py
+++ b/GPy/kern/parts/init.py
@ -6,7 +6,7 @@ import eq_ode1
 import finite_dimensional
 import fixed
 import gibbs
-import hetero #hetero.py is not commited: omitting for now. JH. 
+import hetero
 import hierarchical
 import independent_outputs
 import linear
--- a/GPy/kern/parts/coregionalize.py
+++ b/GPy/kern/parts/coregionalize.py
@ -24,8 +24,8 @@ class Coregionalize(Kernpart):

    :param output_dim: number of outputs to coregionalize
    :type output_dim: int
-    :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
-    :type W_colunns: int
+    :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
+    :type rank: int
    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
    :type W: numpy array of dimensionality (num_outpus, W_columns)
    :param kappa: a vector which allows the outputs to behave independently
@ -38,6 +38,8 @@ class Coregionalize(Kernpart):
        self.name = 'coregion'
        self.output_dim = output_dim
        self.rank = rank
+        if self.rank>output_dim-1:
+            print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
        if W is None:
            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
        else:
@ -158,4 +160,5 @@ class Coregionalize(Kernpart):
        target += np.hstack([dW.flatten(),dkappa])

    def dK_dX(self,dL_dK,X,X2,target):
+        # NOTE: deliberately a no-op; leaving `target` unmodified is equivalent to returning a zero gradient with respect to X.
        pass
--- a/GPy/kern/parts/poly.py
+++ b/GPy/kern/parts/poly.py
@ -20,8 +20,8 @@ class POLY(Kernpart):

    The kernel is not recommended as it is badly behaved when the
    \sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness
-    there is an automatic relevance determination version of this
-    kernel provided.
+    there will be an automatic relevance determination version of this
+    kernel provided (NOT YET IMPLEMENTED!).

    :param input_dim: the number of input dimensions
    :type input_dim: int 
--- a/GPy/kern/parts/prod.py
+++ b/GPy/kern/parts/prod.py
@ -2,6 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from kernpart import Kernpart
+from coregionalize import Coregionalize
 import numpy as np
 import hashlib

@ -18,7 +19,7 @@ class Prod(Kernpart):
    """
    def __init__(self,k1,k2,tensor=False):
        self.num_params = k1.num_params + k2.num_params
-        self.name = '['+k1.name + '(x)' + k2.name +']'
+        self.name = '['+k1.name + '**' + k2.name +']'
        self.k1 = k1
        self.k2 = k2
        if tensor:
@ -60,7 +61,7 @@ class Prod(Kernpart):
        """Compute the part of the kernel associated with k2."""
        self._K_computations(X, X2)
        return self._K2
-    
+
    def dK_dtheta(self,dL_dK,X,X2,target):
        """Derivative of the covariance matrix with respect to the parameters."""
        self._K_computations(X,X2)
@ -90,8 +91,18 @@ class Prod(Kernpart):
    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        self._K_computations(X,X2)
-        self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
-        self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+        if X2 is None:
+            if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
+                self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
+                self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
+            else:  # at least one of k1, k2 is a Coregionalize part
+                # NOTE: the index column in the inputs makes ki.dK_dX fail when None is passed instead of X[:,self.slicei], so X is passed explicitly as X2; the factor 2. presumably compensates for the symmetric X2=None case -- TODO confirm.
+                X2 = X
+                self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
+                self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+        else:
+            self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
+            self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])

    def dKdiag_dX(self, dL_dKdiag, X, target):
        K1 = np.zeros(X.shape[0])
--- a/GPy/kern/parts/rational_quadratic.py
+++ b/GPy/kern/parts/rational_quadratic.py
@ -57,7 +57,7 @@ class RationalQuadratic(Kernpart):
        dist2 = np.square((X-X2.T)/self.lengthscale)

        dvar = (1 + dist2/2.)**(-self.power)
-        dl = self.power * self.variance * dist2 * self.lengthscale**(-3) * (1 + dist2/2./self.power)**(-self.power-1)
+        dl = self.power * self.variance * dist2 / self.lengthscale * (1 + dist2/2.)**(-self.power-1)
        dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)

        target[0] += np.sum(dvar*dL_dK)
@ -70,7 +70,7 @@ class RationalQuadratic(Kernpart):

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
-        if X2 is None: 
+        if X2 is None:
            dist2 = np.square((X-X.T)/self.lengthscale)
            dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 *  (1 + dist2/2./self.lengthscale)**(-self.power-1)
        else:
--- a/GPy/likelihoods/ep.py
+++ b/GPy/likelihoods/ep.py
@ -37,6 +37,8 @@ class EP(likelihood):
        self.VVT_factor = self.V
        self.trYYT = 0.

+        super(EP, self).__init__()
+
    def restart(self):
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -34,6 +34,8 @@ class Gaussian(likelihood):
        self._variance = np.asarray(variance) + 1.
        self._set_params(np.asarray(variance))

+        super(Gaussian, self).__init__()
+
    def set_data(self, data):
        self.data = data
        self.N, D = data.shape
--- a/GPy/likelihoods/gaussian_mixed_noise.py
+++ b/GPy/likelihoods/gaussian_mixed_noise.py
@ -45,6 +45,8 @@ class Gaussian_Mixed_Noise(likelihood):
        self.set_data(data_list)
        self._set_params(np.asarray(noise_params))

+        super(Gaussian_Mixed_Noise, self).__init__()
+
    def set_data(self, data_list):
        self.data = np.vstack(data_list)
        self.N, D = self.data.shape
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@ -1,7 +1,8 @@
 import numpy as np
 import copy
+from ..core.parameterized import Parameterized

-class likelihood:
+class likelihood(Parameterized):
    """
    The atom for a likelihood class

@ -16,10 +17,10 @@ class likelihood:
    self.is_heteroscedastic : enables significant computational savings in GP
    self.precision : a scalar or vector representation of the effective target precision
    self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
-    self.V : self.precision * self.Y 
+    self.V : self.precision * self.Y
    """
-    def __init__(self,data):
-        raise ValueError, "this class is not to be instantiated"
+    def __init__(self):
+        Parameterized.__init__(self)

    def _get_params(self):
        raise NotImplementedError
@ -38,7 +39,3 @@ class likelihood:

    def predictive_values(self, mu, var):
        raise NotImplementedError
-
-    def copy(self):
-        """ Returns a (deep) copy of the current likelihood """
-        return copy.deepcopy(self)
--- a/GPy/likelihoods/noise_model_constructors.py
+++ b/GPy/likelihoods/noise_model_constructors.py
@ -19,7 +19,7 @@ def binomial(gp_link=None):
        analytical_mean = True
        analytical_variance = False

-    elif isinstance(gp_link,noise_models.gp_transformations.Step):
+    elif isinstance(gp_link,noise_models.gp_transformations.Heaviside):
        analytical_mean = True
        analytical_variance = True

@ -42,7 +42,7 @@ def exponential(gp_link=None):
    analytical_variance = False
    return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)

-def gaussian(gp_link=None,variance=1.):
+def gaussian_ep(gp_link=None,variance=1.):
    """
    Construct a gaussian likelihood

--- a/GPy/likelihoods/noise_models/binomial_noise.py
+++ b/GPy/likelihoods/noise_models/binomial_noise.py
@ -49,15 +49,30 @@ class Binomial(NoiseDistribution):
            mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)

-        elif isinstance(self.gp_link,gp_transformations.Step):
-            Z_hat = None
-            mu_hat = None
-            sigma2_hat = None
+        elif isinstance(self.gp_link,gp_transformations.Heaviside):
+            a = data_i*v_i/np.sqrt(tau_i)
+            Z_hat = std_norm_cdf(a)
+            N = std_norm_pdf(a)
+            mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i)
+            sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
+            if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
+                raise ValueError("Heaviside moment matching produced NaN in Z_hat, mu_hat or sigma2_hat")  # explicit error instead of the undefined name `stop`

        return Z_hat, mu_hat, sigma2_hat

    def _predictive_mean_analytical(self,mu,sigma):
-        return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
+        if isinstance(self.gp_link,gp_transformations.Probit):
+            return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
+        elif isinstance(self.gp_link,gp_transformations.Heaviside):
+            return stats.norm.cdf(mu/sigma)
+        else:
+            raise NotImplementedError
+
+    def _predictive_variance_analytical(self,mu,sigma, pred_mean):
+        if isinstance(self.gp_link,gp_transformations.Heaviside):
+            return 0.
+        else:
+            raise NotImplementedError

    def _mass(self,gp,obs):
        #NOTE obs must be in {0,1}
--- a/GPy/likelihoods/noise_models/gp_transformations.py
+++ b/GPy/likelihoods/noise_models/gp_transformations.py
@ -108,7 +108,7 @@ class Reciprocal(GPTransformation):
    def d2transf_df2(self,f):
        return 2./f**3

-class Step(GPTransformation):
+class Heaviside(GPTransformation):
    """
    $$
    g(f) = I_{x \in A}
@ -116,10 +116,10 @ class Step(GPTransformation):
    """
    def transf(self,f):
        #transformation goes here
-        return np.where(f>0, 1, -1)
+        return np.where(f>0, 1, 0)

    def dtransf_df(self,f):
-        pass
+        raise NotImplementedError("This function is not differentiable!")

    def d2transf_df2(self,f):
-        pass
+        raise NotImplementedError("This function is not differentiable!")
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@ -107,7 +107,7 @@ class NoiseDistribution(object):
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
-        return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma))
+        return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)

    def _moments_match_analytical(self,obs,tau,v):
        """
@ -244,7 +244,7 @@ class NoiseDistribution(object):
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
-        maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma))
+        maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False)
        mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
        """

@ -267,7 +267,7 @@ class NoiseDistribution(object):
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
-        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma))
+        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False)
        mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
        return mean_squared

@ -280,7 +280,7 @@ class NoiseDistribution(object):
        :predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
        """
        # E( V(Y_star|f_star) )
-        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma))
+        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False)
        exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)

        """
@ -357,7 +357,7 @@ class NoiseDistribution(object):
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
-        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma))
+        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)

    def predictive_values(self,mu,var):
        """
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -8,7 +8,7 @@ from .. import kern
 import itertools
 from matplotlib.colors import colorConverter
 from GPy.inference.optimization import SCG
-from GPy.util import plot_latent
+from GPy.util import plot_latent, linalg
 from GPy.models.gplvm import GPLVM
 from GPy.util.plot_latent import most_significant_input_dimensions
 from matplotlib import pyplot
@ -66,8 +66,8 @@ class BayesianGPLVM(SparseGP, GPLVM):
        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
        return (X_names + S_names + SparseGP._get_param_names(self))

-    def _get_print_names(self):
-        return SparseGP._get_print_names(self)
+    #def _get_print_names(self):
+    #    return SparseGP._get_print_names(self)

    def _get_params(self):
        """
@ -140,12 +140,20 @@ class BayesianGPLVM(SparseGP, GPLVM):
        dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
        V = self.likelihood.precision * Y
+
+        #compute CPsi1V
+        if self.Cpsi1V is None:
+            psi1V = np.dot(self.psi1.T, self.likelihood.V)
+            tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+            tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
+            self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
+
        dpsi1 = np.dot(self.Cpsi1V, V.T)

        start = np.zeros(self.input_dim * 2)

        for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
-            args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2)
+            args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)

            mu, log_S = xopt.reshape(2, 1, -1)
--- a/GPy/models/gp_classification.py
+++ b/GPy/models/gp_classification.py
@ -14,7 +14,7 @@ class GPClassification(GP):
    This is a thin wrapper around the models.GP class, with a set of sensible defaults

    :param X: input observations
-    :param Y: observed values
+    :param Y: observed values, can be None if likelihood is not None
    :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
    :param kernel: a GPy kernel, defaults to rbf
    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
--- a/GPy/models/gp_multioutput_regression.py
+++ b/GPy/models/gp_multioutput_regression.py
@ -6,7 +6,6 @@ import numpy as np
 from ..core import GP
 from .. import likelihoods
 from .. import kern
-#from ..util import multioutput

 class GPMultioutputRegression(GP):
    """
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -25,11 +25,11 @@ class MRD(Model):
    :param input_dim: latent dimensionality
    :type input_dim: int
    :param initx: initialisation method for the latent space :
-        
+
        * 'concat' - PCA on concatenation of all datasets
        * 'single' - Concatenation of PCA on datasets, respectively
        * 'random' - Random draw from a normal
-            
+
    :type initx: ['concat'|'single'|'random']
    :param initz: initialisation method for inducing inputs
    :type initz: 'permute'|'random'
@ -163,28 +163,31 @@ class MRD(Model):
        self._init_X(initx, self.likelihood_list)
        self._init_Z(initz, self.X)

-    def _get_latent_param_names(self):
+    #def _get_latent_param_names(self):
+    def _get_param_names(self):
        n1 = self.gref._get_param_names()
        n1var = n1[:self.NQ * 2 + self.MQ]
-        return n1var
-
-
-    def _get_kernel_names(self):
+    #    return n1var
+    #
+    #def _get_kernel_names(self):
        map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
                                         itertools.izip(ns,
                                                        itertools.repeat(name)))
-        kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names))
-        return kernel_names
+        return list(itertools.chain(n1var, *(map_names(\
+                SparseGP._get_param_names(g)[self.MQ:], n) \
+                for g, n in zip(self.bgplvms, self.names))))
+    #    kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names))
+    #    return kernel_names

-    def _get_param_names(self):
+    #def _get_param_names(self):
        # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
        # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
-        n1var = self._get_latent_param_names()
-        kernel_names = self._get_kernel_names()
-        return list(itertools.chain(n1var, *kernel_names))
+    #    n1var = self._get_latent_param_names()
+    #    kernel_names = self._get_kernel_names()
+    #    return list(itertools.chain(n1var, *kernel_names))

-    def _get_print_names(self):
-        return list(itertools.chain(*self._get_kernel_names()))
+    #def _get_print_names(self):
+    #    return list(itertools.chain(*self._get_kernel_names()))

    def _get_params(self):
        """
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -4,7 +4,7 @@
 import unittest
 import numpy as np
 import GPy
-    
+
 verbose = False

 class KernelTests(unittest.TestCase):
@ -18,11 +18,11 @@ class KernelTests(unittest.TestCase):
        self.assertTrue(m.checkgrad())

    def test_rbfkernel(self):
-        kern = GPy.kern.rbf(5)        
+        kern = GPy.kern.rbf(5)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

    def test_rbf_invkernel(self):
-        kern = GPy.kern.rbf_inv(5)        
+        kern = GPy.kern.rbf_inv(5)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

    def test_Matern32kernel(self):
@ -79,7 +79,7 @@ class KernelTests(unittest.TestCase):
        kern = GPy.kern.poly(5, degree=4)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

-    def test_coregionalisation(self):
+    def test_coregionalization(self):
        X1 = np.random.rand(50,1)*8
        X2 = np.random.rand(30,1)*5
        index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
--- a/GPy/util/init.py
+++ b/GPy/util/init.py
@ -14,4 +14,3 @@ import visualize
 import decorators
 import classification
 import latent_space_visualizations
-#import multioutput
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -51,7 +51,7 @@ def dpotri(A, lower=0):

    :param A: Matrix A
    :param lower: is matrix lower (true) or upper (false)
-    :returns:
+    :returns: A inverse
    """
    return lapack.dpotri(A, lower=lower)