Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-06-02 14:45:15 +02:00 · 2013-10-22 13:41:46 +01:00 · 2013-10-22 13:41:46 +01:00 · 0919507574
commit 0919507574
parent a0b58020a4 dc12fb43b7
40 changed files with 1797 additions and 658 deletions
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@ -126,7 +126,7 @@ class FITC(SparseGP):
            self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
        # the partial derivative vector for the likelihood
-        if self.likelihood.Nparams == 0:
+        if self.likelihood.num_params == 0:
            # save computation here.
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -58,7 +58,6 @@ class GP(GPBase):
    def _get_params(self):
        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
    def _get_param_names(self):
        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
@ -129,7 +128,7 @@ class GP(GPBase):
            debug_this # @UndefinedVariable
        return mu, var
-    def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
+    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
        """
        Predict the function(s) at the new point(s) Xnew.
@ -156,67 +155,41 @@ class GP(GPBase):
        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
        return mean, var, _025pm, _975pm
-    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+    def _raw_predict_single_output(self, _Xnew, output, which_parts='all', full_cov=False,stop=False):
        """
-        For a specific output, predict the function at the new point(s) Xnew.
+        For a specific output, calls _raw_predict() at the new point(s) _Xnew.
-
+        This functions calls _add_output_index(), so _Xnew should not have an index column specifying the output.
        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
        :param output: output to predict
        :type output: integer in {0,..., num_outputs-1}
        :param which_parts:  specifies which outputs kernel(s) to use in prediction
        :type which_parts: ('all', list of bools)
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
        :returns: posterior mean,  a Numpy array, Nnew x self.input_dim
        :returns: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
        .. Note:: For multiple output models only
        """
        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
        index = np.ones_like(Xnew)*output
        Xnew = np.hstack((Xnew,index))
        # normalize X values
        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
        # now push through likelihood
        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
        return mean, var, _025pm, _975pm
    def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False):
        """
        Internal helper function for making predictions for a specific output,
        does not account for normalization or likelihood
        ---------
        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
        :param output: output to predict
-        :type output: integer in {0,..., num_outputs-1}
+        :type output: integer in {0,..., output_dim-1}
        :param which_parts:  specifies which outputs kernel(s) to use in prediction
        :type which_parts: ('all', list of bools)
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        .. Note:: For multiple output models only
+        .. Note:: For multiple non-independent outputs models only.
        """
-        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
+        _Xnew = self._add_output_index(_Xnew, output)
-        # creates an index column and appends it to _Xnew
+        return self._raw_predict(_Xnew, which_parts=which_parts,full_cov=full_cov, stop=stop)
        index = np.ones_like(_Xnew)*output
        _Xnew = np.hstack((_Xnew,index))
-        Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
+    def predict_single_output(self, Xnew,output=0, which_parts='all', full_cov=False, likelihood_args=dict()):
-        KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
+        """
-        mu = np.dot(KiKx.T, self.likelihood.Y)
+        For a specific output, calls predict() at the new point(s) Xnew.
-        if full_cov:
+        This functions calls _add_output_index(), so Xnew should not have an index column specifying the output.
-            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+
-            var = Kxx - np.dot(KiKx.T, Kx)
+        :param Xnew: The points at which to make a prediction
-        else:
+        :type Xnew: np.ndarray, Nnew x self.input_dim
-            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
-            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
+        :type which_parts: ('all', list of bools)
-            var = var[:, None]
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        if stop:
+        :type full_cov: bool
-            debug_this # @UndefinedVariable
+        :returns: mean: posterior mean,  a Numpy array, Nnew x self.input_dim
-        return mu, var
+        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
        .. Note:: For multiple non-independent outputs models only.
        """
        Xnew = self._add_output_index(Xnew, output)
        return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@ -3,13 +3,14 @@ from .. import kern
 from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
 import pylab as pb
 from GPy.core.model import Model
 import warnings
 from ..likelihoods import Gaussian, Gaussian_Mixed_Noise
 class GPBase(Model):
    """
    Gaussian process base model for holding shared behaviour between
    sparse_GP and GP models.
    """
    def __init__(self, X, likelihood, kernel, normalize_X=False):
        self.X = X
        assert len(self.X.shape) == 2
@ -57,7 +58,59 @@ class GPBase(Model):
        self.X = state.pop()
        Model.setstate(self, state)
-    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None,output=None):
+    def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
        """
        Samples the posterior GP at the points X.
        :param X: The points at which to take the samples.
        :type X: np.ndarray, Nnew x self.input_dim.
        :param size: the number of a posteriori samples to plot.
        :type size: int.
        :param which_parts: which of the kernel functions to plot (additively).
        :type which_parts: 'all', or list of bools.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :returns: Ysim: set of simulations, a Numpy array (N x samples).
        """
        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
        v = v.reshape(m.size,-1) if len(v.shape)==3 else v
        if not full_cov:
            Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
        else:
            Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
        return Ysim
    def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
        """
        Samples the posterior GP at the points X.
        :param X: the points at which to take the samples.
        :type X: np.ndarray, Nnew x self.input_dim.
        :param size: the number of a posteriori samples to plot.
        :type size: int.
        :param which_parts: which of the kernel functions to plot (additively).
        :type which_parts: 'all', or list of bools.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
        :type noise_model: integer.
        :returns: Ysim: set of simulations, a Numpy array (N x samples).
        """
        Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
        if isinstance(self.likelihood,Gaussian):
            noise_std = np.sqrt(self.likelihood._get_params())
            Ysim += np.random.normal(0,noise_std,Ysim.shape)
        elif isinstance(self.likelihood,Gaussian_Mixed_Noise):
            assert noise_model is not None, "A noise model must be specified."
            noise_std = np.sqrt(self.likelihood._get_params()[noise_model])
            Ysim += np.random.normal(0,noise_std,Ysim.shape)
        else:
            Ysim = self.likelihood.noise_model.samples(Ysim)
        return Ysim
    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
        """
        Plot the GP's view of the world, where the data is normalized and the
          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -89,82 +142,41 @@ class GPBase(Model):
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
-        if not hasattr(self,'multioutput'):
+        if self.X.shape[1] == 1:
            resolution = resolution or 200
            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-            if self.X.shape[1] == 1:
+            m, v = self._raw_predict(Xnew, which_parts=which_parts)
-                Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+            if samples:
-                if samples == 0:
+                Ysim = self.posterior_samples_f(Xnew, samples, which_parts=which_parts, full_cov=True)
-                    m, v = self._raw_predict(Xnew, which_parts=which_parts)
+                for yi in Ysim.T:
-                    gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
-                    ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+            gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
                else:
                    m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
                    v = v.reshape(m.size,-1) if len(v.shape)==3 else v
                    Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
                    gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
                    for i in range(samples):
                        ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
-                ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+            ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
-                ax.set_xlim(xmin, xmax)
+            ax.set_xlim(xmin, xmax)
-                ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
-                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-                ax.set_ylim(ymin, ymax)
+            ax.set_ylim(ymin, ymax)
-                if hasattr(self,'Z'):
+        elif self.X.shape[1] == 2:
                    Zu = self.Z * self._Xscale + self._Xoffset
                    ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
-            elif self.X.shape[1] == 2:
+            resolution = resolution or 50
-                resolution = resolution or 50
+            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
-                Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
+            m, v = self._raw_predict(Xnew, which_parts=which_parts)
-                m, v = self._raw_predict(Xnew, which_parts=which_parts)
+            m = m.reshape(resolution, resolution).T
-                m = m.reshape(resolution, resolution).T
+            ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-                ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
-                ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
+            ax.set_xlim(xmin[0], xmax[0])
-                ax.set_xlim(xmin[0], xmax[0])
+            ax.set_ylim(xmin[1], xmax[1])
-                ax.set_ylim(xmin[1], xmax[1])
+
            if samples:
                warnings.warn("Samples only implemented for 1 dimensional inputs.")
            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
        else:
-            assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
-            if self.X.shape[1] == 2:
+    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
                Xu = self.X[self.X[:,-1]==output ,0:1]
                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
                if samples == 0:
                    m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
                    gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
                    ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
                else:
                    m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
                    v = v.reshape(m.size,-1) if len(v.shape)==3 else v
                    Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
                    gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
                    for i in range(samples):
                        ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
                ax.set_xlim(xmin, xmax)
                ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
                ax.set_ylim(ymin, ymax)
            elif self.X.shape[1] == 3:
                raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
                assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
        if hasattr(self,'Z'):
            Zu = self.Z[self.Z[:,-1]==output,:]
            Zu = self.Z * self._Xscale + self._Xoffset
            Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
        """
        Plot the GP with noise where the likelihood is Gaussian.
@ -200,7 +212,6 @@ class GPBase(Model):
        :param fillcol: color of fill
        :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
        """
        # TODO include samples
        if which_data == 'all':
            which_data = slice(None)
@ -208,98 +219,202 @@ class GPBase(Model):
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
-        if not hasattr(self,'multioutput'):
+        plotdims = self.input_dim - len(fixed_inputs)
        if plotdims == 1:
            resolution = resolution or 200
-            plotdims = self.input_dim - len(fixed_inputs)
+            Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
            if plotdims == 1:
                resolution = resolution or 200
-                Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
+            fixed_dims = np.array([i for i,v in fixed_inputs])
            freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
-                fixed_dims = np.array([i for i,v in fixed_inputs])
+            Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
-                freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
+            Xgrid = np.empty((Xnew.shape[0],self.input_dim))
            Xgrid[:,freedim] = Xnew
            for i,v in fixed_inputs:
                Xgrid[:,i] = v
-                Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
+            m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts)
                Xgrid = np.empty((Xnew.shape[0],self.input_dim))
                Xgrid[:,freedim] = Xnew
                for i,v in fixed_inputs:
                    Xgrid[:,i] = v
-                m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
+            if samples: #NOTE not tested with fixed_inputs
-                for d in range(m.shape[1]):
+                Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True)
-                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
+                for yi in Ysim.T:
-                    ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
+                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
-                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+                    #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
                ax.set_xlim(xmin, xmax)
                ax.set_ylim(ymin, ymax)
            for d in range(m.shape[1]):
                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
                ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(ymin, ymax)
        elif self.X.shape[1] == 2:
-                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution)
+            resolution = resolution or 50
-                m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
+            Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
-                for d in range(m.shape[1]):
+            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
-                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+            m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
-                    ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
+            m = m.reshape(resolution, resolution).T
-                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+            ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            Yf = self.likelihood.Y.flatten()
-                ax.set_xlim(xmin, xmax)
+            ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
-                ax.set_ylim(ymin, ymax)
+            ax.set_xlim(xmin[0], xmax[0])
            ax.set_ylim(xmin[1], xmax[1])
-            elif self.X.shape[1] == 2:
+            if samples:
-                resolution = resolution or 50
+                warnings.warn("Samples only implemented for 1 dimensional inputs.")
                Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
                x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
                m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
                m = m.reshape(resolution, resolution).T
                ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
                Yf = self.likelihood.Y.flatten()
                ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
                ax.set_xlim(xmin[0], xmax[0])
                ax.set_ylim(xmin[1], xmax[1])
            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
        else:
-            assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
            if self.X.shape[1] == 2:
                resolution = resolution or 200
                Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
                Xu = self.X * self._Xscale + self._Xoffset
                Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
-                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+    def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
-                m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output)
+        """
        For a specific output, in a multioutput model, this function works just as plot_f on single output models.
-                for d in range(m.shape[1]):
+        :param output: which output to plot (for multiple output models only)
-                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+        :type output: integer (first output is 0)
-                    ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
+        :param samples: the number of a posteriori samples to plot
-                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+        :param which_data: which if the training data to plot (default all)
-                ax.set_xlim(xmin, xmax)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
-                ax.set_ylim(ymin, ymax)
+        :param which_parts: which of the kernel functions to plot (additively)
        :type which_parts: 'all', or list of bools
        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
        :type resolution: int
        :param full_cov:
        :type full_cov: bool
                :param fignum: figure to plot on.
        :type fignum: figure number
        :param ax: axes to plot on.
        :type ax: axes handle
        """
        assert output is not None, "An output must be specified."
        assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
            elif self.X.shape[1] == 3:
                raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
                resolution = resolution or 50
            else:
                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
    """
    def samples_f(self,X,samples=10, which_data='all', which_parts='all',output=None):
        if which_data == 'all':
            which_data = slice(None)
-        if hasattr(self,'multioutput'):
+        if ax is None:
-            np.hstack([X,np.ones((X.shape[0],1))*output])
+            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
-        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=True)
+        if self.X.shape[1] == 2:
-        v = v.reshape(m.size,-1) if len(v.shape)==3 else v
+            Xu = self.X[self.X[:,-1]==output ,0:1]
-        Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
-        #gpplot(X, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+            Xnew_indexed = self._add_output_index(Xnew,output)
        for i in range(samples):
            ax.plot(X, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
-    """
+            m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
            if samples:
                Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
                for yi in Ysim.T:
                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
            gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
            ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
            ax.set_xlim(xmin, xmax)
            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
            ax.set_ylim(ymin, ymax)
        elif self.X.shape[1] == 3:
            raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
            #if samples:
            #    warnings.warn("Samples only implemented for 1 dimensional inputs.")
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
    def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
        """
        For a specific output, in a multioutput model, this function works just as plot_f on single output models.
        :param output: which output to plot (for multiple output models only)
        :type output: integer (first output is 0)
        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
        :type plot_limits: np.array
        :param which_data: which if the training data to plot (default all)
        :type which_data: 'all' or a slice object to slice self.X, self.Y
        :param which_parts: which of the kernel functions to plot (additively)
        :type which_parts: 'all', or list of bools
        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
        :type resolution: int
        :param levels: number of levels to plot in a contour plot.
        :type levels: int
        :param samples: the number of a posteriori samples to plot
        :type samples: int
        :param fignum: figure to plot on.
        :type fignum: figure number
        :param ax: axes to plot on.
        :type ax: axes handle
        :type output: integer (first output is 0)
        :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
        :type fixed_inputs: a list of tuples
        :param linecol: color of line to plot.
        :type linecol:
        :param fillcol: color of fill
        :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
        """
        assert output is not None, "An output must be specified."
        assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
        if which_data == 'all':
            which_data = slice(None)
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        if self.X.shape[1] == 2:
            resolution = resolution or 200
            Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
            Xu = self.X * self._Xscale + self._Xoffset
            Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
            Xnew_indexed = self._add_output_index(Xnew,output)
            m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output)
            if samples: #NOTE not tested with fixed_inputs
                Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output)
                for yi in Ysim.T:
                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
            for d in range(m.shape[1]):
                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
                ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(ymin, ymax)
        elif self.X.shape[1] == 3:
            raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
            #if samples:
            #    warnings.warn("Samples only implemented for 1 dimensional inputs.")
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
    def _add_output_index(self,X,output):
        """
        In a multioutput model, appends an index column to X to specify the output it is related to.
        :param X: Input data
        :type X: np.ndarray, N x self.input_dim
        :param output: output X is related to
        :type output: integer in {0,..., output_dim-1}
        .. Note:: For multiple non-independent outputs models only.
        """
        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
        index = np.ones((X.shape[0],1))*output
        return np.hstack((X,index))
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -259,7 +259,7 @@ class Model(Parameterized):
        these terms are present in the name the parameter is
        constrained positive.
        """
-        positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
+        positive_strings = ['variance', 'lengthscale', 'precision', 'decay', 'kappa']
        # param_names = self._get_param_names()
        currently_constrained = self.all_constrained_indices()
        to_make_positive = []
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -34,7 +34,6 @@ class SparseGP(GPBase):
        self.Z = Z
        self.num_inducing = Z.shape[0]
 #         self.likelihood = likelihood
        if X_variance is None:
            self.has_uncertain_inputs = False
@ -157,7 +156,7 @@ class SparseGP(GPBase):
        # the partial derivative vector for the likelihood
-        if self.likelihood.Nparams == 0:
+        if self.likelihood.num_params == 0:
            # save computation here.
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
@ -305,9 +304,8 @@ class SparseGP(GPBase):
        return mu, var[:, None]
-    def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False, **likelihood_args):
        """
        Predict the function(s) at the new point(s) Xnew.
        **Arguments**
@ -338,56 +336,90 @@ class SparseGP(GPBase):
        mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
        # now push through likelihood
-        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
        return mean, var, _025pm, _975pm
-    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
+
    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
        """
        Plot the GP's view of the world, where the data is normalized and the
          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
          - In two dimsensions, a contour-plot shows the mean predicted function
          - Not implemented in higher dimensions
        :param samples: the number of a posteriori samples to plot
        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
        :param which_data: which if the training data to plot (default all)
        :type which_data: 'all' or a slice object to slice self.X, self.Y
        :param which_parts: which of the kernel functions to plot (additively)
        :type which_parts: 'all', or list of bools
        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
        :type resolution: int
        :param full_cov:
        :type full_cov: bool
                :param fignum: figure to plot on.
        :type fignum: figure number
        :param ax: axes to plot on.
        :type ax: axes handle
        :param output: which output to plot (for multiple output models only)
        :type output: integer (first output is 0)
        """
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        if fignum is None and ax is None:
                fignum = fig.num
        if which_data is 'all':
            which_data = slice(None)
-        GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)
+        GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
-        if not hasattr(self,'multioutput'):
+        if self.X.shape[1] == 1:
            if self.has_uncertain_inputs:
                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
            Zu = self.Z * self._Xscale + self._Xoffset
            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
-            if self.X.shape[1] == 1:
+        elif self.X.shape[1] == 2:
-                if self.has_uncertain_inputs:
+            Zu = self.Z * self._Xscale + self._Xoffset
-                    Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
                    ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
                Zu = self.Z * self._Xscale + self._Xoffset
                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
            elif self.X.shape[1] == 2:
                Zu = self.Z * self._Xscale + self._Xoffset
                ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
        else:
-            if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
                """
                Xu = self.X[self.X[:,-1]==output,:]
                if self.has_uncertain_inputs:
                    Xu = self.X * self._Xscale + self._Xoffset  # NOTE self.X are the normalized values now
-                    Xu = self.X[self.X[:,-1]==output ,0:1] #??
+    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        if fignum is None and ax is None:
                fignum = fig.num
        if which_data is 'all':
            which_data = slice(None)
-                    ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+        GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax)
                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
-                """
+        if self.X.shape[1] == 1:
-                Zu = self.Z[self.Z[:,-1]==output,:]
+            if self.has_uncertain_inputs:
-                Zu = self.Z * self._Xscale + self._Xoffset
+                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
-                Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
+                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
-                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
-                #ax.set_ylim(ax.get_ylim()[0],)
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
            Zu = self.Z * self._Xscale + self._Xoffset
            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
-            else:
+        elif self.X.shape[1] == 2:
-                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+            Zu = self.Z * self._Xscale + self._Xoffset
            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
        """
@ -470,3 +502,64 @@ class SparseGP(GPBase):
                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
        return mu, var[:, None]
    def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        if fignum is None and ax is None:
                fignum = fig.num
        if which_data is 'all':
            which_data = slice(None)
        GPBase.plot_single_output_f(self, output=output, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
        if self.X.shape[1] == 2:
            if self.has_uncertain_inputs:
                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
            Zu = self.Z * self._Xscale + self._Xoffset
            Zu = Zu[Zu[:,1]==output,0:1]
            ax.plot(Zu[:,0], np.zeros_like(Zu[:,0]) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
        elif self.X.shape[1] == 2:
            Zu = self.Z * self._Xscale + self._Xoffset
            Zu = Zu[Zu[:,1]==output,0:2]
            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
    def plot_single_output(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        if fignum is None and ax is None:
                fignum = fig.num
        if which_data is 'all':
            which_data = slice(None)
        GPBase.plot_single_output(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax, output=output)
        if self.X.shape[1] == 2:
            if self.has_uncertain_inputs:
                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
            Zu = self.Z * self._Xscale + self._Xoffset
            Zu = Zu[Zu[:,1]==output,0:1]
            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
        elif self.X.shape[1] == 3:
            Zu = self.Z * self._Xscale + self._Xoffset
            Zu = Zu[Zu[:,1]==output,0:1]
            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
--- a/GPy/core/svigp.py
+++ b/GPy/core/svigp.py
@ -350,8 +350,8 @@ class SVIGP(GPBase):
            #callback
            if i and not i%callback_interval:
-                callback()
+                callback(self) # Change this to callback()
-                time.sleep(0.1)
+                time.sleep(0.01)
            if self.epochs > 10:
                self._adapt_steplength()
@ -367,13 +367,13 @@ class SVIGP(GPBase):
        assert self.vb_steplength > 0
        if self.adapt_param_steplength:
-            # self._adaptive_param_steplength()
+            self._adaptive_param_steplength()
            # self._adaptive_param_steplength_log()
-            self._adaptive_param_steplength_from_vb()
+            # self._adaptive_param_steplength_from_vb()
        self._param_steplength_trace.append(self.param_steplength)
    def _adaptive_param_steplength(self):
-        decr_factor = 0.1
+        decr_factor = 0.02
        g_tp = self._transform_gradients(self._log_likelihood_gradients())
        self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
        self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
@ -407,7 +407,7 @@ class SVIGP(GPBase):
        self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
    def _adaptive_vb_steplength_KL(self):
-        decr_factor = 1 #0.1
+        decr_factor = 0.1
        natgrad = self.vb_grad_natgrad()
        g_t1 = natgrad[0]
        g_t2 = natgrad[1]
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -26,7 +26,7 @@ def BGPLVM(seed=default_seed):
    lik = Gaussian(Y, normalize=True)
    k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
-    # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+    # k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
    # k = GPy.kern.rbf(Q, ARD = False)  + GPy.kern.white(Q, 0.00001)
    m = GPy.models.BayesianGPLVM(lik, Q, kernel=k, num_inducing=num_inducing)
@ -331,27 +331,46 @@ def brendan_faces():
    from GPy import kern
    data = GPy.util.datasets.brendan_faces()
    Q = 2
-    Y = data['Y'][0:-1:10, :]
+    Y = data['Y']
    # Y = data['Y']
    Yn = Y - Y.mean()
    Yn /= Yn.std()
    m = GPy.models.GPLVM(Yn, Q)
    # m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100)
    # optimize
    m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
-    m.optimize('scg', messages=1, max_f_eval=10000)
+    m.optimize('scg', messages=1, max_iters=1000)
    ax = m.plot_latent(which_indices=(0, 1))
    y = m.likelihood.Y[0, :]
-    data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
+    data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
    lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
    raw_input('Press enter to finish')
    return m
 def olivetti_faces():
    from GPy import kern
    data = GPy.util.datasets.olivetti_faces()
    Q = 2
    Y = data['Y']
    Yn = Y - Y.mean()
    Yn /= Yn.std()
    m = GPy.models.GPLVM(Yn, Q)
    m.optimize('scg', messages=1, max_iters=1000)
    ax = m.plot_latent(which_indices=(0, 1))
    y = m.likelihood.Y[0, :]
    data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
    lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
    raw_input('Press enter to finish')
    return m
 def stick_play(range=None, frame_rate=15):
    data = GPy.util.datasets.osu_run1()
    # optimize
    if range == None:
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -57,8 +57,8 @@ def coregionalization_toy(max_iters=100):
    m.optimize(max_iters=max_iters)
    fig, axes = pb.subplots(2,1)
-    m.plot(output=0,ax=axes[0])
+    m.plot_single_output(output=0,ax=axes[0])
-    m.plot(output=1,ax=axes[1])
+    m.plot_single_output(output=1,ax=axes[1])
    axes[0].set_title('Output 0')
    axes[1].set_title('Output 1')
    return m
@ -77,14 +77,14 @@ def coregionalization_sparse(max_iters=100):
    k1 = GPy.kern.rbf(1)
-    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=20)
+    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=5)
    m.constrain_fixed('.*rbf_var',1.)
-    m.optimize(messages=1)
+    #m.optimize(messages=1)
-    #m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
+    m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
    fig, axes = pb.subplots(2,1)
-    m.plot(output=0,ax=axes[0])
+    m.plot_single_output(output=0,ax=axes[0],plot_limits=(-1,9))
-    m.plot(output=1,ax=axes[1])
+    m.plot_single_output(output=1,ax=axes[1],plot_limits=(-1,9))
    axes[0].set_title('Output 0')
    axes[1].set_title('Output 1')
    return m
--- a/GPy/gpy_config.cfg
+++ b/GPy/gpy_config.cfg
@ -0,0 +1,7 @@
 # This is the configuration file for GPy
 [parallel]
 # Enable openmp support. This speeds up some computations, depending on the number
 # of cores available. Setting up a compiler with openmp support can be difficult on 
 # some platforms, hence this option.
 openmp=True
--- a/GPy/inference/scg.py
+++ b/GPy/inference/scg.py
@ -62,7 +62,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
    fnow = fold
    gradnew = gradf(x, *optargs) # Initial gradient.
    if any(np.isnan(gradnew)):
-        raise UnexpectedInfOrNan
+        raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
    current_grad = np.dot(gradnew, gradnew)
    gradold = gradnew.copy()
    d = -gradnew # Initial search direction.
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@ -298,43 +298,67 @@ if sympy_available:
        """
        Radial Basis Function covariance.
        """
-        X = [sp.var('x%i' % i) for i in range(input_dim)]
+        X = sp.symbols('x_:' + str(input_dim))
-        Z = [sp.var('z%i' % i) for i in range(input_dim)]
+        Z = sp.symbols('z_:' + str(input_dim))
        variance = sp.var('variance',positive=True)
        if ARD:
-            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
+            lengthscales = sp.symbols('lengthscale_:' + str(input_dim))
-            dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
+            dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale%i**2' % (i, i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f =  variance*sp.exp(-dist/2.)
        else:
            lengthscale = sp.var('lengthscale',positive=True)
-            dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
+            dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f =  variance*sp.exp(-dist/(2*lengthscale**2))
        return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
    def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.):
        """
        Exponentiated quadratic with multiple outputs.
        """
        real_input_dim = input_dim
        if output_dim>1:
            real_input_dim -= 1
        X = sp.symbols('x_:' + str(real_input_dim))
        Z = sp.symbols('z_:' + str(real_input_dim))
        scale = sp.var('scale_i scale_j',positive=True)
        if ARD:
            lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)]
            shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)]
            dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)])
            dist = parse_expr(dist_string)
            f =  variance*sp.exp(-dist/2.)
        else:
            lengthscales = sp.var('lengthscale_i lengthscale_j',positive=True)
            shared_lengthscale = sp.var('shared_lengthscale',positive=True)
            dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)])
            dist = parse_expr(dist_string)
            f =  scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2)))
        return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
    def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
        """
        TODO: Not clear why this isn't working, suggests argument of sinc is not a number.
        sinc covariance funciton
        """
-        X = [sp.var('x%i' % i) for i in range(input_dim)]
+        X = sp.symbols('x_:' + str(input_dim))
-        Z = [sp.var('z%i' % i) for i in range(input_dim)]
+        Z = sp.symbols('z_:' + str(input_dim))
        variance = sp.var('variance',positive=True)
        if ARD:
            lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
-            dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
+            dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f =  variance*sinc(sp.pi*sp.sqrt(dist))
        else:
            lengthscale = sp.var('lengthscale',positive=True)
-            dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
+            dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
            dist = parse_expr(dist_string)
            f =  variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale)
        return kern(input_dim, [spkern(input_dim, f, name='sinc')])
-    def sympykern(input_dim, k,name=None):
+    def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
        """
        A base kernel object, where all the hard work in done by sympy.
@ -349,7 +373,7 @@ if sympy_available:
         - to handle multiple inputs, call them x1, z1, etc
         - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
        """
-        return kern(input_dim, [spkern(input_dim, k,name)])
+        return kern(input_dim, [spkern(input_dim, k=k, output_dim=output_dim, name=name, param=param)])
 del sympy_available
 def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@ -31,7 +31,7 @@ class kern(Parameterized):
        """
        self.parts = parts
-        self.Nparts = len(parts)
+        self.num_parts = len(parts)
        self.num_params = sum([p.num_params for p in self.parts])
        self.input_dim = input_dim
@ -61,7 +61,7 @@ class kern(Parameterized):
        here just all the indices, rest can get recomputed
        """
        return Parameterized.getstate(self) + [self.parts,
-                self.Nparts,
+                self.num_parts,
                self.num_params,
                self.input_dim,
                self.input_slices,
@ -73,21 +73,20 @@ class kern(Parameterized):
        self.input_slices = state.pop()
        self.input_dim = state.pop()
        self.num_params = state.pop()
-        self.Nparts = state.pop()
+        self.num_parts = state.pop()
        self.parts = state.pop()
        Parameterized.setstate(self, state)
    def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
-        """If an ARD kernel is present, it bar-plots the ARD parameters.
+        """If an ARD kernel is present, plot a bar representation using matplotlib
        :param fignum: figure number of the plot
        :param ax: matplotlib axis to plot on
-        :param title: 
+        :param title:
-            title of the plot, 
+            title of the plot,
            pass '' to not print a title
            pass None for a generic title
        """
        if ax is None:
            fig = pb.figure(fignum)
@ -152,6 +151,13 @@ class kern(Parameterized):
        return ax
    def _transform_gradients(self, g):
        """
        Apply the transformations of the kernel so that the returned vector
        represents the gradient in the transformed space (i.e. that given by
        get_params_transformed())
        :param g: the gradient vector for the current model, usually created by dK_dtheta
        """
        x = self._get_params()
        [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
@ -162,7 +168,9 @@ class kern(Parameterized):
            return g
    def compute_param_slices(self):
-        """create a set of slices that can index the parameters of each part."""
+        """
        Create a set of slices that can index the parameters of each part.
        """
        self.param_slices = []
        count = 0
        for p in self.parts:
@ -170,14 +178,19 @@ class kern(Parameterized):
            count += p.num_params
    def __add__(self, other):
-        """
+        """ Overloading of the '+' operator. for more control, see self.add """
        Shortcut for `add`.
        """
        return self.add(other)
    def add(self, other, tensor=False):
        """
-        Add another kernel to this one. Both kernels are defined on the same _space_
+        Add another kernel to this one.
        If Tensor is False, both kernels are defined on the same _space_. then
        the created kernel will have the same number of inputs as self and
        other (which must be the same).
        If Tensor is True, then the dimensions are stacked 'horizontally', so
        that the resulting kernel has self.input_dim + other.input_dim
        :param other: the other kernel to be added
        :type other: GPy.kern
@ -210,9 +223,7 @@ class kern(Parameterized):
        return newkern
    def __mul__(self, other):
-        """
+        """ Here we overload the '*' operator. See self.prod for more information"""
        Shortcut for `prod`.
        """
        return self.prod(other)
    def __pow__(self, other, tensor=False):
@ -228,7 +239,7 @@ class kern(Parameterized):
        :param other: the other kernel to be added
        :type other: GPy.kern
        :param tensor: whether or not to use the tensor space (default is false).
-        :type tensor: bool 
+        :type tensor: bool
        """
        K1 = self.copy()
@ -307,8 +318,19 @@ class kern(Parameterized):
        return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
    def K(self, X, X2=None, which_parts='all'):
        """
        Compute the kernel function.
        :param X: the first set of inputs to the kernel
        :param X2: (optional) the second set of arguments to the kernel. If X2
                   is None, this is passed throgh to the 'part' object, which
                   handles this as X2 == X.
        :param which_parts: a list of booleans detailing whether to include
                            each of the part functions. By default, 'all'
                            indicates [True]*self.num_parts
        """
        if which_parts == 'all':
-            which_parts = [True] * self.Nparts
+            which_parts = [True] * self.num_parts
        assert X.shape[1] == self.input_dim
        if X2 is None:
            target = np.zeros((X.shape[0], X.shape[0]))
@ -321,7 +343,7 @@ class kern(Parameterized):
    def dK_dtheta(self, dL_dK, X, X2=None):
        """
        Compute the gradient of the covariance function with respect to the parameters.
-        
+
        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
        :type dL_dK: Np.ndarray (num_samples x num_inducing)
        :param X: Observed data inputs
@ -329,6 +351,7 @@ class kern(Parameterized):
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)
        returns: dL_dtheta
        """
        assert X.shape[1] == self.input_dim
        target = np.zeros(self.num_params)
@ -340,7 +363,7 @@ class kern(Parameterized):
        return self._transform_gradients(target)
    def dK_dX(self, dL_dK, X, X2=None):
-        """Compute the gradient of the covariance function with respect to X.
+        """Compute the gradient of the objective function with respect to X.
        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
        :type dL_dK: np.ndarray (num_samples x num_inducing)
@ -359,7 +382,7 @@ class kern(Parameterized):
    def Kdiag(self, X, which_parts='all'):
        """Compute the diagonal of the covariance function for inputs X."""
        if which_parts == 'all':
-            which_parts = [True] * self.Nparts
+            which_parts = [True] * self.num_parts
        assert X.shape[1] == self.input_dim
        target = np.zeros(X.shape[0])
        [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
@ -497,7 +520,7 @@ class kern(Parameterized):
    def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
        if which_parts == 'all':
-            which_parts = [True] * self.Nparts
+            which_parts = [True] * self.num_parts
        if self.input_dim == 1:
            if x is None:
                x = np.zeros((1, 1))
@ -658,7 +681,7 @@ class Kern_check_dKdiag_dX(Kern_check_model):
    def _set_params(self, x):
        self.X=x.reshape(self.X.shape)
-def kern_test(kern, X=None, X2=None, verbose=False):
+def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
    """This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
    :param kern: the kernel to be tested.
@ -672,8 +695,13 @@ def kern_test(kern, X=None, X2=None, verbose=False):
    pass_checks = True
    if X==None:
        X = np.random.randn(10, kern.input_dim)
        if output_ind is not None:
            X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
    if X2==None:
        X2 = np.random.randn(20, kern.input_dim)
        if output_ind is not None:
            X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
    if verbose:
        print("Checking covariance function is positive definite.")
    result = Kern_check_model(kern, X=X).is_positive_definite()
--- a/GPy/kern/parts/hierarchical.py
+++ b/GPy/kern/parts/hierarchical.py
@ -7,7 +7,7 @@ from independent_outputs import index_to_slices
 class Hierarchical(Kernpart):
    """
-    A kernel part which can reopresent a hierarchy of indepencnce: a gerenalisation of independent_outputs
+    A kernel part which can reopresent a hierarchy of indepencnce: a generalisation of independent_outputs
    """
    def __init__(self,parts):
--- a/GPy/kern/parts/kernpart.py
+++ b/GPy/kern/parts/kernpart.py
@ -5,15 +5,18 @@
 class Kernpart(object):
    def __init__(self,input_dim):
        """
-        The base class for a kernpart: a positive definite function which forms part of a kernel
+        The base class for a kernpart: a positive definite function which forms part of a covariance function (kernel).
        :param input_dim: the number of input dimensions to the function
        :type input_dim: int
        Do not instantiate.
        """
        # the input dimensionality for the covariance
        self.input_dim = input_dim
        # the number of optimisable parameters
        self.num_params = 1
        # the name of the covariance function.
        self.name = 'unnamed'
    def _get_params(self):
--- a/GPy/kern/parts/linear.py
+++ b/GPy/kern/parts/linear.py
@ -7,6 +7,7 @@ import numpy as np
 from ...util.linalg import tdot
 from ...util.misc import fast_array_equal
 from scipy import weave
 from ...util.config import *
 class Linear(Kernpart):
    """
@ -51,6 +52,26 @@ class Linear(Kernpart):
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
        # a set of optional args to pass to weave
        weave_options_openmp = {'headers'           : ['<omp.h>'],
                                'extra_compile_args': ['-fopenmp -O3'],
                                'extra_link_args'   : ['-lgomp'],
                                'libraries': ['gomp']}
        weave_options_noopenmp = {'extra_compile_args': ['-O3']}
        if config.getboolean('parallel', 'openmp'):
            self.weave_options = weave_options_openmp
            self.weave_support_code =  """
            #include <omp.h>
            #include <math.h>
            """
        else:
            self.weave_options = weave_options_noopenmp
            self.weave_support_code = """
            #include <math.h>
            """
    def _get_params(self):
        return self.variances
@ -190,11 +211,17 @@ class Linear(Kernpart):
        #target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
        #target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
        if config.getboolean('parallel', 'openmp'):
            pragma_string = "#pragma omp parallel for private(m,mm,q,qq,factor,tmp)"
        else:
            pragma_string = ''
        #Using weave, we can exploiut the symmetry of this problem:
        code = """
        int n, m, mm,q,qq;
        double factor,tmp;
-        #pragma omp parallel for private(m,mm,q,qq,factor,tmp)
+        %s
        for(n=0;n<N;n++){
          for(m=0;m<num_inducing;m++){
            for(mm=0;mm<=m;mm++){
@ -218,19 +245,13 @@ class Linear(Kernpart):
            }
          }
        }
-        """
+        """ % pragma_string
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave_options = {'headers'           : ['<omp.h>'],
                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
                         'extra_link_args'   : ['-lgomp']}
-        N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
+
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+        N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1])
-                     arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
+        weave.inline(code, support_code=self.weave_support_code,
-                     type_converters=weave.converters.blitz,**weave_options)
+                    arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
                    type_converters=weave.converters.blitz,**self.weave_options)
    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
@ -240,9 +261,15 @@ class Linear(Kernpart):
        #dummy_target += psi2_dZ.sum(0).sum(0)
        AZA = self.variances*self.ZAinner
        if config.getboolean('parallel', 'openmp'):
            pragma_string = '#pragma omp parallel for private(n,mm,q)'
        else:
            pragma_string = ''
        code="""
        int n,m,mm,q;
-        #pragma omp parallel for private(n,mm,q)
+        %s
        for(m=0;m<num_inducing;m++){
          for(q=0;q<input_dim;q++){
            for(mm=0;mm<num_inducing;mm++){
@ -252,22 +279,13 @@ class Linear(Kernpart):
            }
          }
        }
-        """
+        """ % pragma_string
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave_options = {'headers'           : ['<omp.h>'],
                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
                         'extra_link_args'   : ['-lgomp']}
-        N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
+
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+        N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1])
        weave.inline(code, support_code=self.weave_support_code, 
                     arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
-                     type_converters=weave.converters.blitz,**weave_options)
+                     type_converters=weave.converters.blitz,**self.weave_options)
    #---------------------------------------#
--- a/GPy/kern/parts/periodic_Matern32.py
+++ b/GPy/kern/parts/periodic_Matern32.py
@ -113,7 +113,7 @@ class PeriodicMatern32(Kernpart):
    @silence_errors
    def dK_dtheta(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
+        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
        if X2 is None: X2 = X
        FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
--- a/GPy/kern/parts/periodic_Matern52.py
+++ b/GPy/kern/parts/periodic_Matern52.py
@ -115,7 +115,7 @@ class PeriodicMatern52(Kernpart):
    @silence_errors
    def dK_dtheta(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
+        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
        if X2 is None: X2 = X
        FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
--- a/GPy/kern/parts/periodic_exponential.py
+++ b/GPy/kern/parts/periodic_exponential.py
@ -111,7 +111,7 @@ class PeriodicExponential(Kernpart):
    @silence_errors
    def dK_dtheta(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
+        """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
        if X2 is None: X2 = X
        FX  = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@ -7,6 +7,7 @@ import numpy as np
 from scipy import weave
 from ...util.linalg import tdot
 from ...util.misc import fast_array_equal
 from ...util.config import *
 class RBF(Kernpart):
    """
@ -57,12 +58,27 @@ class RBF(Kernpart):
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
        # a set of optional args to pass to weave
-        self.weave_options = {'headers'           : ['<omp.h>'],
+        weave_options_openmp = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
+                                'extra_compile_args': ['-fopenmp -O3'],
-                         'extra_link_args'   : ['-lgomp']}
+                                'extra_link_args'   : ['-lgomp'],
                                'libraries': ['gomp']}
        weave_options_noopenmp = {'extra_compile_args': ['-O3']}
        if config.getboolean('parallel', 'openmp'):
            self.weave_options = weave_options_openmp
            self.weave_support_code =  """
            #include <omp.h>
            #include <math.h>
            """
        else:
            self.weave_options = weave_options_noopenmp
            self.weave_support_code = """
            #include <math.h>
            """
    def _get_params(self):
        return np.hstack((self.variance, self.lengthscale))
@ -110,7 +126,7 @@ class RBF(Kernpart):
                  target(q+1) += var_len3(q)*tmp;
                }
                """
-                num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
+                num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim)
                weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
            else:
                code = """
@ -126,7 +142,7 @@ class RBF(Kernpart):
                  target(q+1) += var_len3(q)*tmp;
                }
                """
-                num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
+                num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim)
                # [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
                weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
        else:
@ -287,10 +303,16 @@ class RBF(Kernpart):
            lengthscale2 = self.lengthscale2
        else:
            lengthscale2 = np.ones(input_dim) * self.lengthscale2
        if config.getboolean('parallel', 'openmp'):
            pragma_string = '#pragma omp parallel for private(tmp)'
        else:
            pragma_string = ''
        code = """
        double tmp;
-        #pragma omp parallel for private(tmp)
+        %s
        for (int n=0; n<N; n++){
            for (int m=0; m<num_inducing; m++){
               for (int mm=0; mm<(m+1); mm++){
@ -320,13 +342,20 @@ class RBF(Kernpart):
            }
        }
-        """
+        """ % pragma_string
        if config.getboolean('parallel', 'openmp'):
            pragma_string = '#include <omp.h>'
        else:
            pragma_string = ''
        support_code = """
-        #include <omp.h>
+        %s
        #include <math.h>
-        """
+        """ % pragma_string
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+
        N, num_inducing, input_dim = int(N), int(num_inducing), int(input_dim)
        weave.inline(code, support_code=support_code,
                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
                     type_converters=weave.converters.blitz, **self.weave_options)
--- a/GPy/kern/parts/rbf_inv.py
+++ b/GPy/kern/parts/rbf_inv.py
@ -7,6 +7,8 @@ import numpy as np
 import hashlib
 from scipy import weave
 from ...util.linalg import tdot
 from ...util.config import *
 class RBFInv(RBF):
    """
@ -58,11 +60,23 @@ class RBFInv(RBF):
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
        # a set of optional args to pass to weave
-        self.weave_options = {'headers'           : ['<omp.h>'],
+        weave_options_openmp = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
+                                'extra_compile_args': ['-fopenmp -O3'],
-                         'extra_link_args'   : ['-lgomp']}
+                                'extra_link_args'   : ['-lgomp'],
-
+                                'libraries': ['gomp']}
        weave_options_noopenmp = {'extra_compile_args': ['-O3']}
        if config.getboolean('parallel', 'openmp'):
            self.weave_options = weave_options_openmp
            self.weave_support_code =  """
            #include <omp.h>
            #include <math.h>
            """
        else:
            self.weave_options = weave_options_noopenmp
            self.weave_support_code = """
            #include <math.h>
            """
    def _get_params(self):
        return np.hstack((self.variance, self.inv_lengthscale))
@ -109,7 +123,7 @@ class RBFInv(RBF):
                  target(q+1) += var_len3(q)*tmp*(-len2(q));
                }
                """
-                num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
+                num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim)
                weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
            else:
                code = """
@ -125,7 +139,7 @@ class RBFInv(RBF):
                  target(q+1) += var_len3(q)*tmp*(-len2(q));
                }
                """
-                num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
+                num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim)
                # [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
                weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
        else:
@ -133,7 +147,7 @@ class RBFInv(RBF):
    def dK_dX(self, dL_dK, X, X2, target):
        self._K_computations(X, X2)
-        if X2 is None:            
+        if X2 is None:
            _K_dist = 2*(X[:, None, :] - X[None, :, :])
        else:
            _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
@ -263,8 +277,8 @@ class RBFInv(RBF):
            self._Z, self._mu, self._S = Z, mu, S
    def weave_psi2(self, mu, Zhat):
-        N, input_dim = mu.shape
+        N, input_dim = int(mu.shape[0]), int(mu.shape[1])
-        num_inducing = Zhat.shape[0]
+        num_inducing = int(Zhat.shape[0])
        mudist = np.empty((N, num_inducing, num_inducing, input_dim))
        mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
@ -279,10 +293,16 @@ class RBFInv(RBF):
            inv_lengthscale2 = self.inv_lengthscale2
        else:
            inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
        if config.getboolean('parallel', 'openmp'):
            pragma_string = '#pragma omp parallel for private(tmp)'
        else:
            pragma_string = ''
        code = """
        double tmp;
-        #pragma omp parallel for private(tmp)
+        %s
        for (int n=0; n<N; n++){
            for (int m=0; m<num_inducing; m++){
               for (int mm=0; mm<(m+1); mm++){
@ -312,13 +332,9 @@ class RBFInv(RBF):
            }
        }
-        """
+        """ % pragma_string
-        support_code = """
+        weave.inline(code, support_code=self.weave_support_code,
        #include <omp.h>
        #include <math.h>
        """
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
                     type_converters=weave.converters.blitz, **self.weave_options)
--- a/GPy/kern/parts/sympy_helpers.cpp
+++ b/GPy/kern/parts/sympy_helpers.cpp
@ -1,4 +1,7 @@
 #include <math.h>
 #include <float.h>
 #include <stdlib.h>
 double DiracDelta(double x){
  // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
    if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
@ -23,3 +26,36 @@ double sinc_grad(double x){
  else 
    return (x*cos(x) - sin(x))/(x*x);
 }
 double erfcx(double x){
  double xneg=-sqrt(log(DBL_MAX/2));
  double xmax = 1/(sqrt(M_PI)*DBL_MIN);
  xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
  // Find values where erfcx can be evaluated
  double t = 3.97886080735226 / (abs(x) + 3.97886080735226);
  double u = t-0.5;
  double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u 
 	      - 0.003963850973605135)   * u - 8.70779635317295828e-4) * u 
 	    + 0.00773672528313526668) * u + 0.00383335126264887303) * u 
 	  - 0.0127223813782122755)  * u - 0.0133823644533460069)  * u 
 	+ 0.0161315329733252248)  * u + 0.0390976845588484035)  * u + 0.00249367200053503304;
  if (x<xneg)
    return -INFINITY;
  else if (x<0)
    return 2*exp(x*x)-y;
  else if (x>xmax)
    return 0.0;
  else 
    return y;
 }
 double ln_diff_erf(double x0, double x1){
  if (x0==x1)
    return INFINITY;
  else if(x0<0 && x1>0 || x0>0 && x1<0)
    return log(erf(x0)-erf(x1));
  else if(x1>0)
    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1)- x0*x0)-x1*x1;
  else 
    return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
 }
--- a/GPy/kern/parts/sympy_helpers.h
+++ b/GPy/kern/parts/sympy_helpers.h
@ -4,3 +4,6 @@ double DiracDelta(double x, int foo);
 double sinc(double x);
 double sinc_grad(double x);
 double erfcx(double x);
 double ln_diff_erf(double x0, double x1);
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@ -9,6 +9,7 @@ import sys
 current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
 import tempfile
 import pdb
 import ast
 from kernpart import Kernpart
 class spkern(Kernpart):
@ -16,64 +17,388 @@ class spkern(Kernpart):
    A kernel object, where all the hard work in done by sympy.
    :param k: the covariance function
-    :type k: a positive definite sympy function of x1, z1, x2, z2...
+    :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
    To construct a new sympy kernel, you'll need to define:
     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
     - that's it! we'll extract the variables from the function k.
    Note:
-     - to handle multiple inputs, call them x1, z1, etc
+     - to handle multiple inputs, call them x_1, z_1, etc
-     - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
+     - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
    """
-    def __init__(self,input_dim,k,name=None,param=None):
+    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
        if name is None:
            self.name='sympykern'
        else:
            self.name = name
        if k is None:
            raise ValueError, "You must provide an argument for the covariance function."
        self._sp_k = k
        sp_vars = [e for e in k.atoms() if e.is_Symbol]
-        self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
+        self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
-        self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
+        self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
-        assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
+        # Check that variable names make sense.
-        assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
+        assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
        assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
        assert len(self._sp_x)==len(self._sp_z)
        self.input_dim = len(self._sp_x)
        self._real_input_dim = self.input_dim
        if output_dim > 1:
            self.input_dim += 1
        assert self.input_dim == input_dim
-        self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
+        self.output_dim = output_dim
-        self.num_params = len(self._sp_theta)
+        # extract parameter names
        thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
-        #deal with param
+
-        if param is None:
+        # Look for parameters with index.
-            param = np.ones(self.num_params)
+        if self.output_dim>1:
-        assert param.size==self.num_params
+            self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
-        self._set_params(param)
+            self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
            # Make sure parameter appears with both indices!
            assert len(self._sp_theta_i)==len(self._sp_theta_j)
            assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
            # Extract names of shared parameters
            self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
            self.num_split_params = len(self._sp_theta_i)
            self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
            for theta in self._split_theta_names:
                setattr(self, theta, np.ones(self.output_dim))
            self.num_shared_params = len(self._sp_theta)
            self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
        else:
            self.num_split_params = 0
            self._split_theta_names = []
            self._sp_theta = thetas
            self.num_shared_params = len(self._sp_theta)
            self.num_params = self.num_shared_params
        for theta in self._sp_theta:
            val = 1.0
            if param is not None:
                if param.has_key(theta):
                    val = param[theta]
            setattr(self, theta.name, val)
        #deal with param            
        self._set_params(self._get_params())
        #Differentiate!
        self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
        if self.output_dim > 1:
            self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
        self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
        #self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
-        #self.compute_psi_stats()
+        if False:
            self.compute_psi_stats()
        self._gen_code()
-        self.weave_kwargs = {\
+        if False:
-            'support_code':self._function_code,\
+            extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
-            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
+        else:
-            'headers':['"sympy_helpers.h"'],\
+            extra_compile_args = []
-            'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
+            
-            #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
+        self.weave_kwargs = {
-            'extra_compile_args':[],\
+            'support_code':self._function_code,
-            'extra_link_args':['-lgomp'],\
+            'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
            'headers':['"sympy_helpers.h"'],
            'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
            'extra_compile_args':extra_compile_args,
            'extra_link_args':['-lgomp'],
            'verbose':True}
    def __add__(self,other):
        return spkern(self._sp_k+other._sp_k)
    def _gen_code(self):
        """Generates the C functions necessary for computing the covariance function using the sympy objects as input."""
        #TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances??
        #generate c functions from sympy objects        
        argument_sequence = self._sp_x+self._sp_z+self._sp_theta
        code_list = [('k',self._sp_k)]
        # gradients with respect to covariance input
        code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
        # gradient with respect to parameters
        code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
        # gradient with respect to multiple output parameters
        if self.output_dim > 1:
            argument_sequence += self._sp_theta_i + self._sp_theta_j
            code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
        (foo_c,self._function_code), (foo_h,self._function_header) = \
                                     codegen(code_list, "C",'foobar',argument_sequence=argument_sequence)
        #put the header file where we can find it
        f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
        f.write(self._function_header)
        f.close()
        # Substitute any known derivatives which sympy doesn't compute
        self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
        ############################################################
        # This is the basic argument construction for the C code.  #
        ############################################################
        arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
                    + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
        # for multiple outputs need to also provide these arguments reversed.
        if self.output_dim>1:
            reverse_arg_list = list(arg_list)
            reverse_arg_list.reverse()
        # Add in any 'shared' parameters to the list.
        param_arg_list = [shared_params.name for shared_params in self._sp_theta]
        arg_list += param_arg_list
        precompute_list=[]
        if self.output_dim > 1:
            reverse_arg_list+=list(param_arg_list)
            split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
            split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
            arg_list += split_param_arg_list
            reverse_arg_list += split_param_reverse_arg_list
            # Extract the right output indices from the inputs.
            c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
            precompute_list += c_define_output_indices
            reverse_arg_string = ", ".join(reverse_arg_list)
        arg_string = ", ".join(arg_list)
        precompute_string = "\n".join(precompute_list)
        # Code to compute argments string needed when only X is provided.
        X_arg_string = re.sub('Z','X',arg_string)
        # Code to compute argument string when only diagonal is required.
        diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
        diag_arg_string = re.sub('j','i',diag_arg_string)
        diag_precompute_string = precompute_list[0]
        # Here's the code to do the looping for K
        self._K_code =\
        """
        // _K_code
        // Code for computing the covariance function.
        int i;
        int j;
        int N = target_array->dimensions[0];
        int num_inducing = target_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            for (j=0;j<num_inducing;j++){
 %s
                //target[i*num_inducing+j] = 
                TARGET2(i, j) += k(%s);
            }
        }
        %s
        """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        self._K_code_X = """
        // _K_code_X
        // Code for computing the covariance function.
        int i;
        int j;
        int N = target_array->dimensions[0];
        int num_inducing = target_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            %s // int ii=(int)X2(i, 1);
            TARGET2(i, i) += k(%s);
            for (j=0;j<i;j++){
              %s //int jj=(int)X2(j, 1);
              double kval = k(%s); //double kval = k(X2(i, 0), X2(j, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii), LENGTHSCALE1(jj), SCALE1(jj));
              TARGET2(i, j) += kval;
              TARGET2(j, i) += kval;
            }
        }
        /*%s*/
        """%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', precompute_list[1]), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
        # Code to do the looping for Kdiag
        self._Kdiag_code =\
        """
        // _Kdiag_code
        // Code for computing diagonal of covariance function.
        int i;
        int N = target_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for
        for (i=0;i<N;i++){
                %s
                //target[i] =
                TARGET1(i)=k(%s);
        }
        %s
        """%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        # Code to compute gradients
        grad_func_list = []
        if self.output_dim>1:
            grad_func_list += c_define_output_indices
            grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
            grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
        grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in  enumerate(self._sp_theta)])
        grad_func_string = '\n'.join(grad_func_list) 
        self._dK_dtheta_code =\
        """
        // _dK_dtheta_code
        // Code for computing gradient of covariance with respect to parameters.
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int num_inducing = partial_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            for (j=0;j<num_inducing;j++){
 %s
            }
        }
        %s
        """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
        # Code to compute gradients for Kdiag TODO: needs clean up
        diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
        diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
        diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
        diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string)
        self._dKdiag_dtheta_code =\
        """
        // _dKdiag_dtheta_code
        // Code for computing gradient of diagonal with respect to parameters.
        int i;
        int N = partial_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        for (i=0;i<N;i++){
                %s
        }
        %s
        """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
        gradX_func_list = []
        if self.output_dim>1:
            gradX_func_list += c_define_output_indices
        gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
        gradX_func_string = "\n".join(gradX_func_list)
        self._dK_dX_code = \
        """
        // _dK_dX_code
        // Code for computing gradient of covariance with respect to inputs.
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int num_inducing = partial_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N; i++){
          for (j=0; j<num_inducing; j++){
            %s
          }
        }
        %s
        """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
        diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
        diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
        diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string)
        # Code for gradients of Kdiag wrt X
        self._dKdiag_dX_code= \
        """
        // _dKdiag_dX_code
        // Code for computing gradient of diagonal with respect to inputs.
        int N = partial_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        for (int i=0;i<N; i++){
            %s
        }
        %s
        """%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
        # string representation forces recompile when needed Get rid
        # of Zs in argument for diagonal. TODO: Why wasn't
        # diag_func_string called here? Need to check that.
        #self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
        # Code to use when only X is provided. 
        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(')
        self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(')
        #TODO: insert multiple functions here via string manipulation
        #TODO: similar functions for psi_stats
    def _get_arg_names(self, Z=None, partial=None):
        arg_names = ['target','X']
        for shared_params in self._sp_theta:
            arg_names += [shared_params.name]
        if Z is not None:
            arg_names += ['Z']
        if partial is not None:
            arg_names += ['partial']
        if self.output_dim>1:
            arg_names += self._split_theta_names
            arg_names += ['output_dim']
        return arg_names
    def _weave_inline(self, code, X, target, Z=None, partial=None):
        output_dim = self.output_dim
        for shared_params in self._sp_theta:
            locals()[shared_params.name] = getattr(self, shared_params.name)
        # Need to extract parameters first
        for split_params in self._split_theta_names:
            locals()[split_params] = getattr(self, split_params)
        arg_names = self._get_arg_names(Z, partial)        
        weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
    def K(self,X,Z,target):        
        if Z is None:
            self._weave_inline(self._K_code_X, X, target)
        else:
            self._weave_inline(self._K_code, X, target, Z)
    def Kdiag(self,X,target):
        self._weave_inline(self._Kdiag_code, X, target)
    def dK_dtheta(self,partial,X,Z,target):
        if Z is None:
            self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
        else:
            self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
    def dKdiag_dtheta(self,partial,X,target):
        self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
    def dK_dX(self,partial,X,Z,target):
        if Z is None:
            self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
        else:
            self._weave_inline(self._dK_dX_code, X, target, Z, partial)
    def dKdiag_dX(self,partial,X,target):
        self._weave.inline(self._dKdiag_dX_code, X, target, Z, partial)
    def compute_psi_stats(self):
        #define some normal distributions
-        mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)]
+        mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
-        Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)]
+        Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
        #do some integration!
@ -99,188 +424,29 @@ class spkern(Kernpart):
        self._sp_psi2 = self._sp_psi2.simplify()
-    def _gen_code(self):
+    def _set_params(self,param):        
-        #generate c functions from sympy objects
+        assert param.size == (self.num_params)
-        (foo_c,self._function_code),(foo_h,self._function_header) = \
+        for i, shared_params in enumerate(self._sp_theta):
-                codegen([('k',self._sp_k)] \
+            setattr(self, shared_params.name, param[i])
-                + [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
+            
-                #+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
+        if self.output_dim>1:
-                + [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
+            for i, split_params in enumerate(self._split_theta_names):
-                ,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
+                start = self.num_shared_params + i*self.output_dim
-        #put the header file where we can find it
+                end = self.num_shared_params + (i+1)*self.output_dim
-        f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
+                setattr(self, split_params, param[start:end])
        f.write(self._function_header)
        f.close()
        # Substitute any known derivatives which sympy doesn't compute
        self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
        # Here's the code to do the looping for K
        arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
                            + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
                            + ["param[%i]"%i for i in range(self.num_params)])
        self._K_code =\
        """
        int i;
        int j;
        int N = target_array->dimensions[0];
        int num_inducing = target_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            for (j=0;j<num_inducing;j++){
                target[i*num_inducing+j] = k(%s);
            }
        }
        %s
        """%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        # Similar code when only X is provided. 
        self._K_code_X = self._K_code.replace('Z[', 'X[')
        # Code to compute diagonal of covariance.
        diag_arglist = re.sub('Z','X',arglist)
        diag_arglist = re.sub('j','i',diag_arglist)
        # Code to do the looping for Kdiag
        self._Kdiag_code =\
        """
        int i;
        int N = target_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for
        for (i=0;i<N;i++){
                target[i] = k(%s);
        }
        %s
        """%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        # Code to compute gradients
        funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in  enumerate(self._sp_theta)])
        self._dK_dtheta_code =\
        """
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int num_inducing = partial_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            for (j=0;j<num_inducing;j++){
 %s
            }
        }
        %s
        """%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
        # Similar code when only X is provided, change argument lists.
        self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
        # Code to compute gradients for Kdiag TODO: needs clean up
        diag_funclist = re.sub('Z','X',funclist,count=0)
        diag_funclist = re.sub('j','i',diag_funclist)
        diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
        self._dKdiag_dtheta_code =\
        """
        int i;
        int N = partial_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        for (i=0;i<N;i++){
                %s
        }
        %s
        """%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        # Code for gradients wrt X
        gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
        if False:
            gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
            if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""
        self._dK_dX_code = \
        """
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int num_inducing = partial_array->dimensions[1];
        int input_dim = X_array->dimensions[1];
        //#pragma omp parallel for private(j)
        for (i=0;i<N; i++){
          for (j=0; j<num_inducing; j++){
            %s
          }
        }
        %s
        """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
        # Create code for call when just X is passed as argument.
        self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
        diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
        diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
        diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)
        # Code for gradients of Kdiag wrt X
        self._dKdiag_dX_code= \
        """
        int N = partial_array->dimensions[0];
        int input_dim = X_array->dimensions[1];
        for (int i=0;i<N; i++){
            %s
        }
        %s
        """%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a
        # string representation forces recompile when needed Get rid
        # of Zs in argument for diagonal. TODO: Why wasn't
        # diag_funclist called here? Need to check that.
        #self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
        #TODO: insert multiple functions here via string manipulation
        #TODO: similar functions for psi_stats
    def K(self,X,Z,target):
        param = self._param
        if Z is None:
            weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
        else:
            weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
    def Kdiag(self,X,target):
        param = self._param
        weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
    def dK_dtheta(self,partial,X,Z,target):
        param = self._param
        if Z is None:
            weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
        else:
            weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
    def dKdiag_dtheta(self,partial,X,target):
        param = self._param
        weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
    def dK_dX(self,partial,X,Z,target):
        param = self._param
        if Z is None:
            weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
        else:
            weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
    def dKdiag_dX(self,partial,X,target):
        param = self._param
        weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
    def _set_params(self,param):
        #print param.flags['C_CONTIGUOUS']
        self._param = param.copy()
    def _get_params(self):
-        return self._param
+        params = np.zeros(0)
        for shared_params in self._sp_theta:
            params = np.hstack((params, getattr(self, shared_params.name)))
        if self.output_dim>1:
            for split_params in self._split_theta_names:
                params = np.hstack((params, getattr(self, split_params).flatten()))
        return params
    def _get_param_names(self):
-        return [x.name for x in self._sp_theta]
+        if self.output_dim>1:
            return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i)  for x in self._sp_theta_i for i in range(self.output_dim)]
        else:
            return [x.name for x in self._sp_theta]
--- a/GPy/likelihoods/ep.py
+++ b/GPy/likelihoods/ep.py
@ -18,7 +18,7 @@ class EP(likelihood):
        self.data = data
        self.num_data, self.output_dim = self.data.shape
        self.is_heteroscedastic = True
-        self.Nparams = 0
+        self.num_params = 0
        self._transf_data = self.noise_model._preprocess_values(data)
        #Initial values - Likelihood approximation parameters:
--- a/GPy/likelihoods/ep_mixed_noise.py
+++ b/GPy/likelihoods/ep_mixed_noise.py
@ -31,7 +31,7 @@ class EP_Mixed_Noise(likelihood):
        self.data = np.vstack(data_list)
        self.N, self.output_dim = self.data.shape
        self.is_heteroscedastic = True
-        self.Nparams = 0#FIXME
+        self.num_params = 0#FIXME
        self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
        #TODO non-gaussian index
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -15,7 +15,7 @@ class Gaussian(likelihood):
    """
    def __init__(self, data, variance=1., normalize=False):
        self.is_heteroscedastic = False
-        self.Nparams = 1
+        self.num_params = 1
        self.Z = 0. # a correction factor which accounts for the approximation made
        N, self.output_dim = data.shape
--- a/GPy/likelihoods/gaussian_mixed_noise.py
+++ b/GPy/likelihoods/gaussian_mixed_noise.py
@ -23,14 +23,14 @@ class Gaussian_Mixed_Noise(likelihood):
    :type normalize: False|True
    """
    def __init__(self, data_list, noise_params=None, normalize=True):
-        self.Nparams = len(data_list)
+        self.num_params = len(data_list)
        self.n_list = [data.size for data in data_list]
-        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)])
+        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])
        if noise_params is None:
-            noise_params = [1.] * self.Nparams
+            noise_params = [1.] * self.num_params
        else:
-            assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
+            assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
        self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
        self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
--- a/GPy/likelihoods/noise_models/binomial_noise.py
+++ b/GPy/likelihoods/noise_models/binomial_noise.py
@ -117,3 +117,16 @@ class Binomial(NoiseDistribution):
    def _d2variance_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.
        :param size: number of samples to compute
        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
        return Ysim.reshape(orig_shape)
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@ -413,3 +413,13 @@ class NoiseDistribution(object):
        q1 = np.vstack(q1)
        q3 = np.vstack(q3)
        return pred_mean, pred_var, q1, q3
    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.
        :param gp: latent variable
        """
        pass
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -211,8 +211,8 @@ class MRD(Model):
 #         g.Z = Z.reshape(self.num_inducing, self.input_dim)
 #
 #     def _set_kern_params(self, g, p):
-#         g.kern._set_params(p[:g.kern.Nparam])
+#         g.kern._set_params(p[:g.kern.num_params])
-#         g.likelihood._set_params(p[g.kern.Nparam:])
+#         g.likelihood._set_params(p[g.kern.num_params:])
    def _set_params(self, x):
        start = 0; end = self.NQ
--- a/GPy/models/svigp_regression.py
+++ b/GPy/models/svigp_regression.py
@ -25,7 +25,7 @@ class SVIGPRegression(SVIGP):
    """
-    def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10):
+    def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10, normalize_Y=False):
        # kern defaults to rbf (plus white for stability)
        if kernel is None:
            kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
@ -38,7 +38,7 @@ class SVIGPRegression(SVIGP):
            assert Z.shape[1] == X.shape[1]
        # likelihood defaults to Gaussian
-        likelihood = likelihoods.Gaussian(Y, normalize=False)
+        likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
        SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
        self.load_batch()
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -7,6 +7,13 @@ import GPy
 verbose = False
 try:
    import sympy
    SYMPY_AVAILABLE=True
 except ImportError:
    SYMPY_AVAILABLE=False
 class KernelTests(unittest.TestCase):
    def test_kerneltie(self):
        K = GPy.kern.rbf(5, ARD=True)
@ -22,7 +29,16 @@ class KernelTests(unittest.TestCase):
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
    def test_rbf_sympykernel(self):
-        kern = GPy.kern.rbf_sympy(5)
+        if SYMPY_AVAILABLE:
            kern = GPy.kern.rbf_sympy(5)
            self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
    def test_eq_sympykernel(self):
        kern = GPy.kern.eq_sympy(5, 3, output_ind=4)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
    def test_sinckernel(self):
        kern = GPy.kern.sinc(5)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
    def test_rbf_invkernel(self):
--- a/GPy/util/init.py
+++ b/GPy/util/init.py
@ -14,3 +14,5 @@ import visualize
 import decorators
 import classification
 import latent_space_visualizations
 import netpbmfile
--- a/GPy/util/config.py
+++ b/GPy/util/config.py
@ -0,0 +1,17 @@
 #
 # This loads the configuration
 #
 import ConfigParser
 import os
 config = ConfigParser.ConfigParser()
 user_file = os.path.join(os.getenv('HOME'),'.gpy_config.cfg')
 default_file = os.path.join('..','gpy_config.cfg')
 # 1. check if the user has a ~/.gpy_config.cfg
 if os.path.isfile(user_file):
    config.read(user_file)
 else:
    # 2. if not, use the default one
    path = os.path.dirname(__file__)
    config.read(os.path.join(path,default_file))
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -8,17 +8,12 @@ import zipfile
 import tarfile
 import datetime
-ipython_notebook = False
+ipython_available=True
-if ipython_notebook:
+try:
-    import IPython.core.display
+    import IPython
-    def ipynb_input(varname, prompt=''):
+except ImportError:
-        """Prompt user for input and assign string val to given variable name."""
+    ipython_available=False
-        js_code = ("""
+
            var value = prompt("{prompt}","");
            var py_code = "{varname} = '" + value + "'";
            IPython.notebook.kernel.execute(py_code);
        """).format(prompt=prompt, varname=varname)
        return IPython.core.display.Javascript(js_code)
 import sys, urllib
@ -34,8 +29,11 @@ data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000
 overide_manual_authorize=False
 neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
 sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
 cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
-# Note: there may be a better way of storing data resources. One of the pythonistas will need to take a look.
+
 # Note: there may be a better way of storing data resources, for the
 # moment we are storing them in a dictionary.
 data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
                                       'files' : [['ankurDataPoseSilhouette.mat']],
                                       'license' : None,
@ -49,7 +47,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
                                      'license' : None,
                                      'size' : 51276
                                      },
-                  'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'],
+                  'brendan_faces' : {'urls' : [sam_url],
                                     'files': [['frey_rawface.mat']],
                                     'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
                                     'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
@ -93,6 +91,12 @@ The database was created with funding from NSF EIA-0196217.""",
                                            'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
                                            'license' : None,
                                            'size' : 21949154},
                  'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
                                      'files' : [['att_faces.zip'], ['olivettifaces.mat']],
                                            'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
                                            'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
                                            'license': None,
                                            'size' : 8561331},
                  'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
                                            'files' : [['olympicMarathonTimes.csv']],
                                            'citation' : None,
@ -141,26 +145,41 @@ The database was created with funding from NSF EIA-0196217.""",
                                        'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
                                        'license' : None,
                                        'size' : 24229368},
                  'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
                                        'files' : [['xw_pen_15.csv']],
                                        'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
                                        'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
                                        'license' : None,
                                        'size' : 3410}
                  }
-def prompt_user():
+def prompt_user(prompt):
    """Ask user for agreeing to data set licenses."""
    # raw_input returns the empty string for "enter"
    yes = set(['yes', 'y'])
    no = set(['no','n'])
-    choice = ''
+
-    if ipython_notebook:
+    try:
-        ipynb_input(choice, prompt='provide your answer here')
+        print(prompt)
    else:
        choice = raw_input().lower()
        # would like to test for exception here, but not sure if we can do that without importing IPython
    except: 
        print('Stdin is not implemented.')
        print('You need to set')
        print('overide_manual_authorize=True')
        print('to proceed with the download. Please set that variable and continue.')
        raise
    if choice in yes:
        return True
    elif choice in no:
        return False
    else:
-        sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'")
+        print("Your response was a " + choice)
-        return prompt_user()
+        print("Please respond with 'yes', 'y' or 'no', 'n'")
        #return prompt_user()
 def data_available(dataset_name=None):
@ -212,15 +231,14 @@ def authorize_download(dataset_name=None):
            print('You must also agree to the following license:')
            print(dr['license'])
            print('')
-        print('Do you wish to proceed with the download? [yes/no]')
+        return prompt_user('Do you wish to proceed with the download? [yes/no]')
        return prompt_user()
 def download_data(dataset_name=None):
    """Check with the user that the are happy with terms and conditions for the data set, then download it."""
    dr = data_resources[dataset_name]
    if not authorize_download(dataset_name):
-        return False
+        raise Exception("Permission to download data set denied.")
    if dr.has_key('suffices'):
        for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']):
@ -489,13 +507,13 @@ def ripley_synth(data_set='ripley_prnn_data'):
    return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
 def osu_run1(data_set='osu_run1', sample_every=4):
    path = os.path.join(data_path, data_set)
    if not data_available(data_set):
        download_data(data_set)
-    zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
+        zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r')
-    path = os.path.join(data_path, data_set)
+        for name in zip.namelist():
-    for name in zip.namelist():
+            zip.extract(name, path)
-        zip.extract(name, path)
+    Y, connect = GPy.util.mocap.load_text_data('Aug210106', path)
    Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
    Y = Y[0:-1:sample_every, :]
    return data_details_return({'Y': Y, 'connect' : connect}, data_set)
@ -579,8 +597,34 @@ def toy_linear_1d_classification(seed=default_seed):
    X = (np.r_[x1, x2])[:, None]
    return {'X': X, 'Y':  sample_class(2.*X), 'F': 2.*X, 'seed' : seed}
-def olympic_100m_men(data_set='rogers_girolami_data'):
+def olivetti_faces(data_set='olivetti_faces'):
    path = os.path.join(data_path, data_set)
    if not data_available(data_set):
        download_data(data_set)
        zip = zipfile.ZipFile(os.path.join(path, 'att_faces.zip'), 'r')
        for name in zip.namelist():
            zip.extract(name, path)
    Y = []
    lbls = []
    for subject in range(40):
        for image in range(10):
            image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm')
            Y.append(GPy.util.netpbmfile.imread(image_path).flatten())
            lbls.append(subject)
    Y = np.asarray(Y)
    lbls = np.asarray(lbls)[:, None]
    return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set)
 def xw_pen(data_set='xw_pen'):
    if not data_available(data_set):
        download_data(data_set)
    Y = np.loadtxt(os.path.join(data_path, data_set, 'xw_pen_15.csv'), delimiter=',')
    X = np.arange(485)[:, None]
    return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set)
 def download_rogers_girolami_data():
    if not data_available('rogers_girolami_data'):
        download_data(data_set)
        path = os.path.join(data_path, data_set)
        tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
@ -588,6 +632,9 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
        print('Extracting file.')
        tar.extractall(path=path)
        tar.close()
 def olympic_100m_men(data_set='rogers_girolami_data'):
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100']
    X = olympic_data[:, 0][:, None]
@ -595,20 +642,45 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
    return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
 def olympic_100m_women(data_set='rogers_girolami_data'):
-    if not data_available(data_set):
+    download_rogers_girolami_data()
        download_data(data_set)
        path = os.path.join(data_path, data_set)
        tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
        tar = tarfile.open(tar_file)
        print('Extracting file.')
        tar.extractall(path=path)
        tar.close()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100']
    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
 def olympic_200m_women(data_set='rogers_girolami_data'):
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female200']
    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
 def olympic_200m_men(data_set='rogers_girolami_data'):
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male200']
    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    return data_details_return({'X': X, 'Y': Y, 'info': "Male 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
 def olympic_400m_women(data_set='rogers_girolami_data'):
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female400']
    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
 def olympic_400m_men(data_set='rogers_girolami_data'):
    download_rogers_girolami_data()
    olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male400']
    X = olympic_data[:, 0][:, None]
    Y = olympic_data[:, 1][:, None]
    return data_details_return({'X': X, 'Y': Y, 'info': "Male 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
 def olympic_marathon_men(data_set='olympic_marathon_men'):
    if not data_available(data_set):
        download_data(data_set)
@ -617,6 +689,26 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
    Y = olympics[:, 1:2]
    return data_details_return({'X': X, 'Y': Y}, data_set)
 def olympics():
    """All olympics sprint winning times for multiple output prediction."""
    X = np.zeros((0, 2))
    Y = np.zeros((0, 1))
    for i, dataset in enumerate([olympic_100m_men,
                              olympic_100m_women,
                              olympic_200m_men,
                              olympic_200m_women,
                              olympic_400m_men,
                              olympic_400m_women]):
        data = dataset()
        year = data['X']
        time = data['Y']
        X = np.vstack((X, np.hstack((year, np.ones_like(year)*i))))
        Y = np.vstack((Y, time))
    data['X'] = X
    data['Y'] = Y
    data['info'] = "Olympics sprint event winning for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning."
    return data
 # def movielens_small(partNo=1,seed=default_seed):
 #     np.random.seed(seed=seed)
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -325,6 +325,7 @@ def symmetrify(A, upper=False):
    """
    N, M = A.shape
    assert N == M
    c_contig_code = """
    int iN;
    for (int i=1; i<N; i++){
@ -343,6 +344,8 @@ def symmetrify(A, upper=False):
      }
    }
    """
    N = int(N) # for safe type casting
    if A.flags['C_CONTIGUOUS'] and upper:
        weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
    elif A.flags['C_CONTIGUOUS'] and not upper:
@ -403,4 +406,3 @@ def backsub_both_sides(L, X, transpose='left'):
    else:
        tmp, _ = lapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=0)
        return lapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=0)[0].T
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@ -3,6 +3,7 @@
 import numpy as np
 from scipy import weave
 from config import *
 def opt_wrapper(m, **kwargs):
    """
@ -57,11 +58,18 @@ def kmm_init(X, m = 10):
    return X[inducing]
 def fast_array_equal(A, B):
    if config.getboolean('parallel', 'openmp'):
        pragma_string = '#pragma omp parallel for private(i, j)'
    else:
        pragma_string = ''
    code2="""
    int i, j;
    return_val = 1;
-    #pragma omp parallel for private(i, j)
+    %s
    for(i=0;i<N;i++){
       for(j=0;j<D;j++){
          if(A(i, j) != B(i, j)){
@ -70,13 +78,18 @@ def fast_array_equal(A, B):
          }
       }
    }
-    """
+    """ % pragma_string
    if config.getboolean('parallel', 'openmp'):
        pragma_string = '#pragma omp parallel for private(i, j, z)'
    else:
        pragma_string = ''
    code3="""
    int i, j, z;
    return_val = 1;
-    #pragma omp parallel for private(i, j, z)
+    %s
    for(i=0;i<N;i++){
       for(j=0;j<D;j++){
         for(z=0;z<Q;z++){
@ -87,35 +100,48 @@ def fast_array_equal(A, B):
          }
       }
    }
-    """
+    """ % pragma_string
    if config.getboolean('parallel', 'openmp'):
        pragma_string = '#include <omp.h>'
    else:
        pragma_string = ''
    support_code = """
-    #include <omp.h>
+    %s
    #include <math.h>
-    """
+    """ % pragma_string
    weave_options = {'headers'           : ['<omp.h>'],
                     'extra_compile_args': ['-fopenmp -O3'],
                     'extra_link_args'   : ['-lgomp']}
    weave_options_openmp = {'headers'           : ['<omp.h>'],
                            'extra_compile_args': ['-fopenmp -O3'],
                            'extra_link_args'   : ['-lgomp'],
                            'libraries': ['gomp']}
    weave_options_noopenmp = {'extra_compile_args': ['-O3']}
    if config.getboolean('parallel', 'openmp'):
        weave_options = weave_options_openmp
    else:
        weave_options = weave_options_noopenmp
    value = False
    if (A == None) and (B == None):
        return True
    elif ((A == None) and (B != None)) or ((A != None) and (B == None)):
        return False
    elif A.shape == B.shape:
        if A.ndim == 2:
-            N, D = A.shape
+            N, D = [int(i) for i in A.shape]
-            value = weave.inline(code2, support_code=support_code, libraries=['gomp'],
+            value = weave.inline(code2, support_code=support_code,
                                 arg_names=['A', 'B', 'N', 'D'],
-                                 type_converters=weave.converters.blitz,**weave_options)
+                                 type_converters=weave.converters.blitz, **weave_options)
        elif A.ndim == 3:
-            N, D, Q = A.shape
+            N, D, Q = [int(i) for i in A.shape]
-            value = weave.inline(code3, support_code=support_code, libraries=['gomp'],
+            value = weave.inline(code3, support_code=support_code,
                                 arg_names=['A', 'B', 'N', 'D', 'Q'],
-                                 type_converters=weave.converters.blitz,**weave_options)
+                                 type_converters=weave.converters.blitz, **weave_options)
        else:
            value = np.array_equal(A,B)
--- a/GPy/util/netpbmfile.py
+++ b/GPy/util/netpbmfile.py
@ -0,0 +1,331 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # netpbmfile.py
 # Copyright (c) 2011-2013, Christoph Gohlke
 # Copyright (c) 2011-2013, The Regents of the University of California
 # Produced at the Laboratory for Fluorescence Dynamics.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
 # * Redistributions of source code must retain the above copyright
 #   notice, this list of conditions and the following disclaimer.
 # * Redistributions in binary form must reproduce the above copyright
 #   notice, this list of conditions and the following disclaimer in the
 #   documentation and/or other materials provided with the distribution.
 # * Neither the name of the copyright holders nor the names of any
 #   contributors may be used to endorse or promote products derived
 #   from this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 """Read and write image data from respectively to Netpbm files.
 This implementation follows the Netpbm format specifications at
 http://netpbm.sourceforge.net/doc/. No gamma correction is performed.
 The following image formats are supported: PBM (bi-level), PGM (grayscale),
 PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only).
 :Author:
  `Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`_
 :Organization:
  Laboratory for Fluorescence Dynamics, University of California, Irvine
 :Version: 2013.01.18
 Requirements
 ------------
 * `CPython 2.7, 3.2 or 3.3 <http://www.python.org>`_
 * `Numpy 1.7 <http://www.numpy.org>`_
 * `Matplotlib 1.2 <http://www.matplotlib.org>`_  (optional for plotting)
 Examples
 --------
 >>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
 >>> imsave('_tmp.pgm', im1)
 >>> im2 = imread('_tmp.pgm')
 >>> assert numpy.all(im1 == im2)
 """
 from __future__ import division, print_function
 import sys
 import re
 import math
 from copy import deepcopy
 import numpy
 __version__ = '2013.01.18'
 __docformat__ = 'restructuredtext en'
 __all__ = ['imread', 'imsave', 'NetpbmFile']
 def imread(filename, *args, **kwargs):
    """Return image data from Netpbm file as numpy array.
    `args` and `kwargs` are arguments to NetpbmFile.asarray().
    Examples
    --------
    >>> image = imread('_tmp.pgm')
    """
    try:
        netpbm = NetpbmFile(filename)
        image = netpbm.asarray()
    finally:
        netpbm.close()
    return image
 def imsave(filename, data, maxval=None, pam=False):
    """Write image data to Netpbm file.
    Examples
    --------
    >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16)
    >>> imsave('_tmp.pgm', image)
    """
    try:
        netpbm = NetpbmFile(data, maxval=maxval)
        netpbm.write(filename, pam=pam)
    finally:
        netpbm.close()
 class NetpbmFile(object):
    """Read and write Netpbm PAM, PBM, PGM, PPM, files."""
    _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB',
              b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB',
              b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'}
    def __init__(self, arg=None, **kwargs):
        """Initialize instance from filename, open file, or numpy array."""
        for attr in ('header', 'magicnum', 'width', 'height', 'maxval',
                     'depth', 'tupltypes', '_filename', '_fh', '_data'):
            setattr(self, attr, None)
        if arg is None:
            self._fromdata([], **kwargs)
        elif isinstance(arg, basestring):
            self._fh = open(arg, 'rb')
            self._filename = arg
            self._fromfile(self._fh, **kwargs)
        elif hasattr(arg, 'seek'):
            self._fromfile(arg, **kwargs)
            self._fh = arg
        else:
            self._fromdata(arg, **kwargs)
    def asarray(self, copy=True, cache=False, **kwargs):
        """Return image data from file as numpy array."""
        data = self._data
        if data is None:
            data = self._read_data(self._fh, **kwargs)
            if cache:
                self._data = data
            else:
                return data
        return deepcopy(data) if copy else data
    def write(self, arg, **kwargs):
        """Write instance to file."""
        if hasattr(arg, 'seek'):
            self._tofile(arg, **kwargs)
        else:
            with open(arg, 'wb') as fid:
                self._tofile(fid, **kwargs)
    def close(self):
        """Close open file. Future asarray calls might fail."""
        if self._filename and self._fh:
            self._fh.close()
            self._fh = None
    def __del__(self):
        self.close()
    def _fromfile(self, fh):
        """Initialize instance from open file."""
        fh.seek(0)
        data = fh.read(4096)
        if (len(data) < 7) or not (b'0' < data[1:2] < b'8'):
            raise ValueError("Not a Netpbm file:\n%s" % data[:32])
        try:
            self._read_pam_header(data)
        except Exception:
            try:
                self._read_pnm_header(data)
            except Exception:
                raise ValueError("Not a Netpbm file:\n%s" % data[:32])
    def _read_pam_header(self, data):
        """Read PAM header and initialize instance."""
        regroups = re.search(
            b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
            b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
            b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
        self.header = regroups[0]
        self.magicnum = b'P7'
        for group in regroups[1:]:
            key, value = group.split()
            setattr(self, unicode(key).lower(), int(value))
        matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
        self.tupltypes = [s.split(None, 1)[1] for s in matches]
    def _read_pnm_header(self, data):
        """Read PNM header and initialize instance."""
        bpm = data[1:2] in b"14"
        regroups = re.search(b"".join((
            b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*",
            b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
            b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
        self.header = regroups[0]
        self.magicnum = regroups[1]
        self.width = int(regroups[2])
        self.height = int(regroups[3])
        self.maxval = int(regroups[4])
        self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1
        self.tupltypes = [self._types[self.magicnum]]
    def _read_data(self, fh, byteorder='>'):
        """Return image data from open file as numpy array."""
        fh.seek(len(self.header))
        data = fh.read()
        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
        depth = 1 if self.magicnum == b"P7 332" else self.depth
        shape = [-1, self.height, self.width, depth]
        size = numpy.prod(shape[1:])
        if self.magicnum in b"P1P2P3":
            data = numpy.array(data.split(None, size)[:size], dtype)
            data = data.reshape(shape)
        elif self.maxval == 1:
            shape[2] = int(math.ceil(self.width / 8))
            data = numpy.frombuffer(data, dtype).reshape(shape)
            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
        else:
            data = numpy.frombuffer(data, dtype)
            data = data[:size * (data.size // size)].reshape(shape)
        if data.shape[0] < 2:
            data = data.reshape(data.shape[1:])
        if data.shape[-1] < 2:
            data = data.reshape(data.shape[:-1])
        if self.magicnum == b"P7 332":
            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
            rgb332 *= [36, 36, 85]
            data = numpy.take(rgb332, data, axis=0)
        return data
    def _fromdata(self, data, maxval=None):
        """Initialize instance from numpy array."""
        data = numpy.array(data, ndmin=2, copy=True)
        if data.dtype.kind not in "uib":
            raise ValueError("not an integer type: %s" % data.dtype)
        if data.dtype.kind == 'i' and numpy.min(data) < 0:
            raise ValueError("data out of range: %i" % numpy.min(data))
        if maxval is None:
            maxval = numpy.max(data)
            maxval = 255 if maxval < 256 else 65535
        if maxval < 0 or maxval > 65535:
            raise ValueError("data out of range: %i" % maxval)
        data = data.astype('u1' if maxval < 256 else '>u2')
        self._data = data
        if data.ndim > 2 and data.shape[-1] in (3, 4):
            self.depth = data.shape[-1]
            self.width = data.shape[-2]
            self.height = data.shape[-3]
            self.magicnum = b'P7' if self.depth == 4 else b'P6'
        else:
            self.depth = 1
            self.width = data.shape[-1]
            self.height = data.shape[-2]
            self.magicnum = b'P5' if maxval > 1 else b'P4'
        self.maxval = maxval
        self.tupltypes = [self._types[self.magicnum]]
        self.header = self._header()
    def _tofile(self, fh, pam=False):
        """Write Netbm file."""
        fh.seek(0)
        fh.write(self._header(pam))
        data = self.asarray(copy=False)
        if self.maxval == 1:
            data = numpy.packbits(data, axis=-1)
        data.tofile(fh)
    def _header(self, pam=False):
        """Return file header as byte string."""
        if pam or self.magicnum == b'P7':
            header = "\n".join((
                "P7",
                "HEIGHT %i" % self.height,
                "WIDTH %i" % self.width,
                "DEPTH %i" % self.depth,
                "MAXVAL %i" % self.maxval,
                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
                "ENDHDR\n"))
        elif self.maxval == 1:
            header = "P4 %i %i\n" % (self.width, self.height)
        elif self.depth == 1:
            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
        else:
            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
        if sys.version_info[0] > 2:
            header = bytes(header, 'ascii')
        return header
    def __str__(self):
        """Return information about instance."""
        return unicode(self.header)
 if sys.version_info[0] > 2:
    basestring = str
    unicode = lambda x: str(x, 'ascii')
 if __name__ == "__main__":
    # Show images specified on command line or all images in current directory
    from glob import glob
    from matplotlib import pyplot
    files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
    for fname in files:
        try:
            pam = NetpbmFile(fname)
            img = pam.asarray(copy=False)
            if False:
                pam.write('_tmp.pgm.out', pam=True)
                img2 = imread('_tmp.pgm.out')
                assert numpy.all(img == img2)
                imsave('_tmp.pgm.out', img)
                img2 = imread('_tmp.pgm.out')
                assert numpy.all(img == img2)
            pam.close()
        except ValueError as e:
            print(fname, e)
            continue
        _shape = img.shape
        if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
            img = img[0]
        cmap = 'gray' if pam.maxval > 1 else 'binary'
        pyplot.imshow(img, cmap, interpolation='nearest')
        pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
                                      _shape, img.dtype))
        pyplot.show()
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@ -1,32 +1,113 @@
-from sympy import Function, S, oo, I, cos, sin
+from sympy import Function, S, oo, I, cos, sin, asin, log, erf,pi,exp
 class ln_diff_erf(Function):
    nargs = 2
    def fdiff(self, argindex=2):
        if argindex == 2:
            x0, x1 = self.args
            return -2*exp(-x1**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
        elif argindex == 1:
            x0, x1 = self.args
            return 2*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
        else:
            raise ArgumentIndexError(self, argindex)
    @classmethod
    def eval(cls, x0, x1):
        if x0.is_Number and x1.is_Number:            
            return log(erf(x0)-erf(x1))
 class sim_h(Function):
    nargs = 5
    def fdiff(self, argindex=1):
        pass
    @classmethod
    def eval(cls, t, tprime, d_i, d_j, l):
        # putting in the is_Number stuff forces it to look for a fdiff method for derivative.
        if (t.is_Number
            and tprime.is_Number
            and d_i.is_Number
            and d_j.is_Number
            and l.is_Number):
            if (t is S.NaN
                or tprime is S.NaN
                or d_i is S.NaN
                or d_j is S.NaN
                or l is S.NaN):
                return S.NaN
            else:
                return (exp((d_j/2*l)**2)/(d_i+d_j)
                        *(exp(-d_j*(tprime - t))
                          *(erf((tprime-t)/l - d_j/2*l)
                            + erf(t/l + d_j/2*l))
                          - exp(-(d_j*tprime + d_i))
                          *(erf(tprime/l - d_j/2*l)
                            + erf(d_j/2*l))))
 class erfc(Function):
    nargs = 1
    @classmethod
    def eval(cls, arg):
        return 1-erf(arg)
 class erfcx(Function):
    nargs = 1
    @classmethod
    def eval(cls, arg):
        return erfc(arg)*exp(arg*arg)
 class sinc_grad(Function):
    nargs = 1
    def fdiff(self, argindex=1):
-        return ((2-x*x)*sin(self.args[0]) - 2*x*cos(x))/(x*x*x)
+        if argindex==1:
            # Strictly speaking this should be computed separately, as it won't work when x=0. See http://calculus.subwiki.org/wiki/Sinc_function
            return ((2-x*x)*sin(self.args[0]) - 2*x*cos(x))/(x*x*x)
        else:
            raise ArgumentIndexError(self, argindex)
    @classmethod
    def eval(cls, x):
-        if x is S.Zero:
+        if x.is_Number:
-            return S.Zero
+            if x is S.NaN:
-        else:
+                return S.NaN
-            return (x*cos(x) - sin(x))/(x*x)
+            elif x is S.Zero:
                return S.Zero
            else:
                return (x*cos(x) - sin(x))/(x*x)
 class sinc(Function):
    nargs = 1
    def fdiff(self, argindex=1):
-        return sinc_grad(self.args[0])
+        if argindex==1:
            return sinc_grad(self.args[0])
        else:
            raise ArgumentIndexError(self, argindex)
    @classmethod
-    def eval(cls, x):
+    def eval(cls, arg):
-        if x is S.Zero:
+        if arg.is_Number:
-            return S.One
+            if arg is S.NaN:
-        else:
+                return S.NaN
-            return sin(x)/x
+            elif arg is S.Zero:
-    
+                return S.One
            else:
                return sin(arg)/arg
        if arg.func is asin:
            x = arg.args[0]
            return x / arg
    def _eval_is_real(self):
        return self.args[0].is_real
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@ -246,17 +246,36 @@ class lvm_dimselect(lvm):
 class image_show(matplotlib_show):
-    """Show a data vector as an image."""
+    """Show a data vector as an image. This visualizer rehapes the output vector and displays it as an image.
-    def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False, palette=[], presetMean = 0., presetSTD = -1., selectImage=0):
+
    :param vals: the values of the output to display.
    :type vals: ndarray
    :param axes: the axes to show the output on.
    :type vals: axes handle
    :param dimensions: the dimensions that the image needs to be transposed to for display.
    :type dimensions: tuple
    :param transpose: whether to transpose the image before display.
    :type bool: default is False.
    :param order: whether array is in Fortan ordering ('F') or Python ordering ('C'). Default is python ('C').
    :type order: string
    :param invert: whether to invert the pixels or not (default False).
    :type invert: bool
    :param palette: a palette to use for the image.
    :param preset_mean: the preset mean of a scaled image.
    :type preset_mean: double
    :param preset_std: the preset standard deviation of a scaled image.
    :type preset_std: double"""
    def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0):
        matplotlib_show.__init__(self, vals, axes)
        self.dimensions = dimensions
        self.transpose = transpose
        self.order = order
        self.invert = invert
        self.scale = scale
        self.palette = palette
-        self.presetMean = presetMean
+        self.preset_mean = preset_mean
-        self.presetSTD = presetSTD
+        self.preset_std = preset_std
-        self.selectImage = selectImage # This is used when the y vector contains multiple images concatenated.
+        self.select_image = select_image # This is used when the y vector contains multiple images concatenated.
        self.set_image(self.vals)
        if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
@ -272,22 +291,22 @@ class image_show(matplotlib_show):
    def set_image(self, vals):
        dim = self.dimensions[0] * self.dimensions[1]
-        nImg = np.sqrt(vals[0,].size/dim)
+        num_images = np.sqrt(vals[0,].size/dim)
-        if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
+        if num_images > 1 and num_images.is_integer(): # Show a mosaic of images
-            nImg = np.int(nImg)
+            num_images = np.int(num_images)
-            self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
+            self.vals = np.zeros((self.dimensions[0]*num_images, self.dimensions[1]*num_images))
-            for iR in range(nImg):
+            for iR in range(num_images):
-                for iC in range(nImg):
+                for iC in range(num_images):
-                    currImgId = iR*nImg + iC
+                    cur_img_id = iR*num_images + iC
-                    currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
+                    cur_img = np.reshape(vals[0,dim*cur_img_id+np.array(range(dim))], self.dimensions, order=self.order)
-                    firstRow = iR*self.dimensions[0]
+                    first_row = iR*self.dimensions[0]
-                    lastRow = (iR+1)*self.dimensions[0]
+                    last_row = (iR+1)*self.dimensions[0]
-                    firstCol = iC*self.dimensions[1]
+                    first_col = iC*self.dimensions[1]
-                    lastCol = (iC+1)*self.dimensions[1]
+                    last_col = (iC+1)*self.dimensions[1]
-                    self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
+                    self.vals[first_row:last_row, first_col:last_col] = cur_img
        else: 
-            self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
+            self.vals = np.reshape(vals[0,dim*self.select_image+np.array(range(dim))], self.dimensions, order=self.order)
        if self.transpose:
            self.vals = self.vals.T
        # if not self.scale:
@ -296,8 +315,8 @@ class image_show(matplotlib_show):
            self.vals = -self.vals
        # un-normalizing, for visualisation purposes:
-        if self.presetSTD >= 0: # The Mean is assumed to be in the range (0,255)
+        if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255)
-            self.vals = self.vals*self.presetSTD + self.presetMean
+            self.vals = self.vals*self.preset_std + self.preset_mean
            # Clipping the values:
            self.vals[self.vals < 0] = 0
            self.vals[self.vals > 255] = 255