diff --git a/.gitignore b/.gitignore index d494630f..7ca1dd25 100644 --- a/.gitignore +++ b/.gitignore @@ -45,4 +45,4 @@ iterate.dat # git merge files # ################### -*.orig \ No newline at end of file +*.orig diff --git a/.travis.yml b/.travis.yml index fb8ddb2c..1f796285 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ virtualenv: system_site_packages: true # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors -before_install: +before_install: - sudo apt-get install -qq python-scipy python-pip - sudo apt-get install -qq python-matplotlib # Workaround for a permissions issue with Travis virtual machine images @@ -17,10 +17,10 @@ before_install: - sudo ln -s /run/shm /dev/shm install: - - pip install --upgrade numpy==1.7.1 - - pip install sphinx + - pip install --upgrade numpy==1.7.1 + - pip install sphinx - pip install nose - pip install . --use-mirrors # command to run tests, e.g. python setup.py test -script: +script: - nosetests GPy/testing diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py index c5350271..97b4fb1d 100644 --- a/GPy/core/fitc.py +++ b/GPy/core/fitc.py @@ -126,7 +126,7 @@ class FITC(SparseGP): self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) # the partial derivative vector for the likelihood - if self.likelihood.Nparams == 0: + if self.likelihood.num_params == 0: # save computation here. self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 67eb7c69..0d1b69a0 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -6,7 +6,7 @@ import numpy as np import pylab as pb from .. import kern from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs -from ..likelihoods import EP +from ..likelihoods import EP, Laplace from gp_base import GPBase class GP(GPBase): @@ -25,20 +25,23 @@ class GP(GPBase): """ def __init__(self, X, likelihood, kernel, normalize_X=False): GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) - self._set_params(self._get_params()) + self.update_likelihood_approximation() - def getstate(self): - return GPBase.getstate(self) - - def setstate(self, state): - GPBase.setstate(self, state) - self._set_params(self._get_params()) def _set_params(self, p): - self.kern._set_params_transformed(p[:self.kern.num_params_transformed()]) - self.likelihood._set_params(p[self.kern.num_params_transformed():]) + new_kern_params = p[:self.kern.num_params_transformed()] + new_likelihood_params = p[self.kern.num_params_transformed():] + old_likelihood_params = self.likelihood._get_params() + + self.kern._set_params_transformed(new_kern_params) + self.likelihood._set_params_transformed(new_likelihood_params) self.K = self.kern.K(self.X) + + #Re fit likelihood approximation (if it is an approx), as parameters have changed + if isinstance(self.likelihood, Laplace): + self.likelihood.fit_full(self.K) + self.K += self.likelihood.covariance_matrix self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) @@ -55,6 +58,10 @@ class GP(GPBase): tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki) + #Adding dZ_dK (0 for a non-approximate likelihood, compensates for + #additional gradients of K when log-likelihood has non-zero Z term) + self.dL_dK += self.likelihood.dZ_dK + def _get_params(self): return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) @@ -94,19 +101,13 @@ class GP(GPBase): return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) 
- 0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z) - def _log_likelihood_gradients(self): """ The gradient of all parameters. Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta """ - #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) - if not isinstance(self.likelihood,EP): - tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) - else: - tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) - return tmp + return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) @@ -193,3 +194,11 @@ class GP(GPBase): """ Xnew = self._add_output_index(Xnew, output) return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args) + + def getstate(self): + return GPBase.getstate(self) + + def setstate(self, state): + GPBase.setstate(self, state) + self._set_params(self._get_params()) + diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 083f9980..10d30358 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -9,7 +9,9 @@ from ..likelihoods import Gaussian, Gaussian_Mixed_Noise class GPBase(Model): """ Gaussian process base model for holding shared behaviour between - sparse_GP and GP models. + sparse_GP and GP models, and potentially other models in the future. + + Here we define some functions that are used by both kinds of model. """ def __init__(self, X, likelihood, kernel, normalize_X=False): self.X = X @@ -34,29 +36,6 @@ class GPBase(Model): # All leaf nodes should call self._set_params(self._get_params()) at # the end - def getstate(self): - """ - Get the current state of the class, here we return everything that is needed to recompute the model. - """ - return Model.getstate(self) + [self.X, - self.num_data, - self.input_dim, - self.kern, - self.likelihood, - self.output_dim, - self._Xoffset, - self._Xscale] - - def setstate(self, state): - self._Xscale = state.pop() - self._Xoffset = state.pop() - self.output_dim = state.pop() - self.likelihood = state.pop() - self.kern = state.pop() - self.input_dim = state.pop() - self.num_data = state.pop() - self.X = state.pop() - Model.setstate(self, state) def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True): """ @@ -110,90 +89,43 @@ class GPBase(Model): return Ysim - def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): + def plot_f(self, *args, **kwargs): """ - Plot the GP's view of the world, where the data is normalized and the - - In one dimension, the function is plotted with a shaded region identifying two standard deviations. - - In two dimsensions, a contour-plot shows the mean predicted function - - Not implemented in higher dimensions + Plot the GP's view of the world, where the data is normalized and shown before the likelihood is applied. - :param samples: the number of a posteriori samples to plot - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]].
Defaluts to data limits - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - :type resolution: int - :param full_cov: - :type full_cov: bool - :param fignum: figure to plot on. - :type fignum: figure number - :param ax: axes to plot on. - :type ax: axes handle + This is a convenience function: we simply call self.plot with the + argument plot_raw set True. All args and kwargs are passed on to + plot. - :param output: which output to plot (for multiple output models only) - :type output: integer (first output is 0) + see also: gp_base.plot """ - if which_data == 'all': - which_data = slice(None) - - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - - if self.X.shape[1] == 1: - resolution = resolution or 200 - Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits) - - m, v = self._raw_predict(Xnew, which_parts=which_parts) - if samples: - Ysim = self.posterior_samples_f(Xnew, samples, which_parts=which_parts, full_cov=True) - for yi in Ysim.T: - ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) - gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax) - - ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) - ax.set_xlim(xmin, xmax) - ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None]))) - ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) - ax.set_ylim(ymin, ymax) - - elif self.X.shape[1] == 2: - - resolution = resolution or 50 - Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution) - m, v = self._raw_predict(Xnew, which_parts=which_parts) - m = m.reshape(resolution, resolution).T - ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable - ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable - ax.set_xlim(xmin[0], xmax[0]) - ax.set_ylim(xmin[1], xmax[1]) - - if samples: - warnings.warn("Samples only implemented for 1 dimensional inputs.") - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): - """ - Plot the GP with noise where the likelihood is Gaussian. + kwargs['plot_raw'] = True + self.plot(*args, **kwargs) + def plot(self, plot_limits=None, which_data_rows='all', + which_data_ycols='all', which_parts='all', fixed_inputs=[], + levels=20, samples=0, fignum=None, ax=None, resolution=None, + plot_raw=False, + linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): + """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In two dimensions, a contour-plot shows the mean predicted function - - Not implemented in higher dimensions + - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions - using which_data and which_functions + using which_data_rows, which_data_ycols and which_parts :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits :type plot_limits: np.array - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice self.X, self.Y + :param which_data_ycols: when the data has several columns (independent outputs), only plot these + :type which_data_ycols: 'all' or a list of integers :param which_parts: which of the kernel functions to plot (additively) :type which_parts: 'all', or list of bools + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D :type resolution: int :param levels: number of levels to plot in a contour plot. @@ -205,216 +137,138 @@ class GPBase(Model): :param ax: axes to plot on. :type ax: axes handle :type output: integer (first output is 0) - :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. - :type fixed_inputs: a list of tuples :param linecol: color of line to plot. :type linecol: :param fillcol: color of fill :param levels: for 2D plotting, the number of contour levels to use. If ax is None, create a new figure """ - if which_data == 'all': - which_data = slice(None) - + #deal with optional arguments + if which_data_rows == 'all': + which_data_rows = slice(None) + if which_data_ycols == 'all': + which_data_ycols = np.arange(self.output_dim) + if len(which_data_ycols)==0: + raise ValueError('No data selected for plotting') if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - plotdims = self.input_dim - len(fixed_inputs) - if plotdims == 1: + #work out what the inputs are for plotting (1D or 2D) + fixed_dims = np.array([i for i,v in fixed_inputs]) + free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims) + + #one dimensional plotting + if len(free_dims) == 1: + + #define the frame on which to plot resolution = resolution or 200 - Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now - - fixed_dims = np.array([i for i,v in fixed_inputs]) - freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims) - - Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits) + Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now + Xnew, xmin, xmax = x_frame1D(Xu[:,free_dims], plot_limits=plot_limits) Xgrid = np.empty((Xnew.shape[0],self.input_dim)) - Xgrid[:,freedim] = Xnew + Xgrid[:,free_dims] = Xnew for i,v in fixed_inputs: Xgrid[:,i] = v - m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts) + #make a prediction on the frame and plot it + if plot_raw: + m, v = self._raw_predict(Xgrid, which_parts=which_parts) + lower = m - 2*np.sqrt(v) + upper = m + 2*np.sqrt(v) + Y = self.likelihood.Y + else: + m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts) + Y = self.likelihood.data + for d in which_data_ycols: + gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) + ax.plot(Xu[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) + #optionally plot some samples if samples:
#NOTE not tested with fixed_inputs Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True) for yi in Ysim.T: ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. - for d in range(m.shape[1]): - gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) - ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5) - ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) + #set the limits of the plot to some sensible values + ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper)) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2: + #2D plotting + elif len(free_dims) == 2: + #define the frame for plotting on resolution = resolution or 50 - Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) + Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now + Xnew, _, _, xmin, xmax = x_frame2D(Xu[:,free_dims], plot_limits, resolution) + Xgrid = np.empty((Xnew.shape[0],self.input_dim)) + Xgrid[:,free_dims] = Xnew + for i,v in fixed_inputs: + Xgrid[:,i] = v x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) - m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) - m = m.reshape(resolution, resolution).T - ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable - Yf = self.likelihood.Y.flatten() - ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable + + #predict on the frame and plot + if plot_raw: + m, _ = self._raw_predict(Xgrid, which_parts=which_parts) + Y = self.likelihood.Y + else: + m, _, _, _ = self.predict(Xgrid, which_parts=which_parts) + Y = self.likelihood.data + for d in which_data_ycols: + m_d = m[:,d].reshape(resolution, resolution).T + ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) + ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + + #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) if samples: - warnings.warn("Samples only implemented for 1 dimensional inputs.") + warnings.warn("Samples are rather difficult to plot for 2D inputs...") else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): + def getstate(self): """ - For a specific output, in a multioutput model, this function works just as plot_f on single output models. - - :param output: which output to plot (for multiple output models only) - :type output: integer (first output is 0) - :param samples: the number of a posteriori samples to plot - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]].
Defaluts to data limits - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - :type resolution: int - :param full_cov: - :type full_cov: bool - :param fignum: figure to plot on. - :type fignum: figure number - :param ax: axes to plot on. - :type ax: axes handle + Get the current state of the class. This is only used to efficiently + pickle the model. See also self.setstate """ - assert output is not None, "An output must be specified." - assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1) + return Model.getstate(self) + [self.X, + self.num_data, + self.input_dim, + self.kern, + self.likelihood, + self.output_dim, + self._Xoffset, + self._Xscale] - if which_data == 'all': - which_data = slice(None) - - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - - if self.X.shape[1] == 2: - Xu = self.X[self.X[:,-1]==output ,0:1] - Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - Xnew_indexed = self._add_output_index(Xnew,output) - - m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts) - - if samples: - Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True) - for yi in Ysim.T: - ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) - - gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax) - ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5) - ax.set_xlim(xmin, xmax) - ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None]))) - ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) - ax.set_ylim(ymin, ymax) - - elif self.X.shape[1] == 3: - raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet" - #if samples: - # warnings.warn("Samples only implemented for 1 dimensional inputs.") - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - - def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): + def setstate(self, state): """ - For a specific output, in a multioutput model, this function works just as plot_f on single output models. - - :param output: which output to plot (for multiple output models only) - :type output: integer (first output is 0) - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :type plot_limits: np.array - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - :type resolution: int - :param levels: number of levels to plot in a contour plot.
- :type levels: int - :param samples: the number of a posteriori samples to plot - :type samples: int - :param fignum: figure to plot on. - :type fignum: figure number - :param ax: axes to plot on. - :type ax: axes handle - :type output: integer (first output is 0) - :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. - :type fixed_inputs: a list of tuples - :param linecol: color of line to plot. - :type linecol: - :param fillcol: color of fill - :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure + Set the state of the model. Used for efficient pickling """ - assert output is not None, "An output must be specified." - assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1) - if which_data == 'all': - which_data = slice(None) + self._Xscale = state.pop() + self._Xoffset = state.pop() + self.output_dim = state.pop() + self.likelihood = state.pop() + self.kern = state.pop() + self.input_dim = state.pop() + self.num_data = state.pop() + self.X = state.pop() + Model.setstate(self, state) - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - - if self.X.shape[1] == 2: - resolution = resolution or 200 - - Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest - Xu = self.X * self._Xscale + self._Xoffset - Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column - - Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - Xnew_indexed = self._add_output_index(Xnew,output) - - - m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output) - - if samples: #NOTE not tested with fixed_inputs - Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output) - for yi in Ysim.T: - ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) - - for d in range(m.shape[1]): - gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) - ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5) - ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) - ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) - ax.set_xlim(xmin, xmax) - ax.set_ylim(ymin, ymax) - - elif self.X.shape[1] == 3: - raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet" - #if samples: - # warnings.warn("Samples only implemented for 1 dimensional inputs.") - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - - - def _add_output_index(self,X,output): + def log_predictive_density(self, x_test, y_test): """ - In a multioutput model, appends an index column to X to specify the output it is related to. + Calculation of the log predictive density - :param X: Input data - :type X: np.ndarray, N x self.input_dim - :param output: output X is related to - :type output: integer in {0,..., output_dim-1} + .. math:: + p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*})df_{*} - .. Note:: For multiple non-independent outputs models only. + :param x_test: test locations (x_{*}) + :type x_test: (Nx1) array + :param y_test: test observations (y_{*}) + :type y_test: (Nx1) array """ - - assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
- - index = np.ones((X.shape[0],1))*output - return np.hstack((X,index)) + mu_star, var_star = self._raw_predict(x_test) + return self.likelihood.log_predictive_density(y_test, mu_star, var_star) diff --git a/GPy/core/model.py b/GPy/core/model.py index 7aff8f4d..95d4565d 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -251,7 +251,7 @@ class Model(Parameterized): self._set_params_transformed(initial_parameters) def ensure_default_constraints(self): - """ + """ Ensure that any variables which should clearly be positive have been constrained somehow. The method performs a regular expression search on parameter names looking for the terms 'variance', 'lengthscale', 'precision', 'decay' and 'kappa'; if these terms are present in the name the parameter is constrained positive. """ - positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] + positive_strings = ['variance', 'lengthscale', 'precision', 'decay', 'kappa'] # param_names = self._get_param_names() currently_constrained = self.all_constrained_indices() to_make_positive = [] @@ -274,7 +274,7 @@ class Model(Parameterized): """ The objective function passed to the optimizer. It combines the likelihood and the priors. - + Failures are handled robustly. The algorithm will try several times to return the objective, and will raise the original exception if the objective cannot be computed. @@ -462,7 +462,7 @@ class Model(Parameterized): numerical_gradient = (f1 - f2) / (2 * dx) global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient))) - + return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) else: # check the gradient of each parameter individually, and do some pretty printing @@ -547,9 +547,9 @@ class Model(Parameterized): :param stop_crit: convergence criterion :type stop_crit: float - .. Note: kwargs are passed to update_likelihood and optimize functions. + .. Note: kwargs are passed to update_likelihood and optimize functions. """ - assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods" + assert isinstance(self.likelihood, (likelihoods.EP, likelihoods.EP_Mixed_Noise, likelihoods.Laplace)), "pseudo_EM is only available for approximate likelihoods" ll_change = stop_crit + 1.
iteration = 0 last_ll = -np.inf diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index d4b33ed2..5e381110 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -52,23 +52,6 @@ class SparseGP(GPBase): self._const_jitter = None - def getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return GPBase.getstate(self) + [self.Z, - self.num_inducing, - self.has_uncertain_inputs, - self.X_variance] - - def setstate(self, state): - self.X_variance = state.pop() - self.has_uncertain_inputs = state.pop() - self.num_inducing = state.pop() - self.Z = state.pop() - GPBase.setstate(self, state) - def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) @@ -87,7 +70,6 @@ class SparseGP(GPBase): # factor Kmm self._Lm = jitchol(self.Kmm + self._const_jitter) - # TODO: no white kernel needed anymore, all noise in likelihood -------- # The rather complex computations of self._A if self.has_uncertain_inputs: @@ -156,7 +138,7 @@ class SparseGP(GPBase): # the partial derivative vector for the likelihood - if self.likelihood.Nparams == 0: + if self.likelihood.num_params == 0: # save computation here. self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: @@ -341,7 +323,10 @@ class SparseGP(GPBase): return mean, var, _025pm, _975pm - def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): + def plot_f(self, samples=0, plot_limits=None, which_data_rows='all', + which_data_ycols='all', which_parts='all', resolution=None, + full_cov=False, fignum=None, ax=None): + """ Plot the GP's view of the world, where the data is normalized and the - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -350,8 +335,8 @@ class SparseGP(GPBase): :param samples: the number of a posteriori samples to plot :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice self.X, self.Y + :param which_parts: which of the kernel functions to plot (additively) :type which_parts: 'all', or list of bools :param resolution: the number of intervals to sample the GP on.
Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D @@ -371,10 +356,10 @@ class SparseGP(GPBase): ax = fig.add_subplot(111) if fignum is None and ax is None: fignum = fig.num - if which_data is 'all': - which_data = slice(None) + if which_data_rows == 'all': + which_data_rows = slice(None) - GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax) + GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data_rows=which_data_rows, which_data_ycols=which_data_ycols, which_parts=which_parts, resolution=resolution, fignum=fignum, ax=ax) if self.X.shape[1] == 1: if self.has_uncertain_inputs: @@ -389,177 +374,98 @@ Zu = self.Z * self._Xscale + self._Xoffset ax.plot(Zu[:, 0], Zu[:, 1], 'wo') - else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None): + def plot(self, plot_limits=None, which_data_rows='all', + which_data_ycols='all', which_parts='all', fixed_inputs=[], + plot_raw=False, + levels=20, samples=0, fignum=None, ax=None, resolution=None): + """ + Plot the posterior of the sparse GP. + - In one dimension, the function is plotted with a shaded region identifying two standard deviations. + - In two dimensions, a contour-plot shows the mean predicted function + - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed. + + Can plot only part of the data and part of the posterior functions + using which_data_rows, which_data_ycols and which_parts + + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits + :type plot_limits: np.array + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice self.X, self.Y + :param which_data_ycols: when the data has several columns (independent outputs), only plot these + :type which_data_ycols: 'all' or a list of integers + :param which_parts: which of the kernel functions to plot (additively) + :type which_parts: 'all', or list of bools + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :type output: integer (first output is 0) + :param linecol: color of line to plot.
+ :type linecol: + :param fillcol: color of fill + :param levels: for 2D plotting, the number of contour levels to use. If ax is None, create a new figure + """ + #work out which ax to plot on if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - if fignum is None and ax is None: - fignum = fig.num - if which_data is 'all': - which_data = slice(None) - GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax) + #work out what the inputs are for plotting (1D or 2D) + fixed_dims = np.array([i for i,v in fixed_inputs]) + free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims) - if self.X.shape[1] == 1: + #call the base plotting + GPBase.plot(self, samples=samples, plot_limits=plot_limits, + which_data_rows=which_data_rows, + which_data_ycols=which_data_ycols, fixed_inputs=fixed_inputs, + which_parts=which_parts, resolution=resolution, levels=20, + fignum=fignum, ax=ax) + + if len(free_dims) == 1: + #plot errorbars for the uncertain inputs if self.has_uncertain_inputs: Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now - ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], - xerr=2 * np.sqrt(self.X_variance[which_data, 0]), + ax.errorbar(Xu[which_data_rows, 0], self.likelihood.data[which_data_rows, 0], + xerr=2 * np.sqrt(self.X_variance[which_data_rows, 0]), ecolor='k', fmt=None, elinewidth=.5, alpha=.5) + + #plot the inducing inputs Zu = self.Z * self._Xscale + self._Xoffset ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12) - elif self.X.shape[1] == 2: + elif len(free_dims) == 2: Zu = self.Z * self._Xscale + self._Xoffset ax.plot(Zu[:, 0], Zu[:, 1], 'wo') - else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False): + def getstate(self): """ - For a specific output, predict the function at the new point(s) Xnew. - - :param Xnew: The points at which to make a prediction - :type Xnew: np.ndarray, Nnew x self.input_dim - :param output: output to predict - :type output: integer in {0,..., num_outputs-1} - :param which_parts: specifies which outputs kernel(s) to use in prediction - :type which_parts: ('all', list of bools) - :param full_cov: whether to return the full covariance matrix, or just the diagonal - :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.input_dim - :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise - :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim - - ..
Note:: For multiple output models only + Get the current state of the class, + here just all the indices, rest can get recomputed """ + return GPBase.getstate(self) + [self.Z, + self.num_inducing, + self.has_uncertain_inputs, + self.X_variance] - assert hasattr(self,'multioutput') - index = np.ones_like(Xnew)*output - Xnew = np.hstack((Xnew,index)) - - # normalize X values - Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale - mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) - - # now push through likelihood - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) - return mean, var, _025pm, _975pm - - def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False): - """ - Internal helper function for making predictions for a specific output, - does not account for normalization or likelihood - --------- - - :param Xnew: The points at which to make a prediction - :type Xnew: np.ndarray, Nnew x self.input_dim - :param output: output to predict - :type output: integer in {0,..., num_outputs-1} - :param which_parts: specifies which outputs kernel(s) to use in prediction - :type which_parts: ('all', list of bools) - :param full_cov: whether to return the full covariance matrix, or just the diagonal - - .. Note:: For multiple output models only - """ - Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! - symmetrify(Bi) - Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi) - - if self.Cpsi1V is None: - psi1V = np.dot(self.psi1.T,self.likelihood.V) - tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0) - tmp, _ = dpotrs(self.LB, tmp, lower=1) - self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1) - - assert hasattr(self,'multioutput') - index = np.ones_like(_Xnew)*output - _Xnew = np.hstack((_Xnew,index)) - - if X_variance_new is None: - Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts) - mu = np.dot(Kx.T, self.Cpsi1V) - if full_cov: - Kxx = self.kern.K(_Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting - else: - Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) - var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) - else: - Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new) - mu = np.dot(Kx, self.Cpsi1V) - if full_cov: - raise NotImplementedError, "TODO" - else: - Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new) - psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new) - var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - - return mu, var[:, None] + def setstate(self, state): + self.X_variance = state.pop() + self.has_uncertain_inputs = state.pop() + self.num_inducing = state.pop() + self.Z = state.pop() + GPBase.setstate(self, state) - def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): - - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - if fignum is None and ax is None: - fignum = fig.num - if which_data is 'all': - which_data = slice(None) - - GPBase.plot_single_output_f(self, output=output, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax) - - if self.X.shape[1] == 2: - if self.has_uncertain_inputs: - Xu = self.X * self._Xscale + self._Xoffset # NOTE 
self.X are the normalized values now - ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], - xerr=2 * np.sqrt(self.X_variance[which_data, 0]), - ecolor='k', fmt=None, elinewidth=.5, alpha=.5) - Zu = self.Z * self._Xscale + self._Xoffset - Zu = Zu[Zu[:,1]==output,0:1] - ax.plot(Zu[:,0], np.zeros_like(Zu[:,0]) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12) - - elif self.X.shape[1] == 2: - Zu = self.Z * self._Xscale + self._Xoffset - Zu = Zu[Zu[:,1]==output,0:2] - ax.plot(Zu[:, 0], Zu[:, 1], 'wo') - - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - - def plot_single_output(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None): - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - if fignum is None and ax is None: - fignum = fig.num - if which_data is 'all': - which_data = slice(None) - - GPBase.plot_single_output(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax, output=output) - - if self.X.shape[1] == 2: - if self.has_uncertain_inputs: - Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now - ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], - xerr=2 * np.sqrt(self.X_variance[which_data, 0]), - ecolor='k', fmt=None, elinewidth=.5, alpha=.5) - Zu = self.Z * self._Xscale + self._Xoffset - Zu = Zu[Zu[:,1]==output,0:1] - ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12) - - elif self.X.shape[1] == 3: - Zu = self.Z * self._Xscale + self._Xoffset - Zu = Zu[Zu[:,1]==output,0:1] - ax.plot(Zu[:, 0], Zu[:, 1], 'wo') - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py index b9101160..9f27f465 100644 --- a/GPy/core/svigp.py +++ b/GPy/core/svigp.py @@ -18,30 +18,16 @@ class SVIGP(GPBase): Stochastic Variational inference in a Gaussian Process :param X: inputs - :type X: np.ndarray (N x Q) + :type X: np.ndarray (num_data x num_inputs) :param Y: observed data - :type Y: np.ndarray of observations (N x D) - :param batchsize: the size of a h - - Additional kwargs are used as for a sparse GP. They include: - + :type Y: np.ndarray of observations (num_data x output_dim) + :param batchsize: the size of a minibatch :param q_u: canonical parameters of the distribution squasehd into a 1D array :type q_u: np.ndarray - :param M: Number of inducing points (optional, default 10. Ignored if Z is not None) - :type M: int :param kernel: the kernel/covariance function. See link kernels :type kernel: a GPy kernel - :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x Q) | None - :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) - :type X_uncertainty: np.ndarray (N x Q) | None - :param Zslices: slices for the inducing inputs (see slicing TODO: link) - :param M: Number of inducing points (optional, default 10. Ignored if Z is not None) - :type M: int - :param beta: noise precision. 
TODO: ignore beta if doing EP - :type beta: float - :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales) - :type normalize_(X|Y): bool + :param Z: inducing inputs + :type Z: np.ndarray (num_inducing x num_inputs) """ @@ -350,8 +336,8 @@ class SVIGP(GPBase): #callback if i and not i%callback_interval: - callback() - time.sleep(0.1) + callback(self) # Change this to callback() + time.sleep(0.01) if self.epochs > 10: self._adapt_steplength() @@ -367,13 +353,13 @@ class SVIGP(GPBase): assert self.vb_steplength > 0 if self.adapt_param_steplength: - # self._adaptive_param_steplength() + self._adaptive_param_steplength() # self._adaptive_param_steplength_log() - self._adaptive_param_steplength_from_vb() + # self._adaptive_param_steplength_from_vb() self._param_steplength_trace.append(self.param_steplength) def _adaptive_param_steplength(self): - decr_factor = 0.1 + decr_factor = 0.02 g_tp = self._transform_gradients(self._log_likelihood_gradients()) self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp) @@ -407,7 +393,7 @@ class SVIGP(GPBase): self.tau_t = self.tau_t*(1-self.vb_steplength) + 1 def _adaptive_vb_steplength_KL(self): - decr_factor = 1 #0.1 + decr_factor = 0.1 natgrad = self.vb_grad_natgrad() g_t1 = natgrad[0] g_t2 = natgrad[1] diff --git a/GPy/core/variational.py b/GPy/core/variational.py new file mode 100644 index 00000000..74287dcf --- /dev/null +++ b/GPy/core/variational.py @@ -0,0 +1,19 @@ +''' +Created on 6 Nov 2013 + +@author: maxz +''' +from parameterized import Parameterized +from parameter import Param + +class Normal(Parameterized): + ''' + Normal distribution for variational approximations. + + holds the means and variances for a factorizing multivariate normal distribution + ''' + def __init__(self, name, means, variances): + Parameterized.__init__(self, name=name) + self.means = Param("mean", means) + self.variances = Param('variance', variances) + self.add_parameters(self.means, self.variances) \ No newline at end of file diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index da2ffb24..05b6af74 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -43,7 +43,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None): def toy_linear_1d_classification(seed=default_seed): """ - Simple 1D classification example + Simple 1D classification example using EP approximation :param seed: seed value for data generation (default is 4). :type seed: int @@ -61,6 +61,7 @@ def toy_linear_1d_classification(seed=default_seed): #m.update_likelihood_approximation() # Parameters optimization: #m.optimize() + #m.update_likelihood_approximation() m.pseudo_EM() # Plot @@ -71,6 +72,41 @@ def toy_linear_1d_classification(seed=default_seed): return m +def toy_linear_1d_classification_laplace(seed=default_seed): + """ + Simple 1D classification example using Laplace approximation + + :param seed: seed value for data generation (default is 4). 
+ :type seed: int + + """ + + data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) + Y = data['Y'][:, 0:1] + Y[Y.flatten() == -1] = 0 + + bern_noise_model = GPy.likelihoods.bernoulli() + laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model) + + # Model definition + m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood) + + print m + # Optimize + #m.update_likelihood_approximation() + # Parameters optimization: + m.optimize('bfgs', messages=1) + #m.pseudo_EM() + + # Plot + fig, axes = pb.subplots(2,1) + m.plot_f(ax=axes[0]) + m.plot(ax=axes[1]) + print(m) + + return m + + def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed): """ Sparse 1D classification example @@ -116,7 +152,7 @@ def toy_heaviside(seed=default_seed): Y[Y.flatten() == -1] = 0 # Model definition - noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside()) + noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside()) likelihood = GPy.likelihoods.EP(Y,noise_model) m = GPy.models.GPClassification(data['X'], likelihood=likelihood) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 005b131f..666209f9 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -12,10 +12,10 @@ from GPy.likelihoods.gaussian import Gaussian default_seed = np.random.seed(123344) def BGPLVM(seed=default_seed): - N = 5 - num_inducing = 4 - Q = 3 - D = 2 + N = 13 + num_inducing = 5 + Q = 6 + D = 25 # generate GPLVM-like data X = np.random.rand(N, Q) lengthscales = np.random.rand(Q) @@ -25,9 +25,12 @@ def BGPLVM(seed=default_seed): Y = np.random.multivariate_normal(np.zeros(N), K, D).T lik = Gaussian(Y, normalize=True) - k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) - # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) + # k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) + # k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) + # k = GPy.kern.rbf(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.rbf(Q, .3, np.ones(Q) * .2, ARD=True) + k = GPy.kern.rbf(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.linear(Q, np.ones(Q) * .2, ARD=True) + # k = GPy.kern.rbf(Q, .5, 2., ARD=0) + GPy.kern.rbf(Q, .3, .2, ARD=0) m = GPy.models.BayesianGPLVM(lik, Q, kernel=k, num_inducing=num_inducing) m.lengthscales = lengthscales @@ -331,27 +334,46 @@ def brendan_faces(): from GPy import kern data = GPy.util.datasets.brendan_faces() Q = 2 - Y = data['Y'][0:-1:10, :] - # Y = data['Y'] + Y = data['Y'] Yn = Y - Y.mean() Yn /= Yn.std() m = GPy.models.GPLVM(Yn, Q) - # m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100) # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) - m.optimize('scg', messages=1, max_f_eval=10000) + m.optimize('scg', messages=1, max_iters=1000) ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m + +def 
olivetti_faces(): + from GPy import kern + data = GPy.util.datasets.olivetti_faces() + Q = 2 + Y = data['Y'] + Yn = Y - Y.mean() + Yn /= Yn.std() + + m = GPy.models.GPLVM(Yn, Q) + m.optimize('scg', messages=1, max_iters=1000) + + ax = m.plot_latent(which_indices=(0, 1)) + y = m.likelihood.Y[0, :] + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) + lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + raw_input('Press enter to finish') + + return m + def stick_play(range=None, frame_rate=15): + data = GPy.util.datasets.osu_run1() # optimize if range == None: diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py new file mode 100644 index 00000000..64185885 --- /dev/null +++ b/GPy/examples/laplace_approximations.py @@ -0,0 +1,296 @@ +import GPy +import numpy as np +import matplotlib.pyplot as plt +from GPy.util import datasets +np.random.seed(1) + +def student_t_approx(): + """ + Example of regressing with a student t likelihood + """ + real_std = 0.1 + #Start a function, any function + X = np.linspace(0.0, np.pi*2, 100)[:, None] + Y = np.sin(X) + np.random.randn(*X.shape)*real_std + Yc = Y.copy() + + X_full = np.linspace(0.0, np.pi*2, 500)[:, None] + Y_full = np.sin(X_full) + + Y = Y/Y.max() + + #Slightly noisy data + Yc[75:80] += 1 + + #Very noisy data + #Yc[10] += 100 + #Yc[25] += 10 + #Yc[23] += 10 + #Yc[26] += 1000 + #Yc[24] += 10 + #Yc = Yc/Yc.max() + + #Add student t random noise to datapoints + deg_free = 5 + print "Real noise: ", real_std + initial_var_guess = 0.5 + + #t_rv = t(deg_free, loc=0, scale=real_var) + #noise = t_rvrvs(size=Y.shape) + #Y += noise + + plt.figure(1) + plt.suptitle('Gaussian likelihood') + # Kernel object + kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + kernel2 = kernel1.copy() + kernel3 = kernel1.copy() + kernel4 = kernel1.copy() + kernel5 = kernel1.copy() + kernel6 = kernel1.copy() + + print "Clean Gaussian" + #A GP should completely break down due to the points as they get a lot of weight + # create simple GP model + m = GPy.models.GPRegression(X, Y, kernel=kernel1) + # optimize + m.ensure_default_constraints() + m.constrain_fixed('white', 1e-4) + m.randomize() + m.optimize() + # plot + ax = plt.subplot(211) + m.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Gaussian clean') + print m + + #Corrupt + print "Corrupt Gaussian" + m = GPy.models.GPRegression(X, Yc, kernel=kernel2) + m.ensure_default_constraints() + m.constrain_fixed('white', 1e-4) + m.randomize() + m.optimize() + ax = plt.subplot(212) + m.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Gaussian corrupt') + print m + + plt.figure(2) + plt.suptitle('Student-t likelihood') + edited_real_sd = initial_var_guess + + print "Clean student t, rasm" + t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) + stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution) + m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood) + m.ensure_default_constraints() + m.constrain_positive('t_noise') + m.constrain_fixed('white', 1e-4) + m.randomize() + #m.update_likelihood_approximation() + m.optimize() + print(m) + ax = plt.subplot(211) + m.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Student-t rasm clean') + + print "Corrupt student t, rasm" + t_distribution = 
GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) + corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution) + m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) + m.ensure_default_constraints() + m.constrain_positive('t_noise') + m.constrain_fixed('white', 1e-4) + m.randomize() + for a in range(1): + m.randomize() + m_start = m.copy() + print m + m.optimize('scg', messages=1) + print(m) + ax = plt.subplot(212) + m.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Student-t rasm corrupt') + + return m + +def boston_example(): + import sklearn + from sklearn.cross_validation import KFold + optimizer='bfgs' + messages=0 + data = datasets.boston_housing() + degrees_freedoms = [3, 5, 8, 10] + X = data['X'].copy() + Y = data['Y'].copy() + X = X-X.mean(axis=0) + X = X/X.std(axis=0) + Y = Y-Y.mean() + Y = Y/Y.std() + num_folds = 10 + kf = KFold(len(Y), n_folds=num_folds, indices=True) + num_models = len(degrees_freedoms) + 3 #3 for baseline, gaussian, gaussian laplace approx + score_folds = np.zeros((num_models, num_folds)) + pred_density = score_folds.copy() + + def rmse(Y, Ystar): + return np.sqrt(np.mean((Y-Ystar)**2)) + + for n, (train, test) in enumerate(kf): + X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] + print "Fold {}".format(n) + + noise = 1e-1 #np.exp(-2) + rbf_len = 0.5 + data_axis_plot = 4 + plot = False + kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + + #Baseline + score_folds[0, n] = rmse(Y_test, np.mean(Y_train)) + + #Gaussian GP + print "Gauss GP" + mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) + mgp.ensure_default_constraints() + mgp.constrain_fixed('white', 1e-5) + mgp['rbf_len'] = rbf_len + mgp['noise'] = noise + print mgp + mgp.optimize(optimizer=optimizer, messages=messages) + Y_test_pred = mgp.predict(X_test) + score_folds[1, n] = rmse(Y_test, Y_test_pred[0]) + pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test)) + print mgp + print pred_density + if plot: + plt.figure() + plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0]) + plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') + plt.title('GP gauss') + + print "Gaussian Laplace GP" + N, D = Y_train.shape + g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D) + g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) + mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood) + mg.ensure_default_constraints() + mg.constrain_positive('noise_variance') + mg.constrain_fixed('white', 1e-5) + mg['rbf_len'] = rbf_len + mg['noise'] = noise + print mg + try: + mg.optimize(optimizer=optimizer, messages=messages) + except Exception: + print "Blew up" + Y_test_pred = mg.predict(X_test) + score_folds[2, n] = rmse(Y_test, Y_test_pred[0]) + pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test)) + print pred_density + print mg + if plot: + plt.figure() + plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0]) + plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') + plt.title('Lap gauss') + + for stu_num, df in enumerate(degrees_freedoms): + #Student T + print "Student-T GP {}df".format(df) + t_distribution = 
GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) + stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) + mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) + mstu_t.ensure_default_constraints() + mstu_t.constrain_fixed('white', 1e-5) + mstu_t.constrain_bounded('t_noise', 0.0001, 1000) + mstu_t['rbf_len'] = rbf_len + mstu_t['t_noise'] = noise + print mstu_t + try: + mstu_t.optimize(optimizer=optimizer, messages=messages) + except Exception: + print "Blew up" + Y_test_pred = mstu_t.predict(X_test) + score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0]) + pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test)) + print pred_density + print mstu_t + if plot: + plt.figure() + plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0]) + plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') + plt.title('Stu t {}df'.format(df)) + + print "Average scores: {}".format(np.mean(score_folds, 1)) + print "Average pred density: {}".format(np.mean(pred_density, 1)) + + #Plotting + stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms] + legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends + + #Plot boxplots for RMSE density + fig = plt.figure() + ax=fig.add_subplot(111) + plt.title('RMSE') + bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5) + plt.setp(bp['boxes'], color='black') + plt.setp(bp['whiskers'], color='black') + plt.setp(bp['fliers'], color='red', marker='+') + xtickNames = plt.setp(ax, xticklabels=legends) + plt.setp(xtickNames, rotation=45, fontsize=8) + ax.set_ylabel('RMSE') + ax.set_xlabel('Distribution') + #Make grid and put it below boxes + ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', + alpha=0.5) + ax.set_axisbelow(True) + + #Plot boxplots for predictive density + fig = plt.figure() + ax=fig.add_subplot(111) + plt.title('Predictive density') + bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5) + plt.setp(bp['boxes'], color='black') + plt.setp(bp['whiskers'], color='black') + plt.setp(bp['fliers'], color='red', marker='+') + xtickNames = plt.setp(ax, xticklabels=legends[1:]) + plt.setp(xtickNames, rotation=45, fontsize=8) + ax.set_ylabel('Mean Log probability P(Y*|Y)') + ax.set_xlabel('Distribution') + #Make grid and put it below boxes + ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', + alpha=0.5) + ax.set_axisbelow(True) + return mstu_t + +def precipitation_example(): + import sklearn + from sklearn.cross_validation import KFold + data = datasets.boston_housing() + X = data['X'].copy() + Y = data['Y'].copy() + X = X-X.mean(axis=0) + X = X/X.std(axis=0) + Y = Y-Y.mean() + Y = Y/Y.std() + import ipdb; ipdb.set_trace() # XXX BREAKPOINT + num_folds = 10 + kf = KFold(len(Y), n_folds=num_folds, indices=True) + score_folds = np.zeros((4, num_folds)) + def rmse(Y, Ystar): + return np.sqrt(np.mean((Y-Ystar)**2)) + #for train, test in kf: + for n, (train, test) in enumerate(kf): + X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] + print "Fold {}".format(n) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 3bf2377e..a37e32c3 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -57,8 +57,8 @@ def coregionalization_toy(max_iters=100): m.optimize(max_iters=max_iters) fig, axes = pb.subplots(2,1) - m.plot_single_output(output=0,ax=axes[0]) - 
m.plot_single_output(output=1,ax=axes[1]) + m.plot(fixed_inputs=[(1,0)],ax=axes[0]) + m.plot(fixed_inputs=[(1,1)],ax=axes[1]) axes[0].set_title('Output 0') axes[1].set_title('Output 1') return m @@ -270,6 +270,50 @@ def toy_rbf_1d_50(max_iters=100): print(m) return m +def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100): + """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" + x_len = 400 + X = np.linspace(0, 10, x_len)[:, None] + f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) + Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] + + noise_model = GPy.likelihoods.poisson() + likelihood = GPy.likelihoods.EP(Y,noise_model) + + # create simple GP Model + m = GPy.models.GPRegression(X, Y, likelihood=likelihood) + + # optimize + m.optimize(optimizer, max_f_eval=max_nb_eval_optim) + # plot + m.plot() + print(m) + return m + +def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100): + """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" + x_len = 30 + X = np.linspace(0, 10, x_len)[:, None] + f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) + Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] + + noise_model = GPy.likelihoods.poisson() + likelihood = GPy.likelihoods.Laplace(Y,noise_model) + + # create simple GP Model + m = GPy.models.GPRegression(X, Y, likelihood=likelihood) + + # optimize + m.optimize(optimizer, max_f_eval=max_nb_eval_optim) + # plot + m.plot() + # plot the real underlying rate function + pb.plot(X, np.exp(f_true), '--k', linewidth=2) + print(m) + return m + + + def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4): # Create an artificial dataset where the values in the targets (Y) # only depend in dimensions 1 and 3 of the inputs (X). Run ARD to diff --git a/GPy/gpy_config.cfg b/GPy/gpy_config.cfg new file mode 100644 index 00000000..d52edd28 --- /dev/null +++ b/GPy/gpy_config.cfg @@ -0,0 +1,7 @@ +# This is the configuration file for GPy + +[parallel] +# Enable openmp support. This speeds up some computations, depending on the number +# of cores available. Setting up a compiler with openmp support can be difficult on +# some platforms, hence this option. +openmp=False diff --git a/GPy/inference/scg.py b/GPy/inference/scg.py index f4c7c9c4..252f348e 100644 --- a/GPy/inference/scg.py +++ b/GPy/inference/scg.py @@ -62,7 +62,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, fnow = fold gradnew = gradf(x, *optargs) # Initial gradient. if any(np.isnan(gradnew)): - raise UnexpectedInfOrNan + raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value" current_grad = np.dot(gradnew, gradnew) gradold = gradnew.copy() d = -gradnew # Initial search direction. diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index a8ec1d4b..392f43ba 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -298,43 +298,67 @@ if sympy_available: """ Radial Basis Function covariance. 
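# --- A sketch of how the new gpy_config.cfg is likely consumed. GPy/util/config.py
# itself is not shown in this diff, so treat this as an assumption inferred from
# the config.getboolean('parallel', 'openmp') calls that appear further down.
import os
import ConfigParser
import GPy

config = ConfigParser.ConfigParser()
config.read(os.path.join(os.path.dirname(GPy.__file__), 'gpy_config.cfg'))
if config.getboolean('parallel', 'openmp'):
    pass  # compile weave code with -fopenmp and link libgomp (see linear.py/rbf.py below)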
""" - X = [sp.var('x%i' % i) for i in range(input_dim)] - Z = [sp.var('z%i' % i) for i in range(input_dim)] + X = sp.symbols('x_:' + str(input_dim)) + Z = sp.symbols('z_:' + str(input_dim)) variance = sp.var('variance',positive=True) if ARD: - lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)] - dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) + lengthscales = sp.symbols('lengthscale_:' + str(input_dim)) + dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale%i**2' % (i, i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) f = variance*sp.exp(-dist/2.) else: lengthscale = sp.var('lengthscale',positive=True) - dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)]) + dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) f = variance*sp.exp(-dist/(2*lengthscale**2)) return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')]) + def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.): + """ + Exponentiated quadratic with multiple outputs. + """ + real_input_dim = input_dim + if output_dim>1: + real_input_dim -= 1 + X = sp.symbols('x_:' + str(real_input_dim)) + Z = sp.symbols('z_:' + str(real_input_dim)) + scale = sp.var('scale_i scale_j',positive=True) + if ARD: + lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)] + shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)] + dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)]) + dist = parse_expr(dist_string) + f = variance*sp.exp(-dist/2.) + else: + lengthscales = sp.var('lengthscale_i lengthscale_j',positive=True) + shared_lengthscale = sp.var('shared_lengthscale',positive=True) + dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)]) + dist = parse_expr(dist_string) + f = scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2))) + return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')]) + def sinc(input_dim, ARD=False, variance=1., lengthscale=1.): """ TODO: Not clear why this isn't working, suggests argument of sinc is not a number. 
sinc covariance funciton """ - X = [sp.var('x%i' % i) for i in range(input_dim)] - Z = [sp.var('z%i' % i) for i in range(input_dim)] + X = sp.symbols('x_:' + str(input_dim)) + Z = sp.symbols('z_:' + str(input_dim)) variance = sp.var('variance',positive=True) if ARD: lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)] - dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) + dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) f = variance*sinc(sp.pi*sp.sqrt(dist)) else: lengthscale = sp.var('lengthscale',positive=True) - dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)]) + dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)]) dist = parse_expr(dist_string) f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale) return kern(input_dim, [spkern(input_dim, f, name='sinc')]) - def sympykern(input_dim, k,name=None): + def sympykern(input_dim, k=None, output_dim=1, name=None, param=None): """ A base kernel object, where all the hard work in done by sympy. @@ -349,7 +373,7 @@ if sympy_available: - to handle multiple inputs, call them x1, z1, etc - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO """ - return kern(input_dim, [spkern(input_dim, k,name)]) + return kern(input_dim, [spkern(input_dim, k=k, output_dim=output_dim, name=name, param=param)]) del sympy_available def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi): diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 5a8882dd..37839423 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -18,37 +18,37 @@ class kern(Parameterized): like which parameters live where. The technical code for kernels is divided into _parts_ (see - e.g. rbf.py). This object contains a list of parts, which are - computed additively. For multiplication, special _prod_ parts + e.g. rbf.py). This object contains a list of _parameters_, which are + computed additively. For multiplication, special _prod_ _parameters_ are used. 
:param input_dim: The dimensionality of the kernel's input space :type input_dim: int - :param parts: the 'parts' (PD functions) of the kernel - :type parts: list of Kernpart objects + :param _parameters_: the '_parameters_' (PD functions) of the kernel + :type _parameters_: list of Kernpart objects :param input_slices: the slices on the inputs which apply to each kernel :type input_slices: list of slice objects, or list of bools """ - self.parts = parts - self.Nparts = len(parts) - self.num_params = sum([p.num_params for p in self.parts]) + self._parameters_ = parts + self.num_parts = len(parts) + self.num_params = sum([p.num_params for p in self._parameters_]) self.input_dim = input_dim - part_names = [k.name for k in self.parts] + part_names = [k.name for k in self._parameters_] self.name='' for name in part_names: self.name += name + '+' self.name = self.name[:-1] # deal with input_slices if input_slices is None: - self.input_slices = [slice(None) for p in self.parts] + self.input_slices = [slice(None) for p in self._parameters_] else: - assert len(input_slices) == len(self.parts) + assert len(input_slices) == len(self._parameters_) self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] - for p in self.parts: + for p in self._parameters_: assert isinstance(p, Kernpart), "bad kernel part" self.compute_param_slices() @@ -60,8 +60,8 @@ class kern(Parameterized): Get the current state of the class, here just all the indices, rest can get recomputed """ - return Parameterized.getstate(self) + [self.parts, - self.Nparts, + return Parameterized.getstate(self) + [self._parameters_, + self.num_parts, self.num_params, self.input_dim, self.input_slices, @@ -73,21 +73,20 @@ class kern(Parameterized): self.input_slices = state.pop() self.input_dim = state.pop() self.num_params = state.pop() - self.Nparts = state.pop() - self.parts = state.pop() + self.num_parts = state.pop() + self._parameters_ = state.pop() Parameterized.setstate(self, state) def plot_ARD(self, fignum=None, ax=None, title='', legend=False): - """If an ARD kernel is present, it bar-plots the ARD parameters. + """If an ARD kernel is present, plot a bar representation using matplotlib :param fignum: figure number of the plot :param ax: matplotlib axis to plot on - :param title: - title of the plot, + :param title: + title of the plot, pass '' to not print a title pass None for a generic title - """ if ax is None: fig = pb.figure(fignum) @@ -100,7 +99,7 @@ class kern(Parameterized): xticklabels = [] bars = [] x0 = 0 - for p in self.parts: + for p in self._parameters_: c = Tango.nextMedium() if hasattr(p, 'ARD') and p.ARD: if title is None: @@ -152,6 +151,13 @@ class kern(Parameterized): return ax def _transform_gradients(self, g): + """ + Apply the transformations of the kernel so that the returned vector + represents the gradient in the transformed space (i.e. that given by + get_params_transformed()) + + :param g: the gradient vector for the current model, usually created by dK_dtheta + """ x = self._get_params() [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] @@ -162,22 +168,29 @@ class kern(Parameterized): return g def compute_param_slices(self): - """create a set of slices that can index the parameters of each part.""" + """ + Create a set of slices that can index the parameters of each part. 
+ """ self.param_slices = [] count = 0 - for p in self.parts: + for p in self._parameters_: self.param_slices.append(slice(count, count + p.num_params)) count += p.num_params def __add__(self, other): - """ - Shortcut for `add`. - """ + """ Overloading of the '+' operator. for more control, see self.add """ return self.add(other) def add(self, other, tensor=False): """ - Add another kernel to this one. Both kernels are defined on the same _space_ + Add another kernel to this one. + + If Tensor is False, both kernels are defined on the same _space_. then + the created kernel will have the same number of inputs as self and + other (which must be the same). + + If Tensor is True, then the dimensions are stacked 'horizontally', so + that the resulting kernel has self.input_dim + other.input_dim :param other: the other kernel to be added :type other: GPy.kern @@ -189,7 +202,7 @@ class kern(Parameterized): other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] - newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) + newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices) # transfer constraints: newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices] @@ -200,7 +213,7 @@ class kern(Parameterized): newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] else: assert self.input_dim == other.input_dim - newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices) + newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices) # transfer constraints: newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices] newkern.constraints = self.constraints + other.constraints @@ -210,9 +223,7 @@ class kern(Parameterized): return newkern def __mul__(self, other): - """ - Shortcut for `prod`. - """ + """ Here we overload the '*' operator. See self.prod for more information""" return self.prod(other) def __pow__(self, other, tensor=False): @@ -228,7 +239,7 @@ class kern(Parameterized): :param other: the other kernel to be added :type other: GPy.kern :param tensor: whether or not to use the tensor space (default is false). 
- :type tensor: bool + :type tensor: bool """ K1 = self.copy() @@ -240,7 +251,7 @@ class kern(Parameterized): s1[sl1], s2[sl2] = [True], [True] slices += [s1 + s2] - newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] + newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)] if tensor: newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) @@ -255,12 +266,12 @@ class kern(Parameterized): # Build the array that allows to go from the initial indices of the param to the new ones K1_param = [] n = 0 - for k1 in K1.parts: + for k1 in K1._parameters_: K1_param += [range(n, n + k1.num_params)] n += k1.num_params n = 0 K2_param = [] - for k2 in K2.parts: + for k2 in K2._parameters_: K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)] n += k2.num_params index_param = [] @@ -292,36 +303,47 @@ class kern(Parameterized): self.constrain(np.where(index_param == i)[0], t) def _get_params(self): - return np.hstack([p._get_params() for p in self.parts]) + return np.hstack([p._get_params() for p in self._parameters_]) def _set_params(self, x): - [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)] + [p._set_params(x[s]) for p, s in zip(self._parameters_, self.param_slices)] def _get_param_names(self): - # this is a bit nasty: we want to distinguish between parts with the same name by appending a count - part_names = np.array([k.name for k in self.parts], dtype=np.str) + # this is a bit nasty: we want to distinguish between _parameters_ with the same name by appending a count + part_names = np.array([k.name for k in self._parameters_], dtype=np.str) counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)] - return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], []) + return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], []) def K(self, X, X2=None, which_parts='all'): + """ + Compute the kernel function. + + :param X: the first set of inputs to the kernel + :param X2: (optional) the second set of arguments to the kernel. If X2 + is None, this is passed throgh to the 'part' object, which + handles this as X2 == X. + :param which_parts: a list of booleans detailing whether to include + each of the part functions. 
By default, 'all' + indicates [True]*self.num_parts + """ if which_parts == 'all': - which_parts = [True] * self.Nparts + which_parts = [True] * self.num_parts assert X.shape[1] == self.input_dim if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] + [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] else: target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] + [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] return target def dK_dtheta(self, dL_dK, X, X2=None): """ Compute the gradient of the covariance function with respect to the parameters. - + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. :type dL_dK: Np.ndarray (num_samples x num_inducing) :param X: Observed data inputs @@ -329,18 +351,19 @@ class kern(Parameterized): :param X2: Observed data inputs (optional, defaults to X) :type X2: np.ndarray (num_inducing x input_dim) + returns: dL_dtheta """ assert X.shape[1] == self.input_dim target = np.zeros(self.num_params) if X2 is None: - [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] + [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)] else: - [p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] + [p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self.param_slices)] return self._transform_gradients(target) def dK_dX(self, dL_dK, X, X2=None): - """Compute the gradient of the covariance function with respect to X. + """Compute the gradient of the objective function with respect to X. :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
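# --- A sketch of what dK_dtheta returns: dL_dtheta, the chain of dL_dK with the
# parameter gradients of K. It can be checked against finite differences of K;
# shown here for an unconstrained kernel, where the transformed and raw
# parameters coincide (kern_test later in this file does this properly).
import numpy as np
import GPy

k = GPy.kern.rbf(1)
X = np.random.randn(10, 1)
dL_dK = np.ones((10, 10))                  # stand-in for dL/dK
analytic = k.dK_dtheta(dL_dK, X)
params = k._get_params()
numeric = np.zeros_like(analytic)
eps = 1e-6
for i in range(params.size):
    for sign in (+1, -1):
        p = params.copy()
        p[i] += sign*eps
        k._set_params(p)
        numeric[i] += sign*np.sum(k.K(X)*dL_dK)/(2*eps)
k._set_params(params)
print(np.allclose(analytic, numeric, atol=1e-4))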
:type dL_dK: np.ndarray (num_samples x num_inducing) @@ -351,18 +374,18 @@ class kern(Parameterized): target = np.zeros_like(X) if X2 is None: - [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] else: - [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target def Kdiag(self, X, which_parts='all'): """Compute the diagonal of the covariance function for inputs X.""" if which_parts == 'all': - which_parts = [True] * self.Nparts + which_parts = [True] * self.num_parts assert X.shape[1] == self.input_dim target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] + [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] return target def dKdiag_dtheta(self, dL_dKdiag, X): @@ -370,134 +393,203 @@ class kern(Parameterized): assert X.shape[1] == self.input_dim assert dL_dKdiag.size == X.shape[0] target = np.zeros(self.num_params) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] + [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)] return self._transform_gradients(target) def dKdiag_dX(self, dL_dKdiag, X): assert X.shape[1] == self.input_dim target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target def psi0(self, Z, mu, S): target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] return target def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): target = np.zeros(self.num_params) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] + [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)] return self._transform_gradients(target) def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target_mu, target_S def psi1(self, Z, mu, S): target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] return target def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): target = np.zeros((self.num_params)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in 
zip(self.parts, self.param_slices, self.input_slices)] + [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self.param_slices, self.input_slices)] return self._transform_gradients(target) def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): """return shapes are num_samples,num_inducing,input_dim""" target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target_mu, target_S def psi2(self, Z, mu, S): """ - Computer the psi2 statistics for the covariance function. - - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - + :param Z: np.ndarray of inducing inputs (M x Q) + :param mu, S: np.ndarrays of means and variances (each N x Q) + :returns psi2: np.ndarray (N,M,M) """ target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] # compute the "cross" terms # TODO: input_slices needed - crossterms = 0 + from parts.white import White + from parts.rbf import RBF + from parts.rbf_inv import RBFInv + from parts.bias import Bias + from parts.linear import Linear - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - # target += crossterms - return target + crossterms + for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self._param_slices_), 2): + # white doesn;t combine with anything + if isinstance(p1, White) or isinstance(p2, White): + pass + # rbf X bias + elif isinstance(p1, Bias) and isinstance(p2, (RBF, RBFInv)): + target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :]) + elif isinstance(p2, Bias) and isinstance(p1, (RBF, RBFInv)): + target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :]) + # linear X bias + elif isinstance(p1, Bias) and isinstance(p2, Linear): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z, mu, S, tmp) + target += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) + elif isinstance(p2, Bias) and isinstance(p1, Linear): + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + target += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) + # rbf X linear + elif isinstance(p1, Linear) and isinstance(p2, (RBF, RBFInv)): + pass + elif 
isinstance(p2, Linear) and isinstance(p1, (RBF, RBFInv)): + raise NotImplementedError # TODO + elif isinstance(p1, (RBF, RBFInv)) and isinstance(p2, (RBF, RBFInv)): + raise NotImplementedError # TODO + elif isinstance(p2, (RBF, RBFInv)) and isinstance(p1, (RBF, RBFInv)): + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" + return target def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.num_params) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] + target = np.zeros(self.Nparam) + [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self.param_slices)] # compute the "cross" terms # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self.parts)), 2): - p1, p2 = self.parts[i1], self.parts[i2] + for i1, i2 in itertools.combinations(range(len(self._parameters_)), 2): + p1, p2 = self._parameters_[i1], self._parameters_[i2] # ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] ps1, ps2 = self.param_slices[i1], self.param_slices[i2] - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) + # white doesn;t combine with anything + if p1.name == 'white' or p2.name == 'white': + pass + # rbf X bias + elif p1.name == 'bias' and p2.name == 'rbf': + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1]) + elif p2.name == 'bias' and p1.name == 'rbf': + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1]) + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2]) + # linear X bias + elif p1.name == 'bias' and p2.name == 'linear': + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1]) + psi1 = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z, mu, S, psi1) + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1]) + elif p2.name == 'bias' and p1.name == 'linear': + p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1]) + psi1 = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, psi1) + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2]) + # rbf X linear + elif p1.name == 'linear' and p2.name == 'rbf': + raise NotImplementedError # TODO + elif p2.name == 'linear' and p1.name == 'rbf': + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" return self._transform_gradients(target) def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] - # target *= 2 + [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] # compute the "cross" terms # TODO: we need input_slices here. 
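# --- Derivation behind the bias cross-terms used above. For an additive
# kernel k = k1 + k2 the psi2 statistic is
#     psi2[n,m,m'] = E_{x_n}[ k(x_n, Z_m) * k(x_n, Z_m') ]
#                  = psi2_1 + psi2_2
#                    + E[k1(x_n,Z_m) k2(x_n,Z_m')] + E[k2(x_n,Z_m) k1(x_n,Z_m')].
# When k2 is a bias part, k2(.,.) == variance is constant, so the two cross
# expectations reduce to variance * psi1_1[n,m] and variance * psi1_1[n,m'],
# which is exactly target += variance * (psi1[:, :, None] + psi1[:, None, :]).
# White contributes no cross-term because its psi1 statistic is zero.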
- for p1, p2 in itertools.permutations(self.parts, 2): - if p1.name == 'linear' and p2.name == 'linear': - raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) + for p1, p2 in itertools.combinations(self._parameters_, 2): + # white doesn;t combine with anything + if p1.name == 'white' or p2.name == 'white': + pass + # rbf X bias + elif p1.name == 'bias' and p2.name == 'rbf': + p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target) + elif p2.name == 'bias' and p1.name == 'rbf': + p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) + # linear X bias + elif p1.name == 'bias' and p2.name == 'linear': + p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target) + elif p2.name == 'bias' and p1.name == 'linear': + p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) + # rbf X linear + elif p1.name == 'linear' and p2.name == 'rbf': + raise NotImplementedError # TODO + elif p2.name == 'linear' and p1.name == 'rbf': + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" - return target * 2 + return target * 2. def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] # compute the "cross" terms # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self.parts, 2): - if p1.name == 'linear' and p2.name == 'linear': - raise NotImplementedError("We don't handle linear/linear cross-terms") - - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) + for p1, p2 in itertools.combinations(self._parameters_, 2): + # white doesn;t combine with anything + if p1.name == 'white' or p2.name == 'white': + pass + # rbf X bias + elif p1.name == 'bias' and p2.name == 'rbf': + p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S) + elif p2.name == 'bias' and p1.name == 'rbf': + p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) + # linear X bias + elif p1.name == 'bias' and p2.name == 'linear': + p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S) + elif p2.name == 'bias' and p1.name == 'linear': + p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) + # rbf X linear + elif p1.name == 'linear' and p2.name == 'rbf': + raise NotImplementedError # TODO + elif p2.name == 'linear' and p1.name == 'rbf': + raise NotImplementedError # TODO + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" return target_mu, target_S - def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): if which_parts == 'all': - which_parts = [True] * self.Nparts + which_parts = [True] * self.num_parts if self.input_dim == 1: if x is None: x = np.zeros((1, 1)) @@ -658,7 +750,7 @@ class Kern_check_dKdiag_dX(Kern_check_model): def _set_params(self, x): self.X=x.reshape(self.X.shape) -def kern_test(kern, X=None, X2=None, verbose=False): +def kern_test(kern, X=None, 
X2=None, output_ind=None, verbose=False):
     """This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.

     :param kern: the kernel to be tested.
@@ -672,8 +764,13 @@ def kern_test(kern, X=None, X2=None, verbose=False):
     pass_checks = True
     if X==None:
         X = np.random.randn(10, kern.input_dim)
+        if output_ind is not None:
+            X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
     if X2==None:
         X2 = np.random.randn(20, kern.input_dim)
+        if output_ind is not None:
+            X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
+
     if verbose:
         print("Checking covariance function is positive definite.")
     result = Kern_check_model(kern, X=X).is_positive_definite()
diff --git a/GPy/kern/parts/hetero.py b/GPy/kern/parts/hetero.py
index d3939563..c716eaad 100644
--- a/GPy/kern/parts/hetero.py
+++ b/GPy/kern/parts/hetero.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from IPython.core.debugger import Tracer; debug_here=Tracer()
 from kernpart import Kernpart
 import numpy as np
 from ...util.linalg import tdot
diff --git a/GPy/kern/parts/hierarchical.py b/GPy/kern/parts/hierarchical.py
index ab96fdd7..c629f6b9 100644
--- a/GPy/kern/parts/hierarchical.py
+++ b/GPy/kern/parts/hierarchical.py
@@ -7,7 +7,7 @@ from independent_outputs import index_to_slices

 class Hierarchical(Kernpart):
     """
-    A kernel part which can reopresent a hierarchy of indepencnce: a gerenalisation of independent_outputs
+    A kernel part which can represent a hierarchy of independence: a generalisation of independent_outputs
     """
     def __init__(self,parts):
diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/parts/kernpart.py
index 475d835f..f6777083 100644
--- a/GPy/kern/parts/kernpart.py
+++ b/GPy/kern/parts/kernpart.py
@@ -5,15 +5,18 @@ class Kernpart(object):
     def __init__(self,input_dim):
         """
-        The base class for a kernpart: a positive definite function which forms part of a kernel
+        The base class for a kernpart: a positive definite function which forms part of a covariance function (kernel).

         :param input_dim: the number of input dimensions to the function
         :type input_dim: int

         Do not instantiate.
         """
+        # the input dimensionality for the covariance
         self.input_dim = input_dim
+        # the number of optimisable parameters
         self.num_params = 1
+        # the name of the covariance function.
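# --- A hypothetical invocation of kern_test (it assumes the function returns
# pass_checks); output_ind marks a column of X that carries integer output
# indices for multi-output kernels.
import GPy
from GPy.kern.kern import kern_test

ok = kern_test(GPy.kern.rbf(2), verbose=True)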
self.name = 'unnamed' def _get_params(self): diff --git a/GPy/kern/parts/linear.py b/GPy/kern/parts/linear.py index ffcbcf5e..ab96bb31 100644 --- a/GPy/kern/parts/linear.py +++ b/GPy/kern/parts/linear.py @@ -7,6 +7,7 @@ import numpy as np from ...util.linalg import tdot from ...util.misc import fast_array_equal from scipy import weave +from ...util.config import * class Linear(Kernpart): """ @@ -51,6 +52,26 @@ class Linear(Kernpart): self._Z, self._mu, self._S = np.empty(shape=(3, 1)) self._X, self._X2, self._params = np.empty(shape=(3, 1)) + # a set of optional args to pass to weave + weave_options_openmp = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], + 'extra_link_args' : ['-lgomp'], + 'libraries': ['gomp']} + weave_options_noopenmp = {'extra_compile_args': ['-O3']} + + + if config.getboolean('parallel', 'openmp'): + self.weave_options = weave_options_openmp + self.weave_support_code = """ + #include + #include + """ + else: + self.weave_options = weave_options_noopenmp + self.weave_support_code = """ + #include + """ + def _get_params(self): return self.variances @@ -190,11 +211,17 @@ class Linear(Kernpart): #target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1) #target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1) + + if config.getboolean('parallel', 'openmp'): + pragma_string = "#pragma omp parallel for private(m,mm,q,qq,factor,tmp)" + else: + pragma_string = '' + #Using weave, we can exploiut the symmetry of this problem: code = """ int n, m, mm,q,qq; double factor,tmp; - #pragma omp parallel for private(m,mm,q,qq,factor,tmp) + %s for(n=0;n - #include - """ - weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} + """ % pragma_string - N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1] - weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], - type_converters=weave.converters.blitz,**weave_options) + + N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1]) + weave.inline(code, support_code=self.weave_support_code, + arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], + type_converters=weave.converters.blitz,**self.weave_options) def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): @@ -240,9 +261,15 @@ class Linear(Kernpart): #dummy_target += psi2_dZ.sum(0).sum(0) AZA = self.variances*self.ZAinner + + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for private(n,mm,q)' + else: + pragma_string = '' + code=""" int n,m,mm,q; - #pragma omp parallel for private(n,mm,q) + %s for(m=0;m - #include - """ - weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} + """ % pragma_string - N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1] - weave.inline(code, support_code=support_code, libraries=['gomp'], + + N,num_inducing,input_dim = int(mu.shape[0]),int(Z.shape[0]),int(mu.shape[1]) + weave.inline(code, support_code=self.weave_support_code, arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], - type_converters=weave.converters.blitz,**weave_options) - - - + type_converters=weave.converters.blitz,**self.weave_options) #---------------------------------------# diff --git a/GPy/kern/parts/periodic_Matern32.py 
b/GPy/kern/parts/periodic_Matern32.py index 5693085d..0de57f82 100644 --- a/GPy/kern/parts/periodic_Matern32.py +++ b/GPy/kern/parts/periodic_Matern32.py @@ -113,7 +113,7 @@ class PeriodicMatern32(Kernpart): @silence_errors def dK_dtheta(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)""" + """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" if X2 is None: X2 = X FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) diff --git a/GPy/kern/parts/periodic_Matern52.py b/GPy/kern/parts/periodic_Matern52.py index 7b5ae846..882084fd 100644 --- a/GPy/kern/parts/periodic_Matern52.py +++ b/GPy/kern/parts/periodic_Matern52.py @@ -115,7 +115,7 @@ class PeriodicMatern52(Kernpart): @silence_errors def dK_dtheta(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)""" + """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" if X2 is None: X2 = X FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) diff --git a/GPy/kern/parts/periodic_exponential.py b/GPy/kern/parts/periodic_exponential.py index 36b7b9ac..201def6d 100644 --- a/GPy/kern/parts/periodic_exponential.py +++ b/GPy/kern/parts/periodic_exponential.py @@ -111,7 +111,7 @@ class PeriodicExponential(Kernpart): @silence_errors def dK_dtheta(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)""" + """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)""" if X2 is None: X2 = X FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 855e2b71..585d687f 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -7,6 +7,7 @@ import numpy as np from scipy import weave from ...util.linalg import tdot from ...util.misc import fast_array_equal +from ...util.config import * class RBF(Kernpart): """ @@ -57,12 +58,27 @@ class RBF(Kernpart): self._X, self._X2, self._params = np.empty(shape=(3, 1)) # a set of optional args to pass to weave - self.weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], # -march=native'], - 'extra_link_args' : ['-lgomp']} + weave_options_openmp = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], + 'extra_link_args' : ['-lgomp'], + 'libraries': ['gomp']} + weave_options_noopenmp = {'extra_compile_args': ['-O3']} + if config.getboolean('parallel', 'openmp'): + self.weave_options = weave_options_openmp + self.weave_support_code = """ + #include + #include + """ + else: + self.weave_options = weave_options_noopenmp + self.weave_support_code = """ + #include + """ + + def _get_params(self): return np.hstack((self.variance, self.lengthscale)) @@ -110,7 +126,7 @@ class RBF(Kernpart): target(q+1) += var_len3(q)*tmp; } """ - num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim + num_data, num_inducing, input_dim = 
int(X.shape[0]), int(X.shape[0]), int(self.input_dim) weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options) else: code = """ @@ -126,7 +142,7 @@ class RBF(Kernpart): target(q+1) += var_len3(q)*tmp; } """ - num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim + num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim) # [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)] weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options) else: @@ -287,10 +303,16 @@ class RBF(Kernpart): lengthscale2 = self.lengthscale2 else: lengthscale2 = np.ones(input_dim) * self.lengthscale2 + + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for private(tmp)' + else: + pragma_string = '' + code = """ double tmp; - #pragma omp parallel for private(tmp) + %s for (int n=0; n + %s #include - """ - weave.inline(code, support_code=support_code, libraries=['gomp'], + """ % pragma_string + + N, num_inducing, input_dim = int(N), int(num_inducing), int(input_dim) + weave.inline(code, support_code=support_code, arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/parts/rbf_inv.py index 0433e96c..1cc05aaa 100644 --- a/GPy/kern/parts/rbf_inv.py +++ b/GPy/kern/parts/rbf_inv.py @@ -7,6 +7,8 @@ import numpy as np import hashlib from scipy import weave from ...util.linalg import tdot +from ...util.config import * + class RBFInv(RBF): """ @@ -58,11 +60,23 @@ class RBFInv(RBF): self._X, self._X2, self._params = np.empty(shape=(3, 1)) # a set of optional args to pass to weave - self.weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], # -march=native'], - 'extra_link_args' : ['-lgomp']} - + weave_options_openmp = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], + 'extra_link_args' : ['-lgomp'], + 'libraries': ['gomp']} + weave_options_noopenmp = {'extra_compile_args': ['-O3']} + if config.getboolean('parallel', 'openmp'): + self.weave_options = weave_options_openmp + self.weave_support_code = """ + #include + #include + """ + else: + self.weave_options = weave_options_noopenmp + self.weave_support_code = """ + #include + """ def _get_params(self): return np.hstack((self.variance, self.inv_lengthscale)) @@ -109,7 +123,7 @@ class RBFInv(RBF): target(q+1) += var_len3(q)*tmp*(-len2(q)); } """ - num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim + num_data, num_inducing, input_dim = int(X.shape[0]), int(X.shape[0]), int(self.input_dim) weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options) else: code = """ @@ -125,7 +139,7 @@ class RBFInv(RBF): target(q+1) += var_len3(q)*tmp*(-len2(q)); } """ - num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim + num_data, num_inducing, input_dim = int(X.shape[0]), int(X2.shape[0]), int(self.input_dim) # 
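# --- A stand-alone sketch of the pattern used in this hunk: one weave.inline
# call whose OpenMP pragma and compile flags are toggled by the config option,
# so the C source itself stays the same. The toy kernel body is an assumption
# for illustration.
import numpy as np
from scipy import weave

use_openmp = False   # in GPy this comes from config.getboolean('parallel', 'openmp')
pragma_string = "#pragma omp parallel for" if use_openmp else ""
code = """
%s
for (int i=0; i<N; i++){
    target(i) = x(i)*x(i);
}
""" % pragma_string
x, target, N = np.arange(5.), np.zeros(5), 5
weave.inline(code, arg_names=['N', 'x', 'target'],
             type_converters=weave.converters.blitz,
             extra_compile_args=['-fopenmp -O3'] if use_openmp else ['-O3'],
             extra_link_args=['-lgomp'] if use_openmp else [])
print(target)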
[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)] weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options) else: @@ -133,7 +147,7 @@ class RBFInv(RBF): def dK_dX(self, dL_dK, X, X2, target): self._K_computations(X, X2) - if X2 is None: + if X2 is None: _K_dist = 2*(X[:, None, :] - X[None, :, :]) else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. @@ -263,8 +277,8 @@ class RBFInv(RBF): self._Z, self._mu, self._S = Z, mu, S def weave_psi2(self, mu, Zhat): - N, input_dim = mu.shape - num_inducing = Zhat.shape[0] + N, input_dim = int(mu.shape[0]), int(mu.shape[1]) + num_inducing = int(Zhat.shape[0]) mudist = np.empty((N, num_inducing, num_inducing, input_dim)) mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim)) @@ -279,10 +293,16 @@ class RBFInv(RBF): inv_lengthscale2 = self.inv_lengthscale2 else: inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2 + + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for private(tmp)' + else: + pragma_string = '' + code = """ double tmp; - #pragma omp parallel for private(tmp) + %s for (int n=0; n - #include - """ - weave.inline(code, support_code=support_code, libraries=['gomp'], + weave.inline(code, support_code=self.weave_support_code, arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp index 76dba4eb..e4df4d80 100644 --- a/GPy/kern/parts/sympy_helpers.cpp +++ b/GPy/kern/parts/sympy_helpers.cpp @@ -1,4 +1,7 @@ #include +#include +#include + double DiracDelta(double x){ // TODO: this doesn't seem to be a dirac delta ... should return infinity. 
Neil
 if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
@@ -23,3 +26,36 @@ double sinc_grad(double x){
   else
     return (x*cos(x) - sin(x))/(x*x);
 }
+
+double erfcx(double x){
+  double xneg=-sqrt(log(DBL_MAX/2));
+  double xmax = 1/(sqrt(M_PI)*DBL_MIN);
+  xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
+  double y = exp(x*x)*erfc(x);
+  if(x<xneg)
+    y = INFINITY;
+  if(x>xmax)
+    return 0.0;
+  else
+    return y;
+}
+
+double ln_diff_erf(double x0, double x1){
+  if (x0==x1)
+    return -INFINITY;
+  else if((x0<0 && x1>0) || (x0>0 && x1<0))
+    return log(erf(x0)-erf(x1));
+  else if(x1>0)
+    return log(erfcx(x1)-erfcx(x0)*exp(x1*x1 - x0*x0))-x1*x1;
+  else
+    return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
+}
diff --git a/GPy/kern/parts/sympy_helpers.h b/GPy/kern/parts/sympy_helpers.h
index d5b495ca..56220167 100644
--- a/GPy/kern/parts/sympy_helpers.h
+++ b/GPy/kern/parts/sympy_helpers.h
@@ -4,3 +4,6 @@ double DiracDelta(double x, int foo);
 double sinc(double x);
 double sinc_grad(double x);
+
+double erfcx(double x);
+double ln_diff_erf(double x0, double x1);
diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index 9755e37b..88c179aa 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -9,6 +9,7 @@ import sys
 current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
 import tempfile
 import pdb
+import ast
 from kernpart import Kernpart

 class spkern(Kernpart):
@@ -16,64 +17,388 @@ class spkern(Kernpart):
     A kernel object, where all the hard work in done by sympy.

     :param k: the covariance function
-    :type k: a positive definite sympy function of x1, z1, x2, z2...
+    :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...

     To construct a new sympy kernel, you'll need to define:
      - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
      - that's it! we'll extract the variables from the function k.

     Note:
-     - to handle multiple inputs, call them x1, z1, etc
-     - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
+     - to handle multiple inputs, call them x_0, z_0, etc
+     - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
     """
-    def __init__(self,input_dim,k,name=None,param=None):
+    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
         if name is None:
             self.name='sympykern'
         else:
             self.name = name
+        if k is None:
+            raise ValueError, "You must provide an argument for the covariance function."
         self._sp_k = k
         sp_vars = [e for e in k.atoms() if e.is_Symbol]
-        self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
-        self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
+        self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
+        self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
+        # Check that variable names make sense.
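# --- A numerical check in Python (not part of the patch) of the identity
# implemented by ln_diff_erf above: for 0 < x1 < x0,
#   log(erf(x0) - erf(x1)) = log(erfcx(x1) - erfcx(x0)*exp(x1**2 - x0**2)) - x1**2,
# where the erfcx form avoids the catastrophic cancellation of the naive
# difference once both arguments are in the upper tail.
import numpy as np
from scipy.special import erf, erfcx

x0, x1 = 6.0, 5.0
naive = np.log(erf(x0) - erf(x1))                                   # loses digits
stable = np.log(erfcx(x1) - erfcx(x0)*np.exp(x1**2 - x0**2)) - x1**2
print(naive, stable)   # both approximately -27.2; stable keeps full precision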
+ assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) + assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert len(self._sp_x)==len(self._sp_z) self.input_dim = len(self._sp_x) + self._real_input_dim = self.input_dim + if output_dim > 1: + self.input_dim += 1 assert self.input_dim == input_dim - self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name) - self.num_params = len(self._sp_theta) + self.output_dim = output_dim + # extract parameter names + thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) - #deal with param - if param is None: - param = np.ones(self.num_params) - assert param.size==self.num_params - self._set_params(param) + + # Look for parameters with index. + if self.output_dim>1: + self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) + self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) + # Make sure parameter appears with both indices! + assert len(self._sp_theta_i)==len(self._sp_theta_j) + assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) + + # Extract names of shared parameters + self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] + + self.num_split_params = len(self._sp_theta_i) + self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] + for theta in self._split_theta_names: + setattr(self, theta, np.ones(self.output_dim)) + + self.num_shared_params = len(self._sp_theta) + self.num_params = self.num_shared_params+self.num_split_params*self.output_dim + + else: + self.num_split_params = 0 + self._split_theta_names = [] + self._sp_theta = thetas + self.num_shared_params = len(self._sp_theta) + self.num_params = self.num_shared_params + + for theta in self._sp_theta: + val = 1.0 + if param is not None: + if param.has_key(theta): + val = param[theta] + setattr(self, theta.name, val) + #deal with param + self._set_params(self._get_params()) #Differentiate! 
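# --- A small sympy sketch (an assumption mirroring the bookkeeping above):
# parameters whose names end in _i/_j are 'split' per output, the rest shared.
import sympy as sp

x_0, z_0 = sp.symbols('x_0 z_0')
scale_i, scale_j, lengthscale = sp.var('scale_i scale_j lengthscale', positive=True)
k = scale_i*scale_j*sp.exp(-(x_0 - z_0)**2/(2*lengthscale**2))

thetas = sorted([e for e in k.atoms() if e.is_Symbol
                 and e.name[0:2] not in ('x_', 'z_')], key=lambda e: e.name)
theta_i = [e for e in thetas if e.name[-2:] == '_i']
theta_j = [e for e in thetas if e.name[-2:] == '_j']
shared = [e for e in thetas if e not in theta_i and e not in theta_j]
print(theta_i, theta_j, shared)   # [scale_i], [scale_j], [lengthscale]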
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] + if self.output_dim > 1: + self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] + self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] - #self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z] - #self.compute_psi_stats() + if False: + self.compute_psi_stats() + self._gen_code() - self.weave_kwargs = {\ - 'support_code':self._function_code,\ - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\ - 'headers':['"sympy_helpers.h"'],\ - 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\ - #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\ - 'extra_compile_args':[],\ - 'extra_link_args':['-lgomp'],\ + if False: + extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] + else: + extra_compile_args = [] + + self.weave_kwargs = { + 'support_code':self._function_code, + 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], + 'headers':['"sympy_helpers.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], 'verbose':True} def __add__(self,other): return spkern(self._sp_k+other._sp_k) + def _gen_code(self): + """Generates the C functions necessary for computing the covariance function using the sympy objects as input.""" + #TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances?? + + #generate c functions from sympy objects + argument_sequence = self._sp_x+self._sp_z+self._sp_theta + code_list = [('k',self._sp_k)] + # gradients with respect to covariance input + code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)] + # gradient with respect to parameters + code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)] + # gradient with respect to multiple output parameters + if self.output_dim > 1: + argument_sequence += self._sp_theta_i + self._sp_theta_j + code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)] + (foo_c,self._function_code), (foo_h,self._function_header) = \ + codegen(code_list, "C",'foobar',argument_sequence=argument_sequence) + #put the header file where we can find it + f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w') + f.write(self._function_header) + f.close() + + # Substitute any known derivatives which sympy doesn't compute + self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) + + + ############################################################ + # This is the basic argument construction for the C code. # + ############################################################ + + arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) + + # for multiple outputs need to also provide these arguments reversed. + if self.output_dim>1: + reverse_arg_list = list(arg_list) + reverse_arg_list.reverse() + + # Add in any 'shared' parameters to the list. 
+ param_arg_list = [shared_params.name for shared_params in self._sp_theta] + arg_list += param_arg_list + + precompute_list=[] + if self.output_dim > 1: + reverse_arg_list+=list(param_arg_list) + split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] + split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] + arg_list += split_param_arg_list + reverse_arg_list += split_param_reverse_arg_list + # Extract the right output indices from the inputs. + c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] + precompute_list += c_define_output_indices + reverse_arg_string = ", ".join(reverse_arg_list) + arg_string = ", ".join(arg_list) + precompute_string = "\n".join(precompute_list) + + # Code to compute argments string needed when only X is provided. + X_arg_string = re.sub('Z','X',arg_string) + # Code to compute argument string when only diagonal is required. + diag_arg_string = re.sub('int jj','//int jj',X_arg_string) + diag_arg_string = re.sub('j','i',diag_arg_string) + diag_precompute_string = precompute_list[0] + + + # Here's the code to do the looping for K + self._K_code =\ + """ + // _K_code + // Code for computing the covariance function. + int i; + int j; + int N = target_array->dimensions[0]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for + for (i=0;i1: + grad_func_list += c_define_output_indices + grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) + grad_func_string = '\n'.join(grad_func_list) + + self._dK_dtheta_code =\ + """ + // _dK_dtheta_code + // Code for computing gradient of covariance with respect to parameters. + int i; + int j; + int N = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;i1: + gradX_func_list += c_define_output_indices + gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] + gradX_func_string = "\n".join(gradX_func_list) + + self._dK_dX_code = \ + """ + // _dK_dX_code + // Code for computing gradient of covariance with respect to inputs. 
+ int i; + int j; + int N = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (int i=0;i1: + arg_names += self._split_theta_names + arg_names += ['output_dim'] + return arg_names + + def _weave_inline(self, code, X, target, Z=None, partial=None): + output_dim = self.output_dim + for shared_params in self._sp_theta: + locals()[shared_params.name] = getattr(self, shared_params.name) + + # Need to extract parameters first + for split_params in self._split_theta_names: + locals()[split_params] = getattr(self, split_params) + arg_names = self._get_arg_names(Z, partial) + weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs) + + def K(self,X,Z,target): + if Z is None: + self._weave_inline(self._K_code_X, X, target) + else: + self._weave_inline(self._K_code, X, target, Z) + + + def Kdiag(self,X,target): + self._weave_inline(self._Kdiag_code, X, target) + + def dK_dtheta(self,partial,X,Z,target): + if Z is None: + self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial) + else: + self._weave_inline(self._dK_dtheta_code, X, target, Z, partial) + + def dKdiag_dtheta(self,partial,X,target): + self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial) + + def dK_dX(self,partial,X,Z,target): + if Z is None: + self._weave_inline(self._dK_dX_code_X, X, target, Z, partial) + else: + self._weave_inline(self._dK_dX_code, X, target, Z, partial) + + def dKdiag_dX(self,partial,X,target): + self._weave.inline(self._dKdiag_dX_code, X, target, Z, partial) + def compute_psi_stats(self): #define some normal distributions - mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)] - Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)] + mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)] + Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)] normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)] #do some integration! 
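The integration flagged above is the standard psi-statistics computation for sparse GPs. As a minimal sketch of the idea (separate from this patch, using an illustrative one-dimensional kernel and variable names that are not from this codebase), sympy can evaluate the required Gaussian expectation in closed form:

import sympy as sp

x, z, mu = sp.symbols('x z mu', real=True)
S = sp.Symbol('S', positive=True)

# an illustrative 1-D squared-exponential kernel
k = sp.exp(-(x - z)**2/2)

# q(x) = N(x | mu, S), matching the 'normals' constructed above
q = (2*sp.pi*S)**sp.Rational(-1, 2) * sp.exp(-(x - mu)**2/(2*S))

# psi1(z) = E_q[k(x, z)], one of the psi statistics
psi1 = sp.integrate(k*q, (x, -sp.oo, sp.oo)).simplify()
print psi1  # expected: exp(-(mu - z)**2/(2*(S + 1)))/sqrt(S + 1)

psi0 and psi2 follow the same pattern, integrating k(x, x) and k(x, z)k(x, z') respectively against q(x).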
@@ -99,188 +424,29 @@ class spkern(Kernpart): self._sp_psi2 = self._sp_psi2.simplify() - def _gen_code(self): - #generate c functions from sympy objects - (foo_c,self._function_code),(foo_h,self._function_header) = \ - codegen([('k',self._sp_k)] \ - + [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\ - #+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\ - + [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\ - ,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta) - #put the header file where we can find it - f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w') - f.write(self._function_header) - f.close() + def _set_params(self,param): + assert param.size == (self.num_params) + for i, shared_params in enumerate(self._sp_theta): + setattr(self, shared_params.name, param[i]) + + if self.output_dim>1: + for i, split_params in enumerate(self._split_theta_names): + start = self.num_shared_params + i*self.output_dim + end = self.num_shared_params + (i+1)*self.output_dim + setattr(self, split_params, param[start:end]) - # Substitute any known derivatives which sympy doesn't compute - self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) - - # Here's the code to do the looping for K - arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x] - + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z] - + ["param[%i]"%i for i in range(self.num_params)]) - - - self._K_code =\ - """ - int i; - int j; - int N = target_array->dimensions[0]; - int num_inducing = target_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for - for (i=0;idimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (i=0;idimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (int i=0;i1: + for split_params in self._split_theta_names: + params = np.hstack((params, getattr(self, split_params).flatten())) + return params def _get_param_names(self): - return [x.name for x in self._sp_theta] + if self.output_dim>1: + return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)] + else: + return [x.name for x in self._sp_theta] diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 0cb62eb0..b46b59ff 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,7 +1,7 @@ from ep import EP +from laplace import Laplace from ep_mixed_noise import EP_Mixed_Noise from gaussian import Gaussian from gaussian_mixed_noise import Gaussian_Mixed_Noise +import noise_models from noise_model_constructors import * -# TODO: from Laplace import Laplace - diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index d242e583..32575813 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -18,8 +18,7 @@ class EP(likelihood): self.data = data self.num_data, self.output_dim = self.data.shape self.is_heteroscedastic = True - self.Nparams = 0 - self._transf_data = self.noise_model._preprocess_values(data) + self.num_params = 0 #Initial 
values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde) @@ -55,6 +54,22 @@ class EP(likelihood): raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" return self.noise_model.predictive_values(mu,var) + def log_predictive_density(self, y_test, mu_star, var_star): + """ + Calculation of the log predictive density + + .. math: + p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*}) + + :param y_test: test observations (y_{*}) + :type y_test: (Nx1) array + :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*}) + :type mu_star: (Nx1) array + :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*}) + :type var_star: (Nx1) array + """ + return self.noise_model.log_predictive_density(y_test, mu_star, var_star) + def _get_params(self): #return np.zeros(0) return self.noise_model._get_params() @@ -134,7 +149,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) @@ -233,7 +248,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) @@ -336,7 +351,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py index ffc8cb51..f5452512 100644 --- a/GPy/likelihoods/ep_mixed_noise.py +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -31,7 +31,7 @@ class EP_Mixed_Noise(likelihood): self.data = np.vstack(data_list) self.N, self.output_dim = self.data.shape self.is_heteroscedastic = True - self.Nparams = 0#FIXME + self.num_params = 0#FIXME self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)]) #TODO non-gaussian index diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 8f66d074..85c028b4 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -15,7 +15,7 @@ class Gaussian(likelihood): """ def __init__(self, data, variance=1., normalize=False): self.is_heteroscedastic = False - self.Nparams = 1 + self.num_params = 1 self.Z = 0. 
# a correction factor which accounts for the approximation made N, self.output_dim = data.shape @@ -90,11 +90,25 @@ class Gaussian(likelihood): _95pc = mean + 2.*np.sqrt(true_var) return mean, true_var, _5pc, _95pc - def fit_full(self): + def log_predictive_density(self, y_test, mu_star, var_star): """ - No approximations needed + Calculation of the log predictive density + + .. math: + p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*}) + + :param y_test: test observations (y_{*}) + :type y_test: (Nx1) array + :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*}) + :type mu_star: (Nx1) array + :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*}) + :type var_star: (Nx1) array + + .. Note: + Works as if each test point was provided individually, i.e. not full_cov """ - pass + y_rescaled = (y_test - self._offset)/self._scale + return -0.5*np.log(2*np.pi) -0.5*np.log(var_star + self._variance) -0.5*(np.square(y_rescaled - mu_star))/(var_star + self._variance) def _gradients(self, partial): return np.sum(partial) diff --git a/GPy/likelihoods/gaussian_mixed_noise.py b/GPy/likelihoods/gaussian_mixed_noise.py index 4df01ec2..696867c0 100644 --- a/GPy/likelihoods/gaussian_mixed_noise.py +++ b/GPy/likelihoods/gaussian_mixed_noise.py @@ -23,14 +23,14 @@ class Gaussian_Mixed_Noise(likelihood): :type normalize: False|True """ def __init__(self, data_list, noise_params=None, normalize=True): - self.Nparams = len(data_list) + self.num_params = len(data_list) self.n_list = [data.size for data in data_list] - self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)]) + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)]) if noise_params is None: - noise_params = [1.] * self.Nparams + noise_params = [1.] * self.num_params else: - assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.' + assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.' self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)] self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list] diff --git a/GPy/likelihoods/laplace.py b/GPy/likelihoods/laplace.py new file mode 100644 index 00000000..6a44d5b6 --- /dev/null +++ b/GPy/likelihoods/laplace.py @@ -0,0 +1,390 @@ +# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +# +#Parts of this file were influenced by the Matlab GPML framework written by +#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own. +# +#The GPML code is released under the FreeBSD License. +#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved. +# +#The code and associated documentation is available from +#http://gaussianprocess.org/gpml/code. + +import numpy as np +import scipy as sp +from likelihood import likelihood +from ..util.linalg import mdot, jitchol, pddet, dpotrs +from functools import partial as partial_func + +class Laplace(likelihood): + """Laplace approximation to a posterior""" + + def __init__(self, data, noise_model, extra_data=None): + """ + Laplace Approximation + + Find the moments \hat{f} and the hessian at this point + (using Newton-Raphson) of the unnormalised posterior + + Compute the GP variables (i.e. 
generate some Y^{squiggle} and
+        z^{squiggle} which make a Gaussian the same as the Laplace
+        approximation to the posterior, but normalised
+
+        Arguments
+        ---------
+
+        :param data: array of data the likelihood function is approximating
+        :type data: NxD
+        :param noise_model: likelihood function - subclass of noise_model
+        :type noise_model: noise_model
+        :param extra_data: additional data used by some likelihood functions
+        """
+        self.data = data
+        self.noise_model = noise_model
+        self.extra_data = extra_data
+
+        #Initial values
+        self.N, self.D = self.data.shape
+        self.is_heteroscedastic = True
+        self.num_params = 0
+        self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
+
+        self.restart()
+        likelihood.__init__(self)
+
+    def restart(self):
+        """
+        Reset likelihood variables to their defaults
+        """
+        #Initial values for the GP variables
+        self.Y = np.zeros((self.N, 1))
+        self.covariance_matrix = np.eye(self.N)
+        self.precision = np.ones(self.N)[:, None]
+        self.Z = 0
+        self.YYT = None
+
+        self.old_Ki_f = None
+
+    def predictive_values(self, mu, var, full_cov):
+        if full_cov:
+            raise NotImplementedError("Cannot make correlated predictions\
+                    with a Laplace likelihood")
+        return self.noise_model.predictive_values(mu, var)
+
+    def log_predictive_density(self, y_test, mu_star, var_star):
+        """
+        Calculation of the log predictive density
+
+        .. math::
+            p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*})df_{*}
+
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type mu_star: (Nx1) array
+        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type var_star: (Nx1) array
+        """
+        return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
+
+    def _get_params(self):
+        return np.asarray(self.noise_model._get_params())
+
+    def _get_param_names(self):
+        return self.noise_model._get_param_names()
+
+    def _set_params(self, p):
+        return self.noise_model._set_params(p)
+
+    def _shared_gradients_components(self):
+        d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
+        dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
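+        #(+0.5 is correct: this term is -0.5*tr((K^{-1}+W)^{-1} * dW/df), and
+        # since W = -d2logpdf_df2 we have dW/df = -d3logpdf_df3, so the two
+        # minus signs cancel; see Rasmussen & Williams 2006, eq. 5.23)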
+        I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
+        return dL_dfhat, I_KW_i
+
+    def _Kgradients(self):
+        """
+        Gradients with respect to the prior kernel parameters: dL_dK, to be
+        chained with dK_dthetaK to give dL_dthetaK
+
+        :returns: dL_dK matrix
+        :rtype: Matrix (1 x num_kernel_params)
+        """
+        dL_dfhat, I_KW_i = self._shared_gradients_components()
+        dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
+
+        #Explicit
+        #expl_a = np.dot(self.Ki_f, self.Ki_f.T)
+        #expl_b = self.Wi_K_i
+        #expl = 0.5*expl_a - 0.5*expl_b
+        #dL_dthetaK_exp = dK_dthetaK(expl, X)
+
+        #Implicit
+        impl = mdot(dlp, dL_dfhat, I_KW_i)
+
+        #No longer required as we are computing these in the gp already,
+        #otherwise we would take them away and add them back
+        #dL_dthetaK_imp = dK_dthetaK(impl, X)
+        #dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
+        #dL_dK = expl + impl
+
+        #No need to compute the explicit part, as we are computing dZ_dK to account
+        #for the difference between the K gradients of a normal GP
+        #and the K gradients including the implicit part
+        dL_dK = impl
+        return dL_dK
+
+    def _gradients(self, partial):
+        """
+        Gradients with respect to likelihood parameters (dL_dthetaL)
+
+        :param partial: Not needed by this likelihood
+        :type partial: lambda function
+        :rtype: array of derivatives (1 x num_likelihood_params)
+        """
+        dL_dfhat, I_KW_i = self._shared_gradients_components()
+        dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
+
+        num_params = len(self._get_param_names())
+        #make space for one derivative for each likelihood parameter
+        dL_dthetaL = np.zeros(num_params)
+        for thetaL_i in range(num_params):
+            #Explicit
+            dL_dthetaL_exp = (np.sum(dlik_dthetaL[:, thetaL_i])
+                              #- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
+                              + np.dot(0.5*np.diag(self.Ki_W_i)[:, None].T, dlik_hess_dthetaL[:, thetaL_i])
+                              )
+
+            #Implicit
+            dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
+            dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
+            dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
+
+        return dL_dthetaL
+
+    def _compute_GP_variables(self):
+        """
+        Generate data Y which would give a normal distribution identical
+        to the Laplace approximation to the posterior, but normalised
+
+        GPy expects a likelihood to be Gaussian, so we need to calculate
+        the data Y^{\tilde} that makes the posterior match the one found
+        by a Laplace approximation to the true (non-Gaussian) likelihood,
+        while using a Gaussian likelihood
+
+        Firstly,
+        the Hessian of the unnormalised posterior distribution is (K^{-1} + W)^{-1},
+        i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}), but this assumes a non-Gaussian likelihood;
+        we wish to find the covariance \Sigma^{\tilde} that has the same curvature
+        but uses our new simulated data Y^{\tilde},
+        i.e. we set N(Y^{\tilde}|f^{\hat}, \Sigma^{\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
+        and solve for Y^{\tilde} and \Sigma^{\tilde}.
+        We find that Y^{\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{\tilde} = W^{-1}
+
+        Secondly,
+        GPy optimizes the log marginal likelihood
+        log p(y) = -0.5*ln|K+\Sigma^{\tilde}| - 0.5*Y^{\tilde T}(K + \Sigma^{\tilde})^{-1}Y^{\tilde} + lik.Z,
+        so we can absorb any difference between that and our log marginal likelihood approximation
+        p^{\squiggle}(y) = -0.5*f^{\hat T}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log(|K||K^{-1} + W|),
+        which is what we actually want to optimize: equating the two and rearranging,
+        the difference is added onto the log p(y) that GPy optimizes by default
+
+        Thirdly,
+        since we have gradients that depend on how we move f^{\hat}, we have implicit components
+        as well as the explicit dL_dK; we hold these differences in dZ_dK and add them to dL_dK in the
+        gp.py code
+        """
+        Wi = 1.0/self.W
+        self.Sigma_tilde = np.diagflat(Wi)
+
+        Y_tilde = Wi*self.Ki_f + self.f_hat
+
+        self.Wi_K_i = self.W12BiW12
+        self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
+        self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
+        self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
+
+        Z_tilde = (+ self.lik
+                   - 0.5*self.ln_B_det
+                   + 0.5*self.ln_det_Wi_K
+                   - 0.5*self.f_Ki_f
+                   + 0.5*self.y_Wi_Ki_i_y
+                   )
+
+        #Convert to float as it's (1, 1) and Z must be a scalar
+        self.Z = np.float64(Z_tilde)
+        self.Y = Y_tilde
+        self.YYT = np.dot(self.Y, self.Y.T)
+        self.covariance_matrix = self.Sigma_tilde
+        self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
+
+        #Compute dZ_dK: how the approximated distribution's gradients differ from the dL_dK computed for other likelihoods
+        self.dZ_dK = self._Kgradients()
+        #+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the explicit part of the K gradients there's no need to compute this again
+
+    def fit_full(self, K):
+        """
+        The Laplace approximation algorithm: find the mode and expand the Hessian around it.
+        For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
+
+        :param K: Prior covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        """
+        self.K = K.copy()
+
+        #Find the mode
+        self.f_hat = self.rasm_mode(self.K)
+
+        #Compute the Hessian and other variables at the mode
+        self._compute_likelihood_variables()
+
+        #Compute surrogate variables replicating the Laplace approximation to the posterior
+        self._compute_GP_variables()
+
+    def _compute_likelihood_variables(self):
+        """
+        Compute the variables required to compute the Gaussian Y variables
+        """
+        #At this point get the Hessian matrix (or vector, as W is diagonal)
+        self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
+
+        #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
+        self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
+
+        #self.Ki_f has already been set by rasm_mode
+        self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
+        self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
+
+    def _compute_B_statistics(self, K, W, a):
+        """
+        Rasmussen suggests the use of a numerically stable positive definite matrix B,
+        which has positive diagonal elements and can be easily inverted
+
+        :param K: Prior covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        :param W: Negative Hessian at a point (diagonal matrix)
+        :type W: Vector of diagonal values of Hessian (1xN)
+        :param a: Matrix to calculate W12BiW12a
+        :type a: Matrix NxN
+        :returns: (W12BiW12, ln_B_det)
+        """
+        if not self.noise_model.log_concave:
+            #print "Under 1e-10: {}".format(np.sum(W < 1e-10))
+            W[W < 1e-6] = 1e-6
+            # FIXME-HACK: This is a hack, since GPy can't handle the negative
+            # variances which can occur if the likelihood is non-log-concave.
+            # We want to say that there is a negative variance, to make the
+            # posterior less certain than the prior and likelihood; this is a
+            # property only held by non-log-concave likelihoods
+
+        #W is diagonal so its sqrt is just the sqrt of the diagonal elements
+        W_12 = np.sqrt(W)
+        B = np.eye(self.N) + W_12*K*W_12.T
+        L = jitchol(B)
+
+        W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
+        ln_B_det = 2*np.sum(np.log(np.diag(L)))
+        return W12BiW12, ln_B_det
+
+    def rasm_mode(self, K, MAX_ITER=30):
+        """
+        Rasmussen's numerically stable mode finding.
+        For nomenclature see Rasmussen & Williams 2006
+        Influenced by GPML (BSD) code, all errors are our own
+
+        :param K: Covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        :param MAX_ITER: Maximum number of Newton-Raphson iterations before forcing finish of optimisation
+        :type MAX_ITER: scalar
+        :returns: f_hat, the mode on which to make the Laplace approximation
+        :rtype: NxD matrix
+        """
+        if self.old_Ki_f is None:
+            #Start f at zero originally
+            old_Ki_f = np.zeros((self.N, 1))
+            f = np.dot(K, old_Ki_f)
+        else:
+            #Start at the old best point
+            old_Ki_f = self.old_Ki_f.copy()
+            f = self.f_hat.copy()
+
+        new_obj = -np.inf
+        old_obj = np.inf
+
+        def obj(Ki_f, f):
+            return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
+
+        difference = np.inf
+        epsilon = 1e-5
+        #step_size = 1
+        #rs = 0
+        i = 0
+
+        while difference > epsilon and i < MAX_ITER:
+            W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
+
+            W_f = W*f
+            grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
+
+            b = W_f + grad
+            W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
+
+            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
+            full_step_Ki_f = b - W12BiW12Kb
+            dKi_f = full_step_Ki_f - old_Ki_f
+
+            f_old = f.copy()
+            def inner_obj(step_size, old_Ki_f, dKi_f, K):
+                Ki_f = old_Ki_f + step_size*dKi_f
+                f = np.dot(K, Ki_f)
+                # This is nasty: we need to stash state from inside the optimizer
+                self.tmp_Ki_f = Ki_f.copy()
+                self.tmp_f = f.copy()
+                return -obj(Ki_f, f)
+
+            i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
+            #Find the stepsize that minimizes the objective function using a Brent line search.
+            #The tolerance and maxiter matter for speed! It seems best to keep them low and make
+            #more full steps rather than solving each line search exactly; on bigger problems it
+            #might be the other way around though
+            new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
+            f = self.tmp_f.copy()
+            Ki_f = self.tmp_Ki_f.copy()
+
+            #Optimize without linesearch
+            #f_old = f.copy()
+            #update_passed = False
+            #while not update_passed:
+                #Ki_f = old_Ki_f + step_size*dKi_f
+                #f = np.dot(K, Ki_f)
+                #old_obj = new_obj
+                #new_obj = obj(Ki_f, f)
+                #difference = new_obj - old_obj
+                #if difference < 0:
+                    ##the objective function rose; reduce the step size and restart the optimization
+                    #step_size *= 0.8
+                    #f = f_old.copy() #it's actually faster not to go back to the old location and just zigzag across the mode
+                    #old_obj = new_obj
+                    #rs += 1
+                #else:
+                    #update_passed = True
+            #old_Ki_f = self.Ki_f.copy()
+            #difference = abs(new_obj - old_obj)
+            #old_obj = new_obj.copy()
+            #difference = np.abs(np.sum(f - f_old))
+            difference = np.abs(np.sum(Ki_f - old_Ki_f))
+            old_Ki_f = Ki_f.copy()
+            i += 1
+
+        self.old_Ki_f = old_Ki_f.copy()
+        if difference > epsilon:
+            print "f_hat not fully converged, difference: {}".format(difference)
+
+        self.Ki_f = Ki_f
+        return f
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index ca187305..5e7c8c68 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -23,6 +23,7 @@ class likelihood(Parameterized):
     """
     def __init__(self):
         Parameterized.__init__(self)
+        self.dZ_dK = 0

     def _get_params(self):
         raise NotImplementedError
@@ -33,11 +34,36 @@ class likelihood(Parameterized):
     def _set_params(self, x):
         raise NotImplementedError

-    def fit(self):
-        raise NotImplementedError
+    def fit_full(self, K):
+        """
+        No approximations needed by default
+        """
+        pass
+
+    def restart(self):
+        """
+        No need to restart if not an approximation
+        """
+        pass

     def _gradients(self, partial):
         raise NotImplementedError

     def predictive_values(self, mu, var):
         raise NotImplementedError
+
+    def log_predictive_density(self, y_test, mu_star, var_star):
+        """
+        Calculation of the log predictive density
+
+        ..
math: + p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*}) + + :param y_test: test observations (y_{*}) + :type y_test: (Nx1) array + :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*}) + :type mu_star: (Nx1) array + :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*}) + :type var_star: (Nx1) array + """ + raise NotImplementedError diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index ec971e04..e626c6a3 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -4,9 +4,9 @@ import numpy as np import noise_models -def binomial(gp_link=None): +def bernoulli(gp_link=None): """ - Construct a binomial likelihood + Construct a bernoulli likelihood :param gp_link: a GPy gp_link function """ @@ -27,16 +27,17 @@ def binomial(gp_link=None): analytical_mean = False analytical_variance = False - return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance) + return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance) def exponential(gp_link=None): + """ - Construct a binomial likelihood + Construct a exponential likelihood :param gp_link: a GPy gp_link function """ if gp_link is None: - gp_link = noise_models.gp_transformations.Identity() + gp_link = noise_models.gp_transformations.Log_ex_1() analytical_mean = False analytical_variance = False @@ -85,4 +86,36 @@ def gamma(gp_link=None,beta=1.): analytical_variance = False return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta) +def gaussian(gp_link=None, variance=2, D=None, N=None): + """ + Construct a Gaussian likelihood + :param gp_link: a GPy gp_link function + :param variance: variance + :type variance: scalar + :returns: Gaussian noise model: + """ + if gp_link is None: + gp_link = noise_models.gp_transformations.Identity() + analytical_mean = True + analytical_variance = True # ? 
+ return noise_models.gaussian_noise.Gaussian(gp_link, analytical_mean, + analytical_variance, variance=variance, D=D, N=N) + +def student_t(gp_link=None, deg_free=5, sigma2=2): + """ + Construct a Student t likelihood + + :param gp_link: a GPy gp_link function + :param deg_free: degrees of freedom of student-t + :type deg_free: scalar + :param sigma2: variance + :type sigma2: scalar + :returns: Student-T noise model + """ + if gp_link is None: + gp_link = noise_models.gp_transformations.Identity() + analytical_mean = True + analytical_variance = True + return noise_models.student_t_noise.StudentT(gp_link, analytical_mean, + analytical_variance,deg_free, sigma2) diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py index b47702a7..d1d134dc 100644 --- a/GPy/likelihoods/noise_models/__init__.py +++ b/GPy/likelihoods/noise_models/__init__.py @@ -1,7 +1,8 @@ import noise_distributions -import binomial_noise +import bernoulli_noise import exponential_noise import gaussian_noise import gamma_noise import poisson_noise +import student_t_noise import gp_transformations diff --git a/GPy/likelihoods/noise_models/bernoulli_noise.py b/GPy/likelihoods/noise_models/bernoulli_noise.py new file mode 100644 index 00000000..2c4116da --- /dev/null +++ b/GPy/likelihoods/noise_models/bernoulli_noise.py @@ -0,0 +1,216 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from noise_distributions import NoiseDistribution + +class Bernoulli(NoiseDistribution): + """ + Bernoulli likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} + + .. Note:: + Y is expected to take values in {-1,1} + Probit likelihood usually used + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + """ + Check if the values of the observations correspond to the values + assumed by the likelihood function. + + ..Note:: Binary classification algorithm works better with classes {-1,1} + """ + Y_prep = Y.copy() + Y1 = Y[Y.flatten()==1].size + Y2 = Y[Y.flatten()==0].size + assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.' + Y_prep[Y.flatten() == 0] = -1 + return Y_prep + + def _moments_match_analytical(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + if data_i == 1: + sign = 1. + elif data_i == 0: + sign = -1 + else: + raise ValueError("bad value for Bernouilli observation (0,1)") + if isinstance(self.gp_link,gp_transformations.Probit): + z = sign*v_i/np.sqrt(tau_i**2 + tau_i) + Z_hat = std_norm_cdf(z) + phi = std_norm_pdf(z) + mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) + sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + + elif isinstance(self.gp_link,gp_transformations.Heaviside): + a = sign*v_i/np.sqrt(tau_i) + Z_hat = std_norm_cdf(a) + N = std_norm_pdf(a) + mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i) + sigma2_hat = (1. 
- a*N/Z_hat - np.square(N/Z_hat))/tau_i
+            if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
+                raise ValueError("NaN encountered in Bernoulli moment matching")
+        else:
+            raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.__class__.__name__))
+
+        return Z_hat, mu_hat, sigma2_hat
+
+    def _predictive_mean_analytical(self,mu,sigma):
+        if isinstance(self.gp_link,gp_transformations.Probit):
+            return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
+        elif isinstance(self.gp_link,gp_transformations.Heaviside):
+            return stats.norm.cdf(mu/sigma)
+        else:
+            raise NotImplementedError
+
+    def _predictive_variance_analytical(self,mu,sigma, pred_mean):
+        if isinstance(self.gp_link,gp_transformations.Heaviside):
+            return 0.
+        else:
+            raise NotImplementedError
+
+    def pdf_link(self, link_f, y, extra_data=None):
+        """
+        Likelihood function given link(f)
+
+        .. math::
+            p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in bernoulli
+        :returns: likelihood evaluated for this point
+        :rtype: float
+
+        .. Note::
+            Each y_i must be in {0,1}
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        objective = (link_f**y) * ((1.-link_f)**(1.-y))
+        return np.exp(np.sum(np.log(objective)))
+
+    def logpdf_link(self, link_f, y, extra_data=None):
+        """
+        Log likelihood function given link(f)
+
+        .. math::
+            \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log(1-\\lambda(f_{i}))
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in bernoulli
+        :returns: log likelihood evaluated at points link(f)
+        :rtype: float
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
+        return np.sum(objective)
+
+    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+        """
+        Gradient of the log likelihood at y, given link(f), w.r.t link(f)
+
+        .. math::
+            \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in bernoulli
+        :returns: gradient of log likelihood evaluated at points link(f)
+        :rtype: Nx1 array
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        grad = (y/link_f) - (1.-y)/(1-link_f)
+        return grad
+
+    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+        """
+        Hessian at y, given link_f, w.r.t link_f. The hessian will be 0 unless i == j,
+        i.e. the second derivative of logpdf at y given link(f_i), link(f_j) w.r.t link(f_i) and link(f_j)
+
+        .. math::
+            \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in bernoulli
+        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
+        :rtype: Nx1 array
+
+        .. Note::
+            Will return the diagonal of the hessian, since it is 0 everywhere else, as the likelihood factorizes over cases
+            (the distribution for y_i depends only on link(f_i), not on link(f_(j!=i)))
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
+        return d2logpdf_dlink2
+
+    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+        """
+        Third order derivative of the log-likelihood function at y given link(f), w.r.t link(f)
+
+        .. math::
+            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in bernoulli
+        :returns: third derivative of log likelihood evaluated at points link(f)
+        :rtype: Nx1 array
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
+        return d3logpdf_dlink3
+
+    def _mean(self,gp):
+        """
+        Expected value of y under p(y|f)
+        """
+        return self.gp_link.transf(gp)
+
+    def _variance(self,gp):
+        """
+        Variance of y under p(y|f)
+        """
+        p = self.gp_link.transf(gp)
+        return p*(1.-p)
+
+    def samples(self, gp):
+        """
+        Returns a set of samples of observations based on a given value of the latent variable.
+
+        :param gp: latent variable
+        """
+        orig_shape = gp.shape
+        gp = gp.flatten()
+        ns = np.ones_like(gp, dtype=int)
+        Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
+        return Ysim.reshape(orig_shape)
diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/binomial_noise.py
deleted file mode 100644
index c0bb8be4..00000000
--- a/GPy/likelihoods/noise_models/binomial_noise.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright (c) 2012, 2013 Ricardo Andrade
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats,special
-import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
-import gp_transformations
-from noise_distributions import NoiseDistribution
-
-class Binomial(NoiseDistribution):
-    """
-    Probit likelihood
-    Y is expected to take values in {-1,1}
-    -----
-    $$
-    L(x) = \\Phi (Y_i*f_i)
-    $$
-    """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
-
-    def _preprocess_values(self,Y):
-        """
-        Check if the values of the observations correspond to the values
-        assumed by the likelihood function.
-
-        ..Note:: Binary classification algorithm works better with classes {-1,1}
-        """
-        Y_prep = Y.copy()
-        Y1 = Y[Y.flatten()==1].size
-        Y2 = Y[Y.flatten()==0].size
-        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
- Y_prep[Y.flatten() == 0] = -1 - return Y_prep - - def _moments_match_analytical(self,data_i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - if isinstance(self.gp_link,gp_transformations.Probit): - z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = std_norm_cdf(z) - phi = std_norm_pdf(z) - mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) - - elif isinstance(self.gp_link,gp_transformations.Heaviside): - a = data_i*v_i/np.sqrt(tau_i) - Z_hat = std_norm_cdf(a) - N = std_norm_pdf(a) - mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i) - sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i - if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])): - stop - - return Z_hat, mu_hat, sigma2_hat - - def _predictive_mean_analytical(self,mu,sigma): - if isinstance(self.gp_link,gp_transformations.Probit): - return stats.norm.cdf(mu/np.sqrt(1+sigma**2)) - elif isinstance(self.gp_link,gp_transformations.Heaviside): - return stats.norm.cdf(mu/sigma) - else: - raise NotImplementedError - - def _predictive_variance_analytical(self,mu,sigma, pred_mean): - if isinstance(self.gp_link,gp_transformations.Heaviside): - return 0. - else: - raise NotImplementedError - - def _mass(self,gp,obs): - #NOTE obs must be in {0,1} - p = self.gp_link.transf(gp) - return p**obs * (1.-p)**(1.-obs) - - def _nlog_mass(self,gp,obs): - p = self.gp_link.transf(gp) - return obs*np.log(p) + (1.-obs)*np.log(1-p) - - def _dnlog_mass_dgp(self,gp,obs): - p = self.gp_link.transf(gp) - dp = self.gp_link.dtransf_df(gp) - return obs/p * dp - (1.-obs)/(1.-p) * dp - - def _d2nlog_mass_dgp2(self,gp,obs): - p = self.gp_link.transf(gp) - return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp) - - def _mean(self,gp): - """ - Mass (or density) function - """ - return self.gp_link.transf(gp) - - def _dmean_dgp(self,gp): - return self.gp_link.dtransf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) - - def _variance(self,gp): - """ - Mass (or density) function - """ - p = self.gp_link.transf(gp) - return p*(1.-p) - - def _dvariance_dgp(self,gp): - return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp)) - - def _d2variance_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2 - - - def samples(self, gp): - """ - Returns a set of samples of observations based on a given value of the latent variable. 
- - :param size: number of samples to compute - :param gp: latent variable - """ - orig_shape = gp.shape - gp = gp.flatten() - Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp]) - return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/noise_models/exponential_noise.py b/GPy/likelihoods/noise_models/exponential_noise.py index 56e63c75..602ccea5 100644 --- a/GPy/likelihoods/noise_models/exponential_noise.py +++ b/GPy/likelihoods/noise_models/exponential_noise.py @@ -24,24 +24,113 @@ class Exponential(NoiseDistribution): def _preprocess_values(self,Y): return Y - def _mass(self,gp,obs): + def pdf_link(self, link_f, y, extra_data=None): """ - Mass (or density) function - """ - return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp) + Likelihood function given link(f) - def _nlog_mass(self,gp,obs): - """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted - """ - return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp)) + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})\\exp (-y\\lambda(f_{i})) - def _dnlog_mass_dgp(self,gp,obs): - return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp) + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + log_objective = link_f*np.exp(-y*link_f) + return np.exp(np.sum(np.log(log_objective))) + #return np.exp(np.sum(-y/link_f - np.log(link_f) )) - def _d2nlog_mass_dgp2(self,gp,obs): - fgp = self.gp_link.transf(gp) - return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp) + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i}) + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + log_objective = np.log(link_f) - y*link_f + #logpdf_link = np.sum(-np.log(link_f) - y/link_f) + return np.sum(log_objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = 1./link_f - y + #grad = y/(link_f**2) - 1./link_f + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. 
math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -1./(link_f**2) + #hess = -2*y/(link_f**3) + 1/(link_f**2) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in exponential distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = 2./(link_f**3) + #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) + return d3lik_dlink3 def _mean(self,gp): """ @@ -49,20 +138,19 @@ class Exponential(NoiseDistribution): """ return self.gp_link.transf(gp) - def _dmean_dgp(self,gp): - return self.gp_link.dtransf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) - def _variance(self,gp): """ Mass (or density) function """ return self.gp_link.transf(gp)**2 - def _dvariance_dgp(self,gp): - return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp) + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. - def _d2variance_dgp2(self,gp): - return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp)) + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.random.exponential(1.0/self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/noise_models/gamma_noise.py b/GPy/likelihoods/noise_models/gamma_noise.py index 6bf0dd7b..2be3106a 100644 --- a/GPy/likelihoods/noise_models/gamma_noise.py +++ b/GPy/likelihoods/noise_models/gamma_noise.py @@ -12,11 +12,11 @@ from noise_distributions import NoiseDistribution class Gamma(NoiseDistribution): """ Gamma likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! - $$ + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + """ def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.): self.beta = beta @@ -25,26 +25,122 @@ class Gamma(NoiseDistribution): def _preprocess_values(self,Y): return Y - def _mass(self,gp,obs): + def pdf_link(self, link_f, y, extra_data=None): """ - Mass (or density) function + Likelihood function given link(f) + + .. 
math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\beta^{\\alpha_{i}}}{\\Gamma(\\alpha_{i})}y_{i}^{\\alpha_{i}-1}e^{-\\beta y_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance) - alpha = self.gp_link.transf(gp)*self.beta - return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha) + alpha = link_f*self.beta + objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha) + return np.exp(np.sum(np.log(objective))) - def _nlog_mass(self,gp,obs): + def logpdf_link(self, link_f, y, extra_data=None): """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + """ - alpha = self.gp_link.transf(gp)*self.beta - return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + #alpha = self.gp_link.transf(gp)*self.beta + #return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) + alpha = link_f*self.beta + log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y + return np.sum(log_objective) - def _dnlog_mass_dgp(self,gp,obs): - return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) - def _d2nlog_mass_dgp2(self,gp,obs): - return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta + .. math:: + \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\beta (\\log \\beta y_{i}) - \\Psi(\\alpha_{i})\\beta\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta + #old + #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. 
second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) + #old + #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\ + \\alpha_{i} = \\beta y_{i} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in gamma distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) + return d3lik_dlink3 def _mean(self,gp): """ @@ -52,20 +148,8 @@ class Gamma(NoiseDistribution): """ return self.gp_link.transf(gp) - def _dmean_dgp(self,gp): - return self.gp_link.dtransf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) - def _variance(self,gp): """ Mass (or density) function """ return self.gp_link.transf(gp)/self.beta - - def _dvariance_dgp(self,gp): - return self.gp_link.dtransf_df(gp)/self.beta - - def _d2variance_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp)/self.beta diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py index 93ac9acd..fce84d27 100644 --- a/GPy/likelihoods/noise_models/gaussian_noise.py +++ b/GPy/likelihoods/noise_models/gaussian_noise.py @@ -12,11 +12,17 @@ class Gaussian(NoiseDistribution): """ Gaussian likelihood - :param mean: mean value of the Gaussian distribution - :param variance: mean value of the Gaussian distribution + .. 
math::
+        \\ln p(y|\\lambda(f)) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y - \\lambda(f))^{T}K^{-1}(y - \\lambda(f))}{2}, \\qquad K = \\sigma^{2}I
+
+    :param variance: variance value of the Gaussian distribution
+    :param N: Number of data points
+    :type N: int
     """
-    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
         self.variance = variance
+        self.N = N
+        self._set_params(np.asarray(variance))
         super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)

     def _get_params(self):
@@ -25,8 +31,13 @@ class Gaussian(NoiseDistribution):
     def _get_param_names(self):
         return ['noise_model_variance']

-    def _set_params(self,p):
-        self.variance = p
+    def _set_params(self, p):
+        self.variance = float(p)
+        self.I = np.eye(self.N)
+        self.covariance_matrix = self.I * self.variance
+        self.Ki = self.I*(1.0 / self.variance)
+        #self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
+        self.ln_det_K = self.N*np.log(self.variance)

     def _gradients(self,partial):
         return np.zeros(1)
@@ -57,42 +68,231 @@ class Gaussian(NoiseDistribution):
         new_sigma2 = self.predictive_variance(mu,sigma)
         return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)

-    def _predictive_variance_analytical(self,mu,sigma):
+    def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
         return 1./(1./self.variance + 1./sigma**2)

-    def _mass(self,gp,obs):
-        #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
-        return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))
+    def _mass(self, link_f, y, extra_data=None):
+        raise NotImplementedError("Deprecated: the chain rule is now applied in noise_model.py for link function "
+                                  "evaluation. Use pdf in noise_model.py instead; if implementing a likelihood, "
+                                  "re-derive the derivatives without the chain rule and implement logpdf, "
+                                  "dlogpdf_dlink and their derivatives")

-    def _nlog_mass(self,gp,obs):
-        return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))
+    def _nlog_mass(self, link_f, y, extra_data=None):
+        raise NotImplementedError("Deprecated: the chain rule is now applied in noise_model.py for link function "
+                                  "evaluation. Negate logpdf in noise_model.py instead; if implementing a likelihood, "
+                                  "re-derive the derivatives without the chain rule and implement logpdf, "
+                                  "dlogpdf_dlink and their derivatives")

-    def _dnlog_mass_dgp(self,gp,obs):
-        return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)
+    def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
+        raise NotImplementedError("Deprecated: the chain rule is now applied in noise_model.py for link function "
+                                  "evaluation. Negate dlogpdf_df in noise_model.py instead; if implementing a "
+                                  "likelihood, re-derive the derivatives without the chain rule and implement logpdf, "
+                                  "dlogpdf_dlink and their derivatives")

-    def _d2nlog_mass_dgp2(self,gp,obs):
-        return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance
+    def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
+        raise NotImplementedError("Deprecated: the chain rule is now applied in noise_model.py for link function "
+                                  "evaluation. Negate d2logpdf_df2 in noise_model.py instead; if implementing a "
+                                  "likelihood, re-derive the derivatives without the chain rule and implement logpdf, "
+                                  "dlogpdf_dlink and their derivatives")

+    def pdf_link(self, link_f, y, extra_data=None):
+        """
+        Likelihood function given link(f)
+
+        .. math::
+            p(y_{i}|\\lambda(f_{i})) = \\frac{1}{\\sqrt{2\\pi\\sigma^{2}}}\\exp\\left(-\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{2}}\\right)
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: likelihood evaluated for this point
+        :rtype: float
+        """
+        #Assumes no covariance; exp, sum, log for numerical stability
+        return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
+
+    def logpdf_link(self, link_f, y, extra_data=None):
+        """
+        Log likelihood function given link(f)
+
+        .. math::
+            \\ln p(y|\\lambda(f)) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y - \\lambda(f))^{T}K^{-1}(y - \\lambda(f))}{2}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: log likelihood evaluated for this point
+        :rtype: float
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
+
+    def dlogpdf_dlink(self, link_f, y, extra_data=None):
+        """
+        Gradient of the log pdf at y, given link(f), w.r.t link(f)
+
+        .. math::
+            \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: gradient of log likelihood evaluated at points link(f)
+        :rtype: Nx1 array
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        s2_i = (1.0/self.variance)
+        grad = s2_i*y - s2_i*link_f
+        return grad
+
+    def d2logpdf_dlink2(self, link_f, y, extra_data=None):
+        """
+        Hessian at y, given link_f, w.r.t link_f,
+        i.e. second derivative of logpdf at y given link(f_i) and link(f_j), w.r.t link(f_i) and link(f_j)
+
+        The hessian will be 0 unless i == j
+
+        .. math::
+            \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = -\\frac{1}{\\sigma^{2}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
+        :rtype: Nx1 array
+
+        .. Note::
+            Will return the diagonal of the hessian, since everywhere else it is 0, as the likelihood factorizes over cases
+            (the distribution for y_i depends only on link(f_i), not on link(f_j) for j != i)
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        hess = -(1.0/self.variance)*np.ones((self.N, 1))
+        return hess
+
+    def d3logpdf_dlink3(self, link_f, y, extra_data=None):
+        """
+        Third order derivative of the log-likelihood function at y given link(f), w.r.t link(f)
+
+        .. math::
+            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{3}} = 0
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: third derivative of log likelihood evaluated at points link(f)
+        :rtype: Nx1 array
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
+        return d3logpdf_dlink3
+
+    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+        """
+        Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
+
+        .. math::
+            \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
+        :rtype: float
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        e = y - link_f
+        s_4 = 1.0/(self.variance**2)
+        dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
+        return dlik_dsigma #already a scalar after the np.sum above; no further reduction needed
+
+    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+        """
+        Derivative of dlogpdf_dlink w.r.t variance parameter (noise_variance)
+
+        .. math::
+            \\frac{d}{d\\sigma^{2}}\\left(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}\\right) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
+        :rtype: Nx1 array
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        s_4 = 1.0/(self.variance**2)
+        dlik_grad_dsigma = -s_4*y + s_4*link_f
+        return dlik_grad_dsigma
+
+    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+        """
+        Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
+
+        .. math::
+            \\frac{d}{d\\sigma^{2}}\\left(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}}\\right) = \\frac{1}{\\sigma^{4}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data not used in Gaussian
+        :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
+        :rtype: Nx1 array
+        """
+        assert np.asarray(link_f).shape == np.asarray(y).shape
+        s_4 = 1.0/(self.variance**2)
+        d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
+        return d2logpdf_dlink2_dvar
+
+    def dlogpdf_link_dtheta(self, f, y, extra_data=None):
+        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
+        return np.asarray([[dlogpdf_dvar]])
+
+    def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
+        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
+        return dlogpdf_dlink_dvar
+
+    def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
+        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
+        return d2logpdf_dlink2_dvar

     def _mean(self,gp):
         """
-        Mass (or density) function
+        Expected value of y under the Mass (or density) function p(y|f)
+
+        ..
math:: + E_{p(y|f)}[y] """ return self.gp_link.transf(gp) - def _dmean_dgp(self,gp): - return self.gp_link.dtransf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) - def _variance(self,gp): """ - Mass (or density) function + Variance of y under the Mass (or density) function p(y|f) + + .. math:: + Var_{p(y|f)}[y] """ return self.variance - def _dvariance_dgp(self,gp): - return 0 + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. - def _d2variance_dgp2(self,gp): - return 0 + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp]) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py index e95e9df7..65730418 100644 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -24,19 +24,25 @@ class GPTransformation(object): """ Gaussian process tranformation function, latent space -> output space """ - pass + raise NotImplementedError def dtransf_df(self,f): """ derivative of transf(f) w.r.t. f """ - pass + raise NotImplementedError def d2transf_df2(self,f): """ second derivative of transf(f) w.r.t. f """ - pass + raise NotImplementedError + + def d3transf_df3(self,f): + """ + third derivative of transf(f) w.r.t. f + """ + raise NotImplementedError class Identity(GPTransformation): """ @@ -49,10 +55,13 @@ class Identity(GPTransformation): return f def dtransf_df(self,f): - return 1. + return np.ones_like(f) def d2transf_df2(self,f): - return 0 + return np.zeros_like(f) + + def d3transf_df3(self,f): + return np.zeros_like(f) class Probit(GPTransformation): @@ -71,6 +80,10 @@ class Probit(GPTransformation): def d2transf_df2(self,f): return -f * std_norm_pdf(f) + def d3transf_df3(self,f): + f2 = f**2 + return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) + class Log(GPTransformation): """ .. math:: @@ -87,6 +100,9 @@ class Log(GPTransformation): def d2transf_df2(self,f): return np.exp(f) + def d3transf_df3(self,f): + return np.exp(f) + class Log_ex_1(GPTransformation): """ .. math:: @@ -104,15 +120,23 @@ class Log_ex_1(GPTransformation): aux = np.exp(f)/(1.+np.exp(f)) return aux*(1.-aux) + def d3transf_df3(self,f): + aux = np.exp(f)/(1.+np.exp(f)) + daux_df = aux*(1.-aux) + return daux_df - (2.*aux*daux_df) + class Reciprocal(GPTransformation): - def transf(sefl,f): + def transf(self,f): return 1./f def dtransf_df(self,f): - return -1./f**2 + return -1./(f**2) def d2transf_df2(self,f): - return 2./f**3 + return 2./(f**3) + + def d3transf_df3(self,f): + return -6./(f**4) class Heaviside(GPTransformation): """ diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 913c2b9d..77cc82a4 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -9,15 +9,12 @@ import pylab as pb from GPy.util.plot import gpplot from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf import gp_transformations - +from GPy.util.misc import chain_1, chain_2, chain_3 +from scipy.integrate import quad class NoiseDistribution(object): """ - Likelihood class for doing Expectation propagation - - :param Y: observed output (Nx1 numpy.darray) - - .. 
note:: Y values allowed depend on the LikelihoodFunction used + Likelihood class for doing approximations """ def __init__(self,gp_link,analytical_mean=False,analytical_variance=False): assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation." @@ -35,6 +32,8 @@ class NoiseDistribution(object): else: self.predictive_variance = self._predictive_variance_numerical + self.log_concave = True + def _get_params(self): return np.zeros(0) @@ -57,369 +56,379 @@ class NoiseDistribution(object): """ return Y - def _product(self,gp,obs,mu,sigma): - """ - Product between the cavity distribution and a likelihood factor. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs) - - def _nlog_product_scaled(self,gp,obs,mu,sigma): - """ - Negative log-product between the cavity distribution and a likelihood factor. - - .. note:: The constant term in the Gaussian distribution is ignored. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs) - - def _dnlog_product_dgp(self,gp,obs,mu,sigma): - """ - Derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs) - - def _d2nlog_product_dgp2(self,gp,obs,mu,sigma): - """ - Second derivative wrt latent variable of the log-product between the cavity distribution and a likelihood factor. - - :param gp: latent variable - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs) - - def _product_mode(self,obs,mu,sigma): - """ - Newton's CG method to find the mode in _product (cavity x likelihood factor). - - :param obs: observed output - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False) - def _moments_match_analytical(self,obs,tau,v): """ If available, this function computes the moments analytically. """ - pass + raise NotImplementedError + + def log_predictive_density(self, y_test, mu_star, var_star): + """ + Calculation of the log predictive density + + .. 
math::
+            p(y_{*}|D) = \\int p(y_{*}|f_{*})p(f_{*}|\\mu_{*},\\sigma^{2}_{*}) df_{*}
+
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param mu_star: predictive mean of Gaussian p(f_{*}|mu_{*}, var_{*})
+        :type mu_star: (Nx1) array
+        :param var_star: predictive variance of Gaussian p(f_{*}|mu_{*}, var_{*})
+        :type var_star: (Nx1) array
+        """
+        assert y_test.shape==mu_star.shape
+        assert y_test.shape==var_star.shape
+        assert y_test.shape[1] == 1
+        def integral_generator(y, m, v):
+            """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
+            def f(f_star):
+                return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
+            return f
+
+        scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
+        scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
+        p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
+        return np.log(p_ystar)

     def _moments_match_numerical(self,obs,tau,v):
         """
-        Lapace approximation to calculate the moments.
+        Calculation of moments using quadrature

         :param obs: observed output
         :param tau: cavity distribution 1st natural parameter (precision)
         :param v: cavity distribution 2nd natural parameter (mu*precision)
-
         """
+        #Compute first integral for zeroth moment
         mu = v/tau
-        mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
-        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
-        Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
-        return Z_hat,mu_hat,sigma2_hat
+        def int_1(f):
+            return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
+        z, accuracy = quad(int_1, -np.inf, np.inf)
+        z /= np.sqrt(2*np.pi/tau)

-    def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
-        """
-        Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v.
+        #Compute second integral for first moment
+        def int_2(f):
+            return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
+        mean, accuracy = quad(int_2, -np.inf, np.inf)
+        mean /= np.sqrt(2*np.pi/tau)
+        mean /= z

-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
+        #Compute integral for variance
+        def int_3(f):
+            return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
+        Ef2, accuracy = quad(int_3, -np.inf, np.inf)
+        Ef2 /= np.sqrt(2*np.pi/tau)
+        Ef2 /= z
+        variance = Ef2 - mean**2

-        .. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
-
-        """
-        return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
-
-    def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
-        """
-        Derivative of _nlog_conditional_mean_scaled wrt. l.v.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
-
-    def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
-        """
-        Second derivative of _nlog_conditional_mean_scaled wrt. l.v.
-
-        :param gp: latent variable
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-
-        """
-        return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
-
-    def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
-        """
-        Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v.
- - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - .. note:: This function helps computing E(V(Y_star|f_star)) - - """ - return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp)) - - def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): - """ - Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) - - def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): - """ - Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2 - - def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma): - """ - Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - .. note:: This function helps computing E( E(Y_star|f_star)**2 ) - - """ - return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)) - - def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma): - """ - Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp) - - def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma): - """ - Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. - - :param gp: latent variable - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 ) + return z, mean, variance def _predictive_mean_analytical(self,mu,sigma): """ + Predictive mean + .. math:: + E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) ) + If available, this function computes the predictive mean analytically. """ - pass + raise NotImplementedError def _predictive_variance_analytical(self,mu,sigma): """ + Predictive variance + .. math:: + V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) ) + If available, this function computes the predictive variance analytically. """ - pass + raise NotImplementedError def _predictive_mean_numerical(self,mu,sigma): """ - Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) + Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) ) - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation + :param mu: mean of posterior + :param sigma: standard deviation of posterior """ - maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False) - mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) - """ + #FIXME: Quadrature does not work! 
+ raise NotImplementedError + sigma2 = sigma**2 + #Compute first moment + def int_mean(f): + return self._mean(f)*np.exp(-(0.5/sigma2)*np.square(f - mu)) + scaled_mean, accuracy = quad(int_mean, -np.inf, np.inf) + mean = scaled_mean / np.sqrt(2*np.pi*(sigma2)) - pb.figure() - x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) - f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) - pb.plot(x,f,'b-') - sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma) - f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) - k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) - pb.plot(x,f2*mean,'r-') - pb.vlines(maximum,0,f.max()) - """ return mean - def _predictive_mean_sq(self,mu,sigma): - """ - Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 ) - - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - - """ - maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False) - mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) - return mean_squared - def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None): """ Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation + :param mu: mean of posterior + :param sigma: standard deviation of posterior :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. 
""" + sigma2 = sigma**2 + normalizer = np.sqrt(2*np.pi*sigma2) + # E( V(Y_star|f_star) ) - maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False) - exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma) + #Compute expected value of variance + def int_var(f): + return self._variance(f)*np.exp(-(0.5/sigma2)*np.square(f - mu)) + scaled_exp_variance, accuracy = quad(int_var, -np.inf, np.inf) + exp_var = scaled_exp_variance / normalizer - """ - pb.figure() - x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) - f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) - pb.plot(x,f,'b-') - sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma) - f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) - k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) - pb.plot(x,f2*exp_var,'r--') - pb.vlines(maximum,0,f.max()) - """ - - #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 ) - exp_exp2 = self._predictive_mean_sq(mu,sigma) + #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2 if predictive_mean is None: predictive_mean = self.predictive_mean(mu,sigma) + + predictive_mean_sq = predictive_mean**2 + def int_pred_mean_sq(f): + return predictive_mean_sq*np.exp(-(0.5/(sigma2))*np.square(f - mu)) + + scaled_exp_exp2, accuracy = quad(int_pred_mean_sq, -np.inf, np.inf) + exp_exp2 = scaled_exp_exp2 / normalizer + var_exp = exp_exp2 - predictive_mean**2 + # V(Y_star | f_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) return exp_var + var_exp - def _predictive_percentiles(self,p,mu,sigma): + def pdf_link(self, link_f, y, extra_data=None): + raise NotImplementedError + + def logpdf_link(self, link_f, y, extra_data=None): + raise NotImplementedError + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + raise NotImplementedError + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + raise NotImplementedError + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + raise NotImplementedError + + def dlogpdf_link_dtheta(self, link_f, y, extra_data=None): + raise NotImplementedError + + def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None): + raise NotImplementedError + + def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None): + raise NotImplementedError + + def pdf(self, f, y, extra_data=None): """ - Percentiles of the predictive distribution + Evaluates the link function link(f) then computes the likelihood (pdf) using it - :parm p: lower tail probability - :param mu: cavity distribution mean - :param sigma: cavity distribution standard deviation - :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. + .. 
math: + p(y|\\lambda(f)) + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: likelihood evaluated for this point + :rtype: float """ - qf = stats.norm.ppf(p,mu,sigma) - return self.gp_link.transf(qf) + link_f = self.gp_link.transf(f) + return self.pdf_link(link_f, y, extra_data=extra_data) - def _nlog_joint_predictive_scaled(self,x,mu,sigma): + def logpdf(self, f, y, extra_data=None): """ - Negative logarithm of the joint predictive distribution (latent variable and output). + Evaluates the link function link(f) then computes the log likelihood (log pdf) using it - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation + .. math: + \\log p(y|\\lambda(f)) + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: log likelihood evaluated for this point + :rtype: float """ - return self._nlog_product_scaled(x[0],x[1],mu,sigma) + link_f = self.gp_link.transf(f) + return self.logpdf_link(link_f, y, extra_data=extra_data) - def _gradient_nlog_joint_predictive(self,x,mu,sigma): + def dlogpdf_df(self, f, y, extra_data=None): """ - Gradient of _nlog_joint_predictive_scaled. + Evaluates the link function link(f) then computes the derivative of log likelihood using it + Uses the Faa di Bruno's formula for the chain rule - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - - .. note: Only available when the output is continuous + .. math:: + \\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df} + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: derivative of log likelihood evaluated for this point + :rtype: 1xN array """ - assert not self.discrete, "Gradient not available for discrete outputs." - return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0]))) + link_f = self.gp_link.transf(f) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + dlink_df = self.gp_link.dtransf_df(f) + return chain_1(dlogpdf_dlink, dlink_df) - def _hessian_nlog_joint_predictive(self,x,mu,sigma): + def d2logpdf_df2(self, f, y, extra_data=None): """ - Hessian of _nlog_joint_predictive_scaled. + Evaluates the link function link(f) then computes the second derivative of log likelihood using it + Uses the Faa di Bruno's formula for the chain rule - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation - - .. note: Only available when the output is continuous + .. 
math:: + \\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}} + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: second derivative of log likelihood evaluated for this point (diagonal only) + :rtype: 1xN array """ - assert not self.discrete, "Hessian not available for discrete outputs." - cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1]) - return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2) + link_f = self.gp_link.transf(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + d2link_df2 = self.gp_link.d2transf_df2(f) + return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) - def _joint_predictive_mode(self,mu,sigma): + def d3logpdf_df3(self, f, y, extra_data=None): """ - Negative logarithm of the joint predictive distribution (latent variable and output). + Evaluates the link function link(f) then computes the third derivative of log likelihood using it + Uses the Faa di Bruno's formula for the chain rule - :param x: tuple (latent variable,output) - :param mu: latent variable's predictive mean - :param sigma: latent variable's predictive standard deviation + .. math:: + \\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}} + :param f: latent variables f + :type f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: third derivative of log likelihood evaluated for this point + :rtype: float """ - return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False) + link_f = self.gp_link.transf(f) + d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data) + dlink_df = self.gp_link.dtransf_df(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data) + d2link_df2 = self.gp_link.d2transf_df2(f) + dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data) + d3link_df3 = self.gp_link.d3transf_df3(f) + return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) - def predictive_values(self,mu,var): + def dlogpdf_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([1, 0]) + + def dlogpdf_df_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink_dtheta 
= self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + return chain_1(dlogpdf_dlink_dtheta, dlink_df) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([f.shape[0], 0]) + + def d2logpdf_df2_dtheta(self, f, y, extra_data=None): + """ + TODO: Doc strings + """ + if len(self._get_param_names()) > 0: + link_f = self.gp_link.transf(f) + dlink_df = self.gp_link.dtransf_df(f) + d2link_df2 = self.gp_link.d2transf_df2(f) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) + return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) + else: + #Is no parameters so return an empty array for its derivatives + return np.empty([f.shape[0], 0]) + + def _laplace_gradients(self, f, y, extra_data=None): + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data) + dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data) + d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data) + + #Parameters are stacked vertically. Must be listed in same order as 'get_param_names' + # ensure we have gradients for every parameter we want to optimize + assert dlogpdf_dtheta.shape[1] == len(self._get_param_names()) + assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names()) + assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names()) + return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta + + def predictive_values(self, mu, var, full_cov=False, num_samples=30000, + sampling=False): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction. - :param mu: mean of the latent variable - :param var: variance of the latent variable + :param mu: mean of the latent variable, f, of posterior + :param var: variance of the latent variable, f, of posterior + :param full_cov: whether to use the full covariance or just the diagonal + :type full_cov: Boolean + :param num_samples: number of samples to use in computing quantiles and + possibly mean variance + :type num_samples: integer + :param sampling: Whether to use samples for mean and variances anyway + :type sampling: Boolean """ - if isinstance(mu,float) or isinstance(mu,int): - mu = [mu] - var = [var] - pred_mean = [] - pred_var = [] - q1 = [] - q3 = [] - for m,s in zip(mu,np.sqrt(var)): - pred_mean.append(self.predictive_mean(m,s)) - pred_var.append(self.predictive_variance(m,s,pred_mean[-1])) - q1.append(self._predictive_percentiles(.025,m,s)) - q3.append(self._predictive_percentiles(.975,m,s)) + + #Get gp_samples f* using posterior mean and variance + if not full_cov: + gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()), + size=num_samples).T + else: + gp_samples = np.random.multivariate_normal(mu.flatten(), var, + size=num_samples).T + + #Push gp samples (f*) through likelihood to give p(y*|f*) + samples = self.samples(gp_samples) + axis=-1 + + if self.analytical_mean and not sampling: + pred_mean = self.predictive_mean(mu, np.sqrt(var)) + else: + pred_mean = np.mean(samples, axis=axis) + + if self.analytical_variance and not sampling: + pred_var = self.predictive_variance(mu, np.sqrt(var), pred_mean) + else: + pred_var = np.var(samples, axis=axis) + + #Calculate quantiles from samples + q1 = np.percentile(samples, 2.5, axis=axis) + q3 = np.percentile(samples, 97.5, axis=axis) + print "WARNING: Using sampling to calculate predictive quantiles" + pred_mean = 
np.vstack(pred_mean) pred_var = np.vstack(pred_var) q1 = np.vstack(q1) q3 = np.vstack(q3) return pred_mean, pred_var, q1, q3 - def samples(self, gp): """ Returns a set of samples of observations based on a given value of the latent variable. :param gp: latent variable """ - pass - + raise NotImplementedError diff --git a/GPy/likelihoods/noise_models/poisson_noise.py b/GPy/likelihoods/noise_models/poisson_noise.py index 33de84cd..b0300704 100644 --- a/GPy/likelihoods/noise_models/poisson_noise.py +++ b/GPy/likelihoods/noise_models/poisson_noise.py @@ -1,7 +1,7 @@ +from __future__ import division # Copyright (c) 2012, 2013 Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) - import numpy as np from scipy import stats,special import scipy as sp @@ -14,9 +14,10 @@ class Poisson(NoiseDistribution): Poisson likelihood .. math:: - L(x) = \\exp(\\lambda) * \\frac{\\lambda^Y_i}{Y_i!} + p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})} - ..Note: Y is expected to take values in {0,1,2,...} + .. Note:: + Y is expected to take values in {0,1,2,...} """ def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance) @@ -24,25 +25,108 @@ class Poisson(NoiseDistribution): def _preprocess_values(self,Y): #TODO return Y - def _mass(self,gp,obs): + def pdf_link(self, link_f, y, extra_data=None): """ - Mass (or density) function - """ - return stats.poisson.pmf(obs,self.gp_link.transf(gp)) + Likelihood function given link(f) - def _nlog_mass(self,gp,obs): - """ - Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted - """ - return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1)) + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\lambda(f_{i})^{y_{i}}}{y_{i}!}e^{-\\lambda(f_{i})} - def _dnlog_mass_dgp(self,gp,obs): - return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp)) + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.prod(stats.poisson.pmf(y,link_f)) - def _d2nlog_mass_dgp2(self,gp,obs): - d2_df = self.gp_link.d2transf_df2(gp) - transf = self.gp_link.transf(gp) - return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. math:: + \\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}! + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. 
math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1 + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + return y/link_f - 1 + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + hess = -y/(link_f**2) + return hess + #d2_df = self.gp_link.d2transf_df2(gp) + #transf = self.gp_link.transf(gp) + #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. math:: + \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in poisson distribution + :returns: third derivative of likelihood evaluated at points f + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + d3lik_dlink3 = 2*y/(link_f)**3 + return d3lik_dlink3 def _mean(self,gp): """ @@ -50,20 +134,19 @@ class Poisson(NoiseDistribution): """ return self.gp_link.transf(gp) - def _dmean_dgp(self,gp): - return self.gp_link.dtransf_df(gp) - - def _d2mean_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) - def _variance(self,gp): """ Mass (or density) function """ return self.gp_link.transf(gp) - def _dvariance_dgp(self,gp): - return self.gp_link.dtransf_df(gp) + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. 
- def _d2variance_dgp2(self,gp): - return self.gp_link.d2transf_df2(gp) + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + Ysim = np.random.poisson(self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py new file mode 100644 index 00000000..daad7186 --- /dev/null +++ b/GPy/likelihoods/noise_models/student_t_noise.py @@ -0,0 +1,277 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats, special +import scipy as sp +import gp_transformations +from noise_distributions import NoiseDistribution +from scipy import stats, integrate +from scipy.special import gammaln, gamma + +class StudentT(NoiseDistribution): + """ + Student T likelihood + + For nomanclature see Bayesian Data Analysis 2003 p576 + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + + """ + def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2): + self.v = deg_free + self.sigma2 = sigma2 + + self._set_params(np.asarray(sigma2)) + super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance) + self.log_concave = False + + def _get_params(self): + return np.asarray(self.sigma2) + + def _get_param_names(self): + return ["t_noise_std2"] + + def _set_params(self, x): + self.sigma2 = float(x) + + @property + def variance(self, extra_data=None): + return (self.v / float(self.v - 2)) * self.sigma2 + + def pdf_link(self, link_f, y, extra_data=None): + """ + Likelihood function given link(f) + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + #Careful gamma(big_number) is infinity! + objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) + / (np.sqrt(self.v * np.pi * self.sigma2))) + * ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1))) + ) + return np.prod(objective) + + def logpdf_link(self, link_f, y, extra_data=None): + """ + Log Likelihood Function given link(f) + + .. 
math:: + \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) + + :param link_f: latent variables (link(f)) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + objective = (+ gammaln((self.v + 1) * 0.5) + - gammaln(self.v * 0.5) + - 0.5*np.log(self.sigma2 * self.v * np.pi) + - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) + ) + return np.sum(objective) + + def dlogpdf_dlink(self, link_f, y, extra_data=None): + """ + Gradient of the log likelihood function at y, given link(f) w.r.t link(f) + + .. math:: + \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} + + :param link_f: latent variables (f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: gradient of likelihood evaluated at points + :rtype: Nx1 array + + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) + return grad + + def d2logpdf_dlink2(self, link_f, y, extra_data=None): + """ + Hessian at y, given link(f), w.r.t link(f) + i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) + The hessian will be 0 unless i == j + + .. math:: + \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) + return hess + + def d3logpdf_dlink3(self, link_f, y, extra_data=None): + """ + Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + + .. 
math::
+            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{3}} = \\frac{2(v+1)(y_{i} - \\lambda(f_{i}))\\left((y_{i} - \\lambda(f_{i}))^{2} - 3\\sigma^{2}v\\right)}{\\left((y_{i} - \\lambda(f_{i}))^{2} + \\sigma^{2}v\\right)^{3}}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in student t distribution
+        :returns: third derivative of likelihood evaluated at points f
+        :rtype: Nx1 array
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        e = y - link_f
+        d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
+                          ((e**2 + self.sigma2*self.v)**3)
+                       )
+        return d3lik_dlink3
+
+    def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
+        """
+        Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
+
+        .. math::
+            \\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})}
+
+        :param link_f: latent variables link(f)
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in student t distribution
+        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
+        :rtype: float
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        e = y - link_f
+        dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
+        return np.sum(dlogpdf_dvar)
+
+    def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
+        """
+        Derivative of dlogpdf_dlink w.r.t variance parameter (t_noise)
+
+        .. math::
+            \\frac{d}{d\\sigma^{2}}\\left(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}\\right) = \\frac{-v(v+1)(y_{i}-\\lambda(f_{i}))}{\\left((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v\\right)^{2}}
+
+        :param link_f: latent variables link_f
+        :type link_f: Nx1 array
+        :param y: data
+        :type y: Nx1 array
+        :param extra_data: extra_data which is not used in student t distribution
+        :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
+        :rtype: Nx1 array
+        """
+        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
+        e = y - link_f
+        dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
+        return dlogpdf_dlink_dvar
+
+    def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
+        """
+        Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
+
+        ..
math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} + + :param link_f: latent variables link(f) + :type link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param extra_data: extra_data which is not used in student t distribution + :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter + :rtype: Nx1 array + """ + assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape + e = y - link_f + d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) + / ((self.sigma2*self.v + (e**2))**3) + ) + return d2logpdf_dlink2_dvar + + def dlogpdf_link_dtheta(self, f, y, extra_data=None): + dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) + return np.asarray([[dlogpdf_dvar]]) + + def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): + dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) + return dlogpdf_dlink_dvar + + def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): + d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) + return d2logpdf_dlink2_dvar + + def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None): + """ + Compute predictive variance of student_t*normal p(y*|f*)p(f*) + + Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*) + (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2)) + *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2))) + """ + + #FIXME: Not correct + #We want the variance around test points y which comes from int p(y*|f*)p(f*) df* + #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)] + #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this + #Which was also given to us as (var) + #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution + #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom + true_var = 1/(1/sigma**2 + 1/self.variance) + + return true_var + + def _predictive_mean_analytical(self, mu, sigma): + """ + Compute mean of the prediction + """ + #FIXME: Not correct + return mu + + def samples(self, gp): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + #FIXME: Very slow as we are computing a new random variable per input! 
+ #Can't get it to sample all at the same time + #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp]) + dfs = np.ones_like(gp)*self.v + scales = np.ones_like(gp)*np.sqrt(self.sigma2) + student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp), + scale=scales) + return student_t_samples.reshape(orig_shape) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index d4d29711..21b46a8a 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -49,18 +49,6 @@ class BayesianGPLVM(SparseGP, GPLVM): SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) self.ensure_default_constraints() - def getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return SparseGP.getstate(self) + [self.init] - - def setstate(self, state): - self._const_jitter = None - self.init = state.pop() - SparseGP.setstate(self, state) - def _get_param_names(self): X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) @@ -285,6 +273,19 @@ class BayesianGPLVM(SparseGP, GPLVM): fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) return fig + def getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return SparseGP.getstate(self) + [self.init] + + def setstate(self, state): + self._const_jitter = None + self.init = state.pop() + SparseGP.setstate(self, state) + + def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ objective function for fitting the latent variables for test points diff --git a/GPy/models/bcgplvm.py b/GPy/models/bcgplvm.py index 9f5866c3..92db6953 100644 --- a/GPy/models/bcgplvm.py +++ b/GPy/models/bcgplvm.py @@ -7,7 +7,7 @@ import pylab as pb import sys, pdb from ..core import GP from ..models import GPLVM -from ..mappings import * +from ..mappings import Kernel class BCGPLVM(GPLVM): diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py index ee92a1b4..0aa21db9 100644 --- a/GPy/models/fitc_classification.py +++ b/GPy/models/fitc_classification.py @@ -16,7 +16,7 @@ class FITCClassification(FITC): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to Binomial with probit link function + :param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function :param kernel: a GPy kernel, defaults to rbf+white :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class FITCClassification(FITC): kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) if likelihood is None: - noise_model = likelihoods.binomial() + noise_model = likelihoods.bernoulli() likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index fce51cfa..7fc61bb7 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -15,7 +15,7 @@ class GPClassification(GP): :param X: input observations :param Y: observed values, can be None if likelihood is not None - :param likelihood: a GPy likelihood, defaults to Binomial with 
probit link_function + :param likelihood: a GPy likelihood, defaults to Bernoulli with Probit link_function :param kernel: a GPy kernel, defaults to rbf :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class GPClassification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - noise_model = likelihoods.binomial() + noise_model = likelihoods.bernoulli() likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 86e1f7de..633fc1c8 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -25,11 +25,12 @@ class GPRegression(GP): """ - def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False): + def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, likelihood=None): if kernel is None: kernel = kern.rbf(X.shape[1]) - likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) + if likelihood is None: + likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self.ensure_default_constraints() @@ -39,5 +40,3 @@ class GPRegression(GP): def setstate(self, state): return GP.setstate(self, state) - - pass diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index ad78d51f..795389a7 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -44,12 +44,6 @@ class GPLVM(GP): Xr[:PC.shape[0], :PC.shape[1]] = PC return Xr - def getstate(self): - return GP.getstate(self) - - def setstate(self, state): - GP.setstate(self, state) - def _get_param_names(self): return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self) @@ -68,7 +62,7 @@ class GPLVM(GP): def jacobian(self,X): target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) for i in range(self.output_dim): - target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X) + target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X) return target def magnification(self,X): @@ -91,3 +85,11 @@ class GPLVM(GP): def plot_magnification(self, *args, **kwargs): return util.plot_latent.plot_magnification(self, *args, **kwargs) + + def getstate(self): + return GP.getstate(self) + + def setstate(self, state): + GP.setstate(self, state) + + diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py index 5afcd7c4..64b8b2fb 100644 --- a/GPy/models/gradient_checker.py +++ b/GPy/models/gradient_checker.py @@ -26,40 +26,40 @@ class GradientChecker(Model): """ :param f: Function to check gradient for :param df: Gradient of function to check - :param x0: + :param x0: Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names). - Can be a list of arrays, if takes a list of arrays. This list will be passed + Can be a list of arrays, if takes a list of arrays. This list will be passed to f and df in the same order as given here. If only one argument, make sure not to pass a list!!! - + :type x0: [array-like] | array-like | float | int :param names: Names to print, when performing gradcheck. If a list was passed to x0 a list of names with the same length is expected. 
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs) - + Examples: --------- from GPy.models import GradientChecker N, M, Q = 10, 5, 3 - + Sinusoid: - + X = numpy.random.rand(N, Q) grad = GradientChecker(numpy.sin,numpy.cos,X,'x') grad.checkgrad(verbose=1) - + Using GPy: - + X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q) kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) - grad = GradientChecker(kern.K, + grad = GradientChecker(kern.K, lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x), x0 = X.copy(), - names='X') + names='X') grad.checkgrad(verbose=1) grad.randomize() - grad.checkgrad(verbose=1) + grad.checkgrad(verbose=1) """ Model.__init__(self) if isinstance(x0, (list, tuple)) and names is None: @@ -75,14 +75,14 @@ class GradientChecker(Model): self.names = names self.shapes = [get_shape(x0)] for name, xi in zip(self.names, at_least_one_element(x0)): - self.__setattr__(name, xi) + self.__setattr__(name, numpy.float_(xi)) # self._param_names = [] # for name, shape in zip(self.names, self.shapes): # self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape)))))) self.args = args self.kwargs = kwargs - self.f = f - self.df = df + self._f = f + self._df = df def _get_x(self): if len(self.names) > 1: @@ -90,10 +90,10 @@ class GradientChecker(Model): return [self.__getattribute__(self.names[0])] + list(self.args) def log_likelihood(self): - return float(numpy.sum(self.f(*self._get_x(), **self.kwargs))) + return float(numpy.sum(self._f(*self._get_x(), **self.kwargs))) def _log_likelihood_gradients(self): - return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten() + return numpy.atleast_1d(self._df(*self._get_x(), **self.kwargs)).flatten() def _get_params(self): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index be191e9b..2aaa731c 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -81,29 +81,6 @@ class MRD(Model): Model.__init__(self) self.ensure_default_constraints() - def getstate(self): - return Model.getstate(self) + [self.names, - self.bgplvms, - self.gref, - self.nparams, - self.input_dim, - self.num_inducing, - self.num_data, - self.NQ, - self.MQ] - - def setstate(self, state): - self.MQ = state.pop() - self.NQ = state.pop() - self.num_data = state.pop() - self.num_inducing = state.pop() - self.input_dim = state.pop() - self.nparams = state.pop() - self.gref = state.pop() - self.bgplvms = state.pop() - self.names = state.pop() - Model.setstate(self, state) - @property def X(self): return self.gref.X @@ -211,8 +188,8 @@ class MRD(Model): # g.Z = Z.reshape(self.num_inducing, self.input_dim) # # def _set_kern_params(self, g, p): -# g.kern._set_params(p[:g.kern.Nparam]) -# g.likelihood._set_params(p[g.kern.Nparam:]) +# g.kern._set_params(p[:g.kern.num_params]) +# g.likelihood._set_params(p[g.kern.num_params:]) def _set_params(self, x): start = 0; end = self.NQ @@ -371,4 +348,28 @@ class MRD(Model): pylab.draw() fig.tight_layout() + def getstate(self): + return Model.getstate(self) + [self.names, + self.bgplvms, + self.gref, + self.nparams, + self.input_dim, + self.num_inducing, + self.num_data, + self.NQ, + self.MQ] + + def setstate(self, state): + self.MQ = state.pop() + self.NQ = state.pop() + self.num_data = state.pop() + self.num_inducing = state.pop() + self.input_dim = state.pop() + self.nparams = state.pop() + self.gref = state.pop() + self.bgplvms = 
state.pop() + self.names = state.pop() + Model.setstate(self, state) + + diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 50c2f935..9274aacc 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function :param kernel: a GPy kernel, defaults to rbf+white :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP): kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3) if likelihood is None: - noise_model = likelihoods.binomial() + noise_model = likelihoods.bernoulli() likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/svigp_regression.py b/GPy/models/svigp_regression.py index 4d22c619..e826bf35 100644 --- a/GPy/models/svigp_regression.py +++ b/GPy/models/svigp_regression.py @@ -25,7 +25,7 @@ class SVIGPRegression(SVIGP): """ - def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10): + def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10, normalize_Y=False): # kern defaults to rbf (plus white for stability) if kernel is None: kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3) @@ -38,7 +38,7 @@ class SVIGPRegression(SVIGP): assert Z.shape[1] == X.shape[1] # likelihood defaults to Gaussian - likelihood = likelihoods.Gaussian(Y, normalize=False) + likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) self.load_batch() diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index 989251a7..a525b1c9 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -37,7 +37,7 @@ def model_checkgrads(model): def model_instance(model): #assert isinstance(model, GPy.core.model) - return isinstance(model, GPy.core.model) + return isinstance(model, GPy.core.model.Model) @nottest def test_models(): @@ -54,7 +54,7 @@ def test_models(): print "After" print functions for example in functions: - if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']: + if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100', 'brendan_faces']: print "SKIPPING" continue diff --git a/GPy/testing/gp_transformation_tests.py b/GPy/testing/gp_transformation_tests.py new file mode 100644 index 00000000..42c0414b --- /dev/null +++ b/GPy/testing/gp_transformation_tests.py @@ -0,0 +1,61 @@ +from nose.tools import with_setup +from GPy.models import GradientChecker +from GPy.likelihoods.noise_models import gp_transformations +import inspect +import unittest +import numpy as np + +class TestTransformations(object): + """ + Generic transformations checker + """ + def setUp(self): + N = 30 + self.fs = [np.random.rand(N, 1), float(np.random.rand(1))] + + + def tearDown(self): + self.fs = None + + def test_transformations(self): + self.setUp() + transformations = [gp_transformations.Identity(), + gp_transformations.Log(), + gp_transformations.Probit(), + gp_transformations.Log_ex_1(), + gp_transformations.Reciprocal(), + ] + + 
for transformation in transformations: + for f in self.fs: + yield self.t_dtransf_df, transformation, f + yield self.t_d2transf_df2, transformation, f + yield self.t_d3transf_df3, transformation, f + + @with_setup(setUp, tearDown) + def t_dtransf_df(self, transformation, f): + print "\n{}".format(inspect.stack()[0][3]) + grad = GradientChecker(transformation.transf, transformation.dtransf_df, f, 'f') + grad.randomize() + grad.checkgrad(verbose=1) + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d2transf_df2(self, transformation, f): + print "\n{}".format(inspect.stack()[0][3]) + grad = GradientChecker(transformation.dtransf_df, transformation.d2transf_df2, f, 'f') + grad.randomize() + grad.checkgrad(verbose=1) + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d3transf_df3(self, transformation, f): + print "\n{}".format(inspect.stack()[0][3]) + grad = GradientChecker(transformation.d2transf_df2, transformation.d3transf_df3, f, 'f') + grad.randomize() + grad.checkgrad(verbose=1) + assert grad.checkgrad() + +#if __name__ == "__main__": + #print "Running unit tests" + #unittest.main() diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 87d4a20e..f64dac2b 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -7,6 +7,13 @@ import GPy verbose = False +try: + import sympy + SYMPY_AVAILABLE=True +except ImportError: + SYMPY_AVAILABLE=False + + class KernelTests(unittest.TestCase): def test_kerneltie(self): K = GPy.kern.rbf(5, ARD=True) @@ -22,7 +29,16 @@ self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) def test_rbf_sympykernel(self): - kern = GPy.kern.rbf_sympy(5) + if SYMPY_AVAILABLE: + kern = GPy.kern.rbf_sympy(5) + self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + + def test_eq_sympykernel(self): + kern = GPy.kern.eq_sympy(5, 3, output_ind=4) + self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + + def test_sinckernel(self): + kern = GPy.kern.sinc(5) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) def test_rbf_invkernel(self): diff --git a/GPy/testing/likelihoods_tests.py b/GPy/testing/likelihoods_tests.py new file mode 100644 index 00000000..8d1466fb --- /dev/null +++ b/GPy/testing/likelihoods_tests.py @@ -0,0 +1,577 @@ +import numpy as np +import unittest +import GPy +from GPy.models import GradientChecker +import functools +import inspect +from GPy.likelihoods.noise_models import gp_transformations +from functools import partial + +def dparam_partial(inst_func, *args): + """ + If we have an instance method that needs to be called but that doesn't + take the parameter we wish to vary for checkgrad, then this function + will change the parameter using _set_params.
+ + inst_func: should be an instance function of an object that we would like + to change + param: the param that will be given to _set_params + args: anything else that needs to be given to the function (for example + the f or Y that are being used in the function whilst we tweak the + param) + """ + def param_func(param, inst_func, args): + inst_func.im_self._set_params(param) + return inst_func(*args) + return functools.partial(param_func, inst_func=inst_func, args=args) + +def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False): + """ + checkgrad expects f: R^N -> R^1 and df: R^N -> R^N. + However, if we are holding other parameters fixed and moving something else, + we need to check the gradient of each of the fixed parameters + (f and y for example) separately, whilst moving another parameter. + Otherwise f gives back R^N and + df gives back R^NxM, where M is + the number of parameters and N is the number of data points. + We need to take a slice out of f and a slice out of df. + """ + #print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, + #func.__name__, dfunc.__name__) + partial_f = dparam_partial(func, *args) + partial_df = dparam_partial(dfunc, *args) + gradchecking = True + for param in params: + fnum = np.atleast_1d(partial_f(param)).shape[0] + dfnum = np.atleast_1d(partial_df(param)).shape[0] + for fixed_val in range(dfnum): + #dlik and dlik_dvar give back one value for each datum + f_ind = min(fnum, fixed_val+1) - 1 + print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val) + #Make grad checker with this param moving, note that set_params is NOT being called + #The parameter is being set directly with __setattr__ + grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind], + lambda x : np.atleast_1d(partial_df(x))[fixed_val], + param, 'p') + #This is not general for more than one param...
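+ #For example (the shapes here are illustrative, not prescriptive): with a
+ #likelihood whose logpdf returns one value per datum, partial_f(param) has
+ #fnum == N and partial_df(param) has dfnum == N, so each pass of this loop
+ #grad-checks one output of dfunc against the matching slice of func;
+ #f_ind simply clamps the index when func returns fewer values than dfunc.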
+ if constraints is not None: + for constraint in constraints: + constraint('p', grad) + if randomize: + grad.randomize() + if verbose: + print grad + grad.checkgrad(verbose=1) + if not grad.checkgrad(): + gradchecking = False + + return gradchecking + + +from nose.tools import with_setup +class TestNoiseModels(object): + """ + Generic model checker + """ + def setUp(self): + self.N = 5 + self.D = 3 + self.X = np.random.rand(self.N, self.D)*10 + + self.real_std = 0.1 + noise = np.random.randn(*self.X[:, 0].shape)*self.real_std + self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] + self.f = np.random.rand(self.N, 1) + self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] + self.positive_Y = np.exp(self.Y.copy()) + tmp = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None] + self.integer_Y = np.where(tmp > 0, tmp, 0) + + self.var = np.random.rand(1) + + #Make a bigger step as lower bound can be quite curved + self.step = 1e-3 + + def tearDown(self): + self.Y = None + self.f = None + self.X = None + + def test_noise_models(self): + self.setUp() + + #################################################### + # Constraint wrappers so we can just list them off # + #################################################### + def constrain_negative(regex, model): + model.constrain_negative(regex) + + def constrain_positive(regex, model): + model.constrain_positive(regex) + + def constrain_bounded(regex, model, lower, upper): + """ + Used like: partial(constrain_bounded, lower=0, upper=1) + """ + model.constrain_bounded(regex, lower, upper) + + """ + Dictionary where we nest models we would like to check + Name: { + "model": model_instance, + "grad_params": { + "names": [names_of_params_we_want, to_grad_check], + "vals": [values_of_params, to_start_at], + "constraints": [constraint_wrappers, listed_here] + }, + "laplace": boolean_of_whether_model_should_work_for_laplace, + "ep": boolean_of_whether_model_should_work_for_ep, + "link_f_constraints": [constraint_wrappers, listed_here] + } + """ + noise_models = {"Student_t_default": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_1_var": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [1], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_small_var": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [0.01], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_approx_gauss": { + "model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_log": { + "model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Gaussian_default": { + "model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N), + "grad_params": { + "names": ["noise_model_variance"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True, + "ep": True + },
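+ #The three Gaussian variants that follow differ only in their gp_link
+ #(Log, Probit and Log_ex_1); they drive the same noise model through
+ #different transformations, so only the link derivatives change.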
+ "Gaussian_log": { + "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N), + "grad_params": { + "names": ["noise_model_variance"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Gaussian_probit": { + "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N), + "grad_params": { + "names": ["noise_model_variance"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Gaussian_log_ex": { + "model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N), + "grad_params": { + "names": ["noise_model_variance"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Bernoulli_default": { + "model": GPy.likelihoods.bernoulli(), + "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], + "laplace": True, + "Y": self.binary_Y, + "ep": True + }, + "Exponential_default": { + "model": GPy.likelihoods.exponential(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True, + }, + "Poisson_default": { + "model": GPy.likelihoods.poisson(), + "link_f_constraints": [constrain_positive], + "Y": self.integer_Y, + "laplace": True, + "ep": False #Should work though... + }, + "Gamma_default": { + "model": GPy.likelihoods.gamma(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True + } + } + + for name, attributes in noise_models.iteritems(): + model = attributes["model"] + if "grad_params" in attributes: + params = attributes["grad_params"] + param_vals = params["vals"] + param_names = params["names"] + param_constraints = params["constraints"] + else: + params = [] + param_vals = [] + param_names = [] + param_constraints = [] + if "link_f_constraints" in attributes: + link_f_constraints = attributes["link_f_constraints"] + else: + link_f_constraints = [] + if "Y" in attributes: + Y = attributes["Y"].copy() + else: + Y = self.Y.copy() + if "f" in attributes: + f = attributes["f"].copy() + else: + f = self.f.copy() + if "laplace" in attributes: + laplace = attributes["laplace"] + else: + laplace = False + if "ep" in attributes: + ep = attributes["ep"] + else: + ep = False + + if len(param_vals) > 1: + raise NotImplementedError("Cannot support multiple params in likelihood yet!") + + #Required by all + #Normal derivatives + yield self.t_logpdf, model, Y, f + yield self.t_dlogpdf_df, model, Y, f + yield self.t_d2logpdf_df2, model, Y, f + #Link derivatives + yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints + yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints + if laplace: + #Laplace only derivatives + yield self.t_d3logpdf_df3, model, Y, f + yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints + #Params + yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints + yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints + yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints + #Link params + yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints + yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints + yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints + + #laplace likelihood gradcheck + yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + if ep: + #ep likelihood gradcheck + 
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + + + self.tearDown() + + ############# + # dpdf_df's # + ############# + @with_setup(setUp, tearDown) + def t_logpdf(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + print model + np.testing.assert_almost_equal( + np.log(model.pdf(f.copy(), Y.copy())), + model.logpdf(f.copy(), Y.copy())) + + @with_setup(setUp, tearDown) + def t_dlogpdf_df(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + self.description = "\n{}".format(inspect.stack()[0][3]) + logpdf = functools.partial(model.logpdf, y=Y) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) + grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + print model + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d2logpdf_df2(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + print model + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d3logpdf_df3(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) + d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y) + grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + print model + assert grad.checkgrad() + + ############## + # df_dparams # + ############## + @with_setup(setUp, tearDown) + def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + ################ + # dpdf_dlink's # + ################ + @with_setup(setUp, tearDown) + def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints): + print "\n{}".format(inspect.stack()[0][3]) + logpdf = functools.partial(model.logpdf_link, y=Y) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) + grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g') + + #Apply constraints to link_f values + for constraint in link_f_constraints: + constraint('g', grad) + + grad.randomize() + print grad + grad.checkgrad(verbose=1) + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints): + print "\n{}".format(inspect.stack()[0][3]) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) + grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, 
f.copy(), 'g') + + #Apply constraints to link_f values + for constraint in link_f_constraints: + constraint('g', grad) + + grad.randomize() + grad.checkgrad(verbose=1) + print grad + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints): + print "\n{}".format(inspect.stack()[0][3]) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) + d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y) + grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g') + + #Apply constraints to link_f values + for constraint in link_f_constraints: + constraint('g', grad) + + grad.randomize() + grad.checkgrad(verbose=1) + print grad + assert grad.checkgrad() + + ################# + # dlink_dparams # + ################# + @with_setup(setUp, tearDown) + def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + ################ + # laplace test # + ################ + @with_setup(setUp, tearDown) + def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + print "\n{}".format(inspect.stack()[0][3]) + #Normalize + Y = Y/Y.max() + white_var = 0.001 + kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model) + m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood) + m.ensure_default_constraints() + m.constrain_fixed('white', white_var) + + for param_num in range(len(param_names)): + name = param_names[param_num] + m[name] = param_vals[param_num] + constraints[param_num](name, m) + + m.randomize() + m.checkgrad(verbose=1, step=step) + print m + assert m.checkgrad(step=step) + + ########### + # EP test # + ########### + @with_setup(setUp, tearDown) + def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + print "\n{}".format(inspect.stack()[0][3]) + #Normalize + Y = Y/Y.max() + white_var = 0.001 + kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + ep_likelihood = GPy.likelihoods.EP(Y.copy(), model) + m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood) + m.ensure_default_constraints() + m.constrain_fixed('white', white_var) + + for param_num in range(len(param_names)): + name = param_names[param_num] + m[name] = param_vals[param_num] + constraints[param_num](name, m) + + m.randomize() + m.checkgrad(verbose=1, step=step) + print m + assert m.checkgrad(step=step) + + +class LaplaceTests(unittest.TestCase): + """ + Specific likelihood tests, not general enough for the above tests + """ + + def 
setUp(self): + self.N = 5 + self.D = 3 + self.X = np.random.rand(self.N, self.D)*10 + + self.real_std = 0.1 + noise = np.random.randn(*self.X[:, 0].shape)*self.real_std + self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] + self.f = np.random.rand(self.N, 1) + + self.var = np.random.rand(1) + self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var) + self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N) + + #Step size for the gradient checks below + self.step = 1e-6 + + def tearDown(self): + self.stu_t = None + self.gauss = None + self.Y = None + self.f = None + self.X = None + + def test_gaussian_d2logpdf_df2_2(self): + print "\n{}".format(inspect.stack()[0][3]) + self.Y = None + self.gauss = None + + self.N = 2 + self.D = 1 + self.X = np.linspace(0, self.D, self.N)[:, None] + self.real_std = 0.2 + noise = np.random.randn(*self.X.shape)*self.real_std + self.Y = np.sin(self.X*2*np.pi) + noise + self.f = np.random.rand(self.N, 1) + self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N) + + dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) + d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + +if __name__ == "__main__": + print "Running unit tests" + unittest.main() diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py index 30ca14d6..16904927 100644 --- a/GPy/testing/psi_stat_expectation_tests.py +++ b/GPy/testing/psi_stat_expectation_tests.py @@ -28,8 +28,8 @@ def ard(p): class Test(unittest.TestCase): input_dim = 9 num_inducing = 4 - N = 3 - Nsamples = 5e6 + N = 30 + Nsamples = 9e6 def setUp(self): i_s_dim_list = [2,4,3] @@ -45,20 +45,26 @@ class Test(unittest.TestCase): input_slices = input_slices ) self.kerns = ( - input_slice_kern, +# input_slice_kern, # (GPy.kern.rbf(self.input_dim, ARD=True) + # GPy.kern.linear(self.input_dim, ARD=True) + # GPy.kern.bias(self.input_dim) + # GPy.kern.white(self.input_dim)), # (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + -# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + -# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + -# GPy.kern.bias(self.input_dim) + -# GPy.kern.white(self.input_dim)), -# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), +# GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.bias(self.input_dim) + +# GPy.kern.white(self.input_dim)), + (GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + + GPy.kern.bias(self.input_dim, np.random.rand()) + + GPy.kern.white(self.input_dim, np.random.rand())), + (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + + GPy.kern.bias(self.input_dim, np.random.rand()) + + GPy.kern.white(self.input_dim, np.random.rand())), +# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim), -#
GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), # GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim), @@ -79,7 +85,7 @@ class Test(unittest.TestCase): def test_psi1(self): for kern in self.kerns: - Nsamples = np.floor(self.Nsamples/300.) + Nsamples = np.floor(self.Nsamples/self.N) psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((Nsamples, self.num_inducing)) diffs = [] @@ -105,7 +111,7 @@ class Test(unittest.TestCase): def test_psi2(self): for kern in self.kerns: - Nsamples = self.Nsamples/300. + Nsamples = int(np.floor(self.Nsamples/self.N)) psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((self.num_inducing, self.num_inducing)) diffs = [] @@ -119,10 +125,10 @@ class Test(unittest.TestCase): try: import pylab pylab.figure(msg) - pylab.plot(diffs) + pylab.plot(diffs, marker='x', mew=1.3) # print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1) - self.assertTrue(np.allclose(psi2.squeeze(), K_, - rtol=1e-1, atol=.1), + self.assertTrue(np.allclose(psi2.squeeze(), K_), + #rtol=1e-1, atol=.1), msg=msg + ": not matching") # sys.stdout.write(".") except: @@ -135,7 +141,7 @@ class Test(unittest.TestCase): if __name__ == "__main__": sys.argv = ['', #'Test.test_psi0', - 'Test.test_psi1', + #'Test.test_psi1', 'Test.test_psi2', ] unittest.main() diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index e4d9e063..818cb56e 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase): Z = np.linspace(0, 15, 4)[:, None] kernel = GPy.kern.rbf(1) m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z) - #distribution = GPy.likelihoods.likelihood_functions.Binomial() + #distribution = GPy.likelihoods.likelihood_functions.Bernoulli() #likelihood = GPy.likelihoods.EP(Y, distribution) #m = GPy.core.SparseGP(X, likelihood, kernel, Z) #m.ensure_default_constraints() diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 99548268..db9b7362 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -14,3 +14,5 @@ import visualize import decorators import classification import latent_space_visualizations + +import netpbmfile diff --git a/GPy/util/config.py b/GPy/util/config.py new file mode 100644 index 00000000..d2ed7543 --- /dev/null +++ b/GPy/util/config.py @@ -0,0 +1,17 @@ +# +# This loads the configuration +# +import ConfigParser +import os +config = ConfigParser.ConfigParser() + +user_file = os.path.join(os.getenv('HOME'),'.gpy_config.cfg') +default_file = os.path.join('..','gpy_config.cfg') + +# 1. check if the user has a ~/.gpy_config.cfg +if os.path.isfile(user_file): + config.read(user_file) +else: + # 2. 
if not, use the default one + path = os.path.dirname(__file__) + config.read(os.path.join(path,default_file)) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 79bc3fc3..565f8e76 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -8,40 +8,38 @@ import zipfile import tarfile import datetime -ipython_notebook = False -if ipython_notebook: - import IPython.core.display - def ipynb_input(varname, prompt=''): - """Prompt user for input and assign string val to given variable name.""" - js_code = (""" - var value = prompt("{prompt}",""); - var py_code = "{varname} = '" + value + "'"; - IPython.notebook.kernel.execute(py_code); - """).format(prompt=prompt, varname=varname) - return IPython.core.display.Javascript(js_code) +ipython_available=True +try: + import IPython +except ImportError: + ipython_available=False + import sys, urllib -def reporthook(a,b,c): +def reporthook(a,b,c): # ',' at the end of the line is important! #print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c), #you can also use sys.stdout.write sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c)) sys.stdout.flush() - + # Global variables data_path = os.path.join(os.path.dirname(__file__), 'datasets') default_seed = 10000 overide_manual_authorize=False neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/' +sam_url = 'http://www.cs.nyu.edu/~roweis/data/' cmu_url = 'http://mocap.cs.cmu.edu/subjects/' -# Note: there may be a better way of storing data resources. One of the pythonistas will need to take a look. + +# Note: there may be a better way of storing data resources, for the +# moment we are storing them in a dictionary. data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], 'files' : [['ankurDataPoseSilhouette.mat']], 'license' : None, 'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""", 'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more than the other, disambiguating the pose as to which way the individual is facing."""}, - + 'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'], 'files' : [['Index', 'housing.data', 'housing.names']], 'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""", @@ -49,7 +47,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], 'license' : None, 'size' : 51276 }, - 'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'], + 'brendan_faces' : {'urls' : [sam_url], 'files': [['frey_rawface.mat']], 'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.', 'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""", @@ -93,6 +91,12 @@ The database was created with funding from NSF EIA-0196217.""", 'details' : """Data from the textbook 'A First Course in Machine Learning'.
Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""", 'license' : None, 'size' : 21949154}, + 'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url], + 'files' : [['att_faces.zip'], ['olivettifaces.mat']], + 'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994', + 'details' : """Olivetti Research Labs face database, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """, + 'license': None, + 'size' : 8561331}, 'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'], 'files' : [['olympicMarathonTimes.csv']], 'citation' : None, @@ -141,26 +145,41 @@ 'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000', 'license' : None, 'size' : 24229368}, + 'xw_pen' : {'urls' : [neil_url + 'xw_pen/'], + 'files' : [['xw_pen_15.csv']], + 'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""", + 'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005', + 'license' : None, + 'size' : 3410} } -def prompt_user(): +def prompt_user(prompt): """Ask the user to agree to data set licenses.""" # raw_input returns the empty string for "enter" yes = set(['yes', 'y']) no = set(['no','n']) - choice = '' - if ipython_notebook: - ipynb_input(choice, prompt='provide your answer here') - else: + + try: + print(prompt) choice = raw_input().lower() + # would like to test for exception here, but not sure if we can do that without importing IPython + except: + print('Stdin is not implemented.') + print('You need to set') + print('overide_manual_authorize=True') + print('to proceed with the download. Please set that variable and continue.') + raise + + if choice in yes: return True elif choice in no: return False else: - sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'") - return prompt_user() + print("Your response was: " + choice) + print("Please respond with 'yes', 'y' or 'no', 'n'") + #return prompt_user() @@ -170,7 +189,7 @@ def data_available(dataset_name=None): if not os.path.exists(os.path.join(data_path, dataset_name, file)): return False return True - + def download_url(url, store_directory, save_name = None, messages = True, suffix=''): """Download a file from a url and save it to disk.""" i = url.rfind('/') @@ -212,15 +231,14 @@ def authorize_download(dataset_name=None): print('You must also agree to the following license:') print(dr['license']) print('') - print('Do you wish to proceed with the download? [yes/no]') - return prompt_user() + return prompt_user('Do you wish to proceed with the download? [yes/no]')
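As the error path in prompt_user suggests, a non-interactive session can bypass the prompt by setting the module-level flag before requesting a download; a minimal sketch, assuming the flag is honoured downstream of authorize_download (which this hunk does not show):

    import GPy.util.datasets as datasets
    datasets.overide_manual_authorize = True   # note: the module's own spelling
    data = datasets.olympic_marathon_men()     # fetches without prompting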
def download_data(dataset_name=None): """Check with the user that they are happy with terms and conditions for the data set, then download it.""" dr = data_resources[dataset_name] if not authorize_download(dataset_name): - return False + raise Exception("Permission to download data set denied.") if dr.has_key('suffices'): for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']): @@ -231,18 +249,18 @@ for file in files: download_url(os.path.join(url,file), dataset_name, dataset_name) return True - + def data_details_return(data, data_set): """Update the data component of the data dictionary with details drawn from the data_resources.""" data.update(data_resources[data_set]) return data - + def cmu_urls_files(subj_motions, messages = True): ''' - Find which resources are missing on the local disk for the requested CMU motion capture motions. + Find which resources are missing on the local disk for the requested CMU motion capture motions. ''' - + subjects_num = subj_motions[0] motions_num = subj_motions[1] @@ -262,15 +280,15 @@ motions[i].append(curMot) all_skels = [] - + assert len(subjects) == len(motions) - + all_motions = [] - + for i in range(len(subjects)): skel_dir = os.path.join(data_path, 'cmu_mocap') cur_skel_file = os.path.join(skel_dir, subjects[i] + '.asf') - + url_required = False file_download = [] if not os.path.exists(cur_skel_file): @@ -314,7 +332,7 @@ if gpxpy_available: points = [point for track in gpx.tracks for segment in track.segments for point in segment.points] data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points] X.append(np.asarray(data)[::sample_every, :]) - gpx_file.close() + gpx_file.close() return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set) del gpxpy_available @@ -390,7 +408,7 @@ def oil(data_set='three_phase_oil_flow'): return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'Xtest' : Xtest, 'Xvalid': Xvalid, 'Yvalid': Yvalid}, data_set) #else: # throw an error - + def oil_100(seed=default_seed, data_set = 'three_phase_oil_flow'): np.random.seed(seed=seed) data = oil() @@ -489,13 +507,13 @@ def ripley_synth(data_set='ripley_prnn_data'): return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set) def osu_run1(data_set='osu_run1', sample_every=4): + path = os.path.join(data_path, data_set) if not data_available(data_set): download_data(data_set) - zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r') - path = os.path.join(data_path, data_set) - for name in zip.namelist(): - zip.extract(name, path) - Y, connect = GPy.util.mocap.load_text_data('Aug210107', path) + zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r') + for name in zip.namelist(): + zip.extract(name, path) + Y, connect = GPy.util.mocap.load_text_data('Aug210106', path) Y = Y[0:-1:sample_every, :] return data_details_return({'Y': Y, 'connect' : connect}, data_set) @@ -579,8 +597,34 @@ def toy_linear_1d_classification(seed=default_seed): X = (np.r_[x1, x2])[:, None] return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X, 'seed' : seed} -def olympic_100m_men(data_set='rogers_girolami_data'): +def 
olivetti_faces(data_set='olivetti_faces'): + path = os.path.join(data_path, data_set) if not data_available(data_set): + download_data(data_set) + zip = zipfile.ZipFile(os.path.join(path, 'att_faces.zip'), 'r') + for name in zip.namelist(): + zip.extract(name, path) + Y = [] + lbls = [] + for subject in range(40): + for image in range(10): + image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm') + Y.append(GPy.util.netpbmfile.imread(image_path).flatten()) + lbls.append(subject) + Y = np.asarray(Y) + lbls = np.asarray(lbls)[:, None] + return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set) + +def xw_pen(data_set='xw_pen'): + if not data_available(data_set): + download_data(data_set) + Y = np.loadtxt(os.path.join(data_path, data_set, 'xw_pen_15.csv'), delimiter=',') + X = np.arange(485)[:, None] + return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set) + + +def download_rogers_girolami_data(): + data_set = 'rogers_girolami_data' + if not data_available(data_set): download_data(data_set) path = os.path.join(data_path, data_set) tar_file = os.path.join(path, 'firstcoursemldata.tar.gz') @@ -588,6 +632,9 @@ print('Extracting file.') tar.extractall(path=path) tar.close() + +def olympic_100m_men(data_set='rogers_girolami_data'): + download_rogers_girolami_data() olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100'] X = olympic_data[:, 0][:, None] @@ -595,20 +642,45 @@ Y = olympic_data[:, 1][:, None] return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set) def olympic_100m_women(data_set='rogers_girolami_data'): - if not data_available(data_set): - download_data(data_set) - path = os.path.join(data_path, data_set) - tar_file = os.path.join(path, 'firstcoursemldata.tar.gz') - tar = tarfile.open(tar_file) - print('Extracting file.') - tar.extractall(path=path) - tar.close() + download_rogers_girolami_data() olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100'] X = olympic_data[:, 0][:, None] Y = olympic_data[:, 1][:, None] return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set) +def olympic_200m_women(data_set='rogers_girolami_data'): + download_rogers_girolami_data() + olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female200'] + + X = olympic_data[:, 0][:, None] + Y = olympic_data[:, 1][:, None] + return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 200 m winning times for women from 1896 until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set) + +def olympic_200m_men(data_set='rogers_girolami_data'): + download_rogers_girolami_data() + olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male200'] + + X = olympic_data[:, 0][:, None] + Y = olympic_data[:, 1][:, None] + return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 200 m winning times for men from 1896 until 2008.
Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set) + +def olympic_400m_women(data_set='rogers_girolami_data'): + download_rogers_girolami_data() + olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female400'] + + X = olympic_data[:, 0][:, None] + Y = olympic_data[:, 1][:, None] + return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 400 m winning times for women until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set) + +def olympic_400m_men(data_set='rogers_girolami_data'): + download_rogers_girolami_data() + olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male400'] + + X = olympic_data[:, 0][:, None] + Y = olympic_data[:, 1][:, None] + return data_details_return({'X': X, 'Y': Y, 'info': "Olympic 400 m winning times for men until 2008. Data is from Rogers and Girolami's First Course in Machine Learning."}, data_set) + def olympic_marathon_men(data_set='olympic_marathon_men'): if not data_available(data_set): download_data(data_set) @@ -617,6 +689,26 @@ Y = olympics[:, 1:2] return data_details_return({'X': X, 'Y': Y}, data_set) +def olympics(): + """All olympics sprint winning times for multiple output prediction.""" + X = np.zeros((0, 2)) + Y = np.zeros((0, 1)) + for i, dataset in enumerate([olympic_100m_men, + olympic_100m_women, + olympic_200m_men, + olympic_200m_women, + olympic_400m_men, + olympic_400m_women]): + data = dataset() + year = data['X'] + time = data['Y'] + X = np.vstack((X, np.hstack((year, np.ones_like(year)*i)))) + Y = np.vstack((Y, time)) + data['X'] = X + data['Y'] = Y + data['info'] = "Olympics sprint event winning times for men and women to 2008. Data is from Rogers and Girolami's First Course in Machine Learning." + return data
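The second column of the stacked X is an output index, one per event in the order of the list above. A minimal sketch of pulling one event back out (the variable names here are illustrative):

    data = olympics()
    X, Y = data['X'], data['Y']    # X columns: [year, event_index]
    men_100m = X[:, 1] == 0        # 0 == olympic_100m_men, first in the list
    years, times = X[men_100m, 0], Y[men_100m, 0]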
# def movielens_small(partNo=1,seed=default_seed): # np.random.seed(seed=seed) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 4e7f7fff..f68e1a0b 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -61,6 +61,14 @@ def dpotri(A, lower=0): """ return lapack.dpotri(A, lower=lower) +def pddet(A): + """ + Log-determinant of a positive definite matrix (symmetric matrices only) + """ + L = jitchol(A) + logdetA = 2*sum(np.log(np.diag(L))) + return logdetA + def trace_dot(a, b): """ Efficiently compute the trace of the matrix product of a and b """ @@ -325,6 +333,7 @@ def symmetrify(A, upper=False): """ N, M = A.shape assert N == M + c_contig_code = """ int iN; for (int i=1; i + %s #include - """ + """ % pragma_string - weave_options = {'headers' : ['<omp.h>'], - 'extra_compile_args': ['-fopenmp -O3'], - 'extra_link_args' : ['-lgomp']} + weave_options_openmp = {'headers' : ['<omp.h>'], + 'extra_compile_args': ['-fopenmp -O3'], + 'extra_link_args' : ['-lgomp'], + 'libraries': ['gomp']} + weave_options_noopenmp = {'extra_compile_args': ['-O3']} + + if config.getboolean('parallel', 'openmp'): + weave_options = weave_options_openmp + else: + weave_options = weave_options_noopenmp value = False + if (A == None) and (B == None): return True elif ((A == None) and (B != None)) or ((A != None) and (B == None)): return False elif A.shape == B.shape: if A.ndim == 2: - N, D = A.shape - value = weave.inline(code2, support_code=support_code, libraries=['gomp'], + N, D = [int(i) for i in A.shape] + value = weave.inline(code2, support_code=support_code, arg_names=['A', 'B', 'N', 'D'], - type_converters=weave.converters.blitz,**weave_options) + type_converters=weave.converters.blitz, **weave_options) elif A.ndim == 3: - N, D, Q = A.shape - value = weave.inline(code3, support_code=support_code, libraries=['gomp'], + N, D, Q = [int(i) for i in A.shape] + value = weave.inline(code3, support_code=support_code, arg_names=['A', 'B', 'N', 'D', 'Q'], - type_converters=weave.converters.blitz, **weave_options) + type_converters=weave.converters.blitz, **weave_options) else: value = np.array_equal(A,B)
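The config.getboolean('parallel', 'openmp') call above reads the configuration file loaded by GPy/util/config.py earlier in this patch. A minimal ~/.gpy_config.cfg that enables the OpenMP path would look like this (the section and option names come from the call above; the value is the user's choice):

    [parallel]
    openmp = True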
diff --git a/GPy/util/netpbmfile.py b/GPy/util/netpbmfile.py new file mode 100644 index 00000000..030bd574 --- /dev/null +++ b/GPy/util/netpbmfile.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# netpbmfile.py # Copyright (c) 2011-2013, Christoph Gohlke +# Copyright (c) 2011-2013, The Regents of the University of California +# Produced at the Laboratory for Fluorescence Dynamics. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holders nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Read and write image data from, respectively to, Netpbm files. + +This implementation follows the Netpbm format specifications at +http://netpbm.sourceforge.net/doc/. No gamma correction is performed. + +The following image formats are supported: PBM (bi-level), PGM (grayscale), +PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only). + +:Author: + `Christoph Gohlke `_ + +:Organization: + Laboratory for Fluorescence Dynamics, University of California, Irvine + +:Version: 2013.01.18 + +Requirements +------------ +* `CPython 2.7, 3.2 or 3.3 `_ +* `Numpy 1.7 `_ +* `Matplotlib 1.2 `_ (optional for plotting) + +Examples +-------- +>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16) +>>> imsave('_tmp.pgm', im1) +>>> im2 = imread('_tmp.pgm') +>>> assert numpy.all(im1 == im2) + +""" + +from __future__ import division, print_function + +import sys +import re +import math +from copy import deepcopy + +import numpy + +__version__ = '2013.01.18' +__docformat__ = 'restructuredtext en' +__all__ = ['imread', 'imsave', 'NetpbmFile'] + + +def imread(filename, *args, **kwargs): + """Return image data from Netpbm file as numpy array. + + `args` and `kwargs` are arguments to NetpbmFile.asarray(). + + Examples + -------- + >>> image = imread('_tmp.pgm') + + """ + try: + netpbm = NetpbmFile(filename) + image = netpbm.asarray() + finally: + netpbm.close() + return image + + +def imsave(filename, data, maxval=None, pam=False): + """Write image data to Netpbm file.
+ + Examples + -------- + >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16) + >>> imsave('_tmp.pgm', image) + + """ + try: + netpbm = NetpbmFile(data, maxval=maxval) + netpbm.write(filename, pam=pam) + finally: + netpbm.close() + + +class NetpbmFile(object): + """Read and write Netpbm PAM, PBM, PGM, PPM, files.""" + + _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB', + b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB', + b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'} + + def __init__(self, arg=None, **kwargs): + """Initialize instance from filename, open file, or numpy array.""" + for attr in ('header', 'magicnum', 'width', 'height', 'maxval', + 'depth', 'tupltypes', '_filename', '_fh', '_data'): + setattr(self, attr, None) + if arg is None: + self._fromdata([], **kwargs) + elif isinstance(arg, basestring): + self._fh = open(arg, 'rb') + self._filename = arg + self._fromfile(self._fh, **kwargs) + elif hasattr(arg, 'seek'): + self._fromfile(arg, **kwargs) + self._fh = arg + else: + self._fromdata(arg, **kwargs) + + def asarray(self, copy=True, cache=False, **kwargs): + """Return image data from file as numpy array.""" + data = self._data + if data is None: + data = self._read_data(self._fh, **kwargs) + if cache: + self._data = data + else: + return data + return deepcopy(data) if copy else data + + def write(self, arg, **kwargs): + """Write instance to file.""" + if hasattr(arg, 'seek'): + self._tofile(arg, **kwargs) + else: + with open(arg, 'wb') as fid: + self._tofile(fid, **kwargs) + + def close(self): + """Close open file. Future asarray calls might fail.""" + if self._filename and self._fh: + self._fh.close() + self._fh = None + + def __del__(self): + self.close() + + def _fromfile(self, fh): + """Initialize instance from open file.""" + fh.seek(0) + data = fh.read(4096) + if (len(data) < 7) or not (b'0' < data[1:2] < b'8'): + raise ValueError("Not a Netpbm file:\n%s" % data[:32]) + try: + self._read_pam_header(data) + except Exception: + try: + self._read_pnm_header(data) + except Exception: + raise ValueError("Not a Netpbm file:\n%s" % data[:32]) + + def _read_pam_header(self, data): + """Read PAM header and initialize instance.""" + regroups = re.search( + b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|" + b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|" + b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups() + self.header = regroups[0] + self.magicnum = b'P7' + for group in regroups[1:]: + key, value = group.split() + setattr(self, unicode(key).lower(), int(value)) + matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header) + self.tupltypes = [s.split(None, 1)[1] for s in matches] + + def _read_pnm_header(self, data): + """Read PNM header and initialize instance.""" + bpm = data[1:2] in b"14" + regroups = re.search(b"".join(( + b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*", + b"\s*(\d+)\s+(?:#.*[\r\n])*", + b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm), + b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm + self.header = regroups[0] + self.magicnum = regroups[1] + self.width = int(regroups[2]) + self.height = int(regroups[3]) + self.maxval = int(regroups[4]) + self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1 + self.tupltypes = [self._types[self.magicnum]] + + def _read_data(self, fh, byteorder='>'): + """Return image data from open file as numpy array.""" + fh.seek(len(self.header)) + data = fh.read() + dtype = 'u1' if self.maxval < 256 else byteorder + 'u2' + depth = 1 if self.magicnum == b"P7 332" else self.depth + shape = [-1, 
+
+    def _read_data(self, fh, byteorder='>'):
+        """Return image data from open file as numpy array."""
+        fh.seek(len(self.header))
+        data = fh.read()
+        dtype = 'u1' if self.maxval < 256 else byteorder + 'u2'
+        depth = 1 if self.magicnum == b"P7 332" else self.depth
+        shape = [-1, self.height, self.width, depth]
+        size = numpy.prod(shape[1:])
+        if self.magicnum in b"P1P2P3":
+            data = numpy.array(data.split(None, size)[:size], dtype)
+            data = data.reshape(shape)
+        elif self.maxval == 1:
+            shape[2] = int(math.ceil(self.width / 8))
+            data = numpy.frombuffer(data, dtype).reshape(shape)
+            data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :]
+        else:
+            data = numpy.frombuffer(data, dtype)
+            data = data[:size * (data.size // size)].reshape(shape)
+        if data.shape[0] < 2:
+            data = data.reshape(data.shape[1:])
+        if data.shape[-1] < 2:
+            data = data.reshape(data.shape[:-1])
+        if self.magicnum == b"P7 332":
+            rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8)
+            rgb332 *= [36, 36, 85]
+            data = numpy.take(rgb332, data, axis=0)
+        return data
+
+    def _fromdata(self, data, maxval=None):
+        """Initialize instance from numpy array."""
+        data = numpy.array(data, ndmin=2, copy=True)
+        if data.dtype.kind not in "uib":
+            raise ValueError("not an integer type: %s" % data.dtype)
+        if data.dtype.kind == 'i' and numpy.min(data) < 0:
+            raise ValueError("data out of range: %i" % numpy.min(data))
+        if maxval is None:
+            maxval = numpy.max(data)
+            maxval = 255 if maxval < 256 else 65535
+        if maxval < 0 or maxval > 65535:
+            raise ValueError("data out of range: %i" % maxval)
+        data = data.astype('u1' if maxval < 256 else '>u2')
+        self._data = data
+        if data.ndim > 2 and data.shape[-1] in (3, 4):
+            self.depth = data.shape[-1]
+            self.width = data.shape[-2]
+            self.height = data.shape[-3]
+            self.magicnum = b'P7' if self.depth == 4 else b'P6'
+        else:
+            self.depth = 1
+            self.width = data.shape[-1]
+            self.height = data.shape[-2]
+            self.magicnum = b'P5' if maxval > 1 else b'P4'
+        self.maxval = maxval
+        self.tupltypes = [self._types[self.magicnum]]
+        self.header = self._header()
+
+    def _tofile(self, fh, pam=False):
+        """Write Netpbm file."""
+        fh.seek(0)
+        fh.write(self._header(pam))
+        data = self.asarray(copy=False)
+        if self.maxval == 1:
+            data = numpy.packbits(data, axis=-1)
+        data.tofile(fh)
+
+    def _header(self, pam=False):
+        """Return file header as byte string."""
+        if pam or self.magicnum == b'P7':
+            header = "\n".join((
+                "P7",
+                "HEIGHT %i" % self.height,
+                "WIDTH %i" % self.width,
+                "DEPTH %i" % self.depth,
+                "MAXVAL %i" % self.maxval,
+                "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes),
+                "ENDHDR\n"))
+        elif self.maxval == 1:
+            header = "P4 %i %i\n" % (self.width, self.height)
+        elif self.depth == 1:
+            header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval)
+        else:
+            header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval)
+        if sys.version_info[0] > 2:
+            header = bytes(header, 'ascii')
+        return header
+
+    def __str__(self):
+        """Return information about instance."""
+        return unicode(self.header)
+
+
+if sys.version_info[0] > 2:
+    basestring = str
+    unicode = lambda x: str(x, 'ascii')
+
+if __name__ == "__main__":
+    # Show images specified on command line or all images in current directory
+    from glob import glob
+    from matplotlib import pyplot
+    files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m')
+    for fname in files:
+        try:
+            pam = NetpbmFile(fname)
+            img = pam.asarray(copy=False)
+            if False:  # round-trip self test, disabled by default
+                pam.write('_tmp.pgm.out', pam=True)
+                img2 = imread('_tmp.pgm.out')
+                assert numpy.all(img == img2)
+                imsave('_tmp.pgm.out', img)
+                img2 = imread('_tmp.pgm.out')
+                assert numpy.all(img == img2)
+            pam.close()
+        except ValueError as e:
+            print(fname, e)
+            continue
+        _shape = img.shape
+        if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)):
+            img = img[0]
+        cmap = 'gray' if pam.maxval > 1 else 'binary'
+        pyplot.imshow(img, cmap, interpolation='nearest')
+        pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum),
+                                      _shape, img.dtype))
+        pyplot.show()
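
For orientation, here is a short usage sketch of the module added above (a
hypothetical session: the filename '_tmp.pgm' is arbitrary, and the module is
assumed importable as GPy.util.netpbmfile, the path that the documentation
diff further below registers):

    import numpy
    from GPy.util.netpbmfile import NetpbmFile, imread, imsave

    # Round-trip a 16-bit grayscale image through a PGM file.
    image = (numpy.arange(12, dtype=numpy.uint16) * 5000).reshape(3, 4)
    imsave('_tmp.pgm', image)
    assert numpy.array_equal(image, imread('_tmp.pgm'))

    # The object interface also exposes the parsed header fields.
    pgm = NetpbmFile('_tmp.pgm')
    print(pgm.magicnum, pgm.width, pgm.height, pgm.maxval)  # b'P5' 4 3 65535
    pgm.close()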
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
index f4f5fda0..0b5ca381 100644
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@@ -1,32 +1,115 @@
-from sympy import Function, S, oo, I, cos, sin
+from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt
+from sympy.core.function import ArgumentIndexError
+
+class ln_diff_erf(Function):
+    nargs = 2
+
+    def fdiff(self, argindex=2):
+        if argindex == 2:
+            x0, x1 = self.args
+            return -2*exp(-x1**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
+        elif argindex == 1:
+            x0, x1 = self.args
+            return 2*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
+        else:
+            raise ArgumentIndexError(self, argindex)
+
+    @classmethod
+    def eval(cls, x0, x1):
+        if x0.is_Number and x1.is_Number:
+            return log(erf(x0)-erf(x1))
+
+class sim_h(Function):
+    nargs = 5
+
+    def fdiff(self, argindex=1):
+        pass
+
+    @classmethod
+    def eval(cls, t, tprime, d_i, d_j, l):
+        # putting in the is_Number stuff forces it to look for a fdiff method for derivative.
+        if (t.is_Number
+            and tprime.is_Number
+            and d_i.is_Number
+            and d_j.is_Number
+            and l.is_Number):
+            if (t is S.NaN
+                or tprime is S.NaN
+                or d_i is S.NaN
+                or d_j is S.NaN
+                or l is S.NaN):
+                return S.NaN
+            else:
+                return (exp((d_j/2*l)**2)/(d_i+d_j)
+                        *(exp(-d_j*(tprime - t))
+                          *(erf((tprime-t)/l - d_j/2*l)
+                            + erf(t/l + d_j/2*l))
+                          - exp(-(d_j*tprime + d_i))
+                          *(erf(tprime/l - d_j/2*l)
+                            + erf(d_j/2*l))))
+
+class erfc(Function):
+    nargs = 1
+
+    @classmethod
+    def eval(cls, arg):
+        return 1-erf(arg)
+
+class erfcx(Function):
+    nargs = 1
+
+    @classmethod
+    def eval(cls, arg):
+        return erfc(arg)*exp(arg*arg)
+
 class sinc_grad(Function):
     nargs = 1
 
     def fdiff(self, argindex=1):
-        return ((2-x*x)*sin(self.args[0]) - 2*x*cos(x))/(x*x*x)
+        if argindex == 1:
+            x = self.args[0]
+            # Strictly speaking this should be computed separately, as it won't work when x=0. See http://calculus.subwiki.org/wiki/Sinc_function
+            return ((2-x*x)*sin(x) - 2*x*cos(x))/(x*x*x)
+        else:
+            raise ArgumentIndexError(self, argindex)
+
     @classmethod
     def eval(cls, x):
-        if x is S.Zero:
-            return S.Zero
-        else:
-            return (x*cos(x) - sin(x))/(x*x)
+        if x.is_Number:
+            if x is S.NaN:
+                return S.NaN
+            elif x is S.Zero:
+                return S.Zero
+            else:
+                return (x*cos(x) - sin(x))/(x*x)
 
 class sinc(Function):
     nargs = 1
 
     def fdiff(self, argindex=1):
-        return sinc_grad(self.args[0])
+        if argindex == 1:
+            return sinc_grad(self.args[0])
+        else:
+            raise ArgumentIndexError(self, argindex)
+
     @classmethod
-    def eval(cls, x):
-        if x is S.Zero:
-            return S.One
-        else:
-            return sin(x)/x
-
+    def eval(cls, arg):
+        if arg.is_Number:
+            if arg is S.NaN:
+                return S.NaN
+            elif arg is S.Zero:
+                return S.One
+            else:
+                return sin(arg)/arg
+
+        if arg.func is asin:
+            x = arg.args[0]
+            return x / arg
+
     def _eval_is_real(self):
         return self.args[0].is_real
+
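
As a quick sanity check of the symbolic helpers above (a minimal sketch,
assuming sympy is installed and the module is importable as
GPy.util.symbolic):

    import sympy
    from GPy.util.symbolic import sinc, erfcx

    x = sympy.Symbol('x', real=True)
    print(sinc(0))          # 1, from the S.Zero branch of sinc.eval
    print(sinc(x).diff(x))  # sinc_grad(x), supplied by sinc.fdiff
    print(erfcx(x))         # (1 - erf(x))*exp(x**2), built from the erfc helper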
diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py
index 5a5880d5..702ab25c 100644
--- a/GPy/util/univariate_Gaussian.py
+++ b/GPy/util/univariate_Gaussian.py
@@ -13,24 +13,32 @@ def std_norm_cdf(x):
     """
     Cumulative standard Gaussian distribution
     Based on Abramowitz, M. and Stegun, I. (1970)
     """
+    # Generalize for many x
+    x = np.asarray(x).copy()
+    cdf_x = np.zeros_like(x)
+    N = x.size
     support_code = "#include <math.h>"
     code = """
-    double sign = 1.0;
-    if (x < 0.0){
-        sign = -1.0;
-        x = -x;
+    double sign, t, erf;
+    for (int i=0; i<N; i++){
diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
-            nImg = np.sqrt(vals[0,].size/dim)
-            if nImg > 1 and nImg.is_integer(): # Show a mosaic of images
-                nImg = np.int(nImg)
-                self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg))
-                for iR in range(nImg):
-                    for iC in range(nImg):
-                        currImgId = iR*nImg + iC
-                        currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F')
-                        firstRow = iR*self.dimensions[0]
-                        lastRow = (iR+1)*self.dimensions[0]
-                        firstCol = iC*self.dimensions[1]
-                        lastCol = (iC+1)*self.dimensions[1]
-                        self.vals[firstRow:lastRow, firstCol:lastCol] = currImg
+            num_images = np.sqrt(vals[0,].size/dim)
+            if num_images > 1 and num_images.is_integer(): # Show a mosaic of images
+                num_images = np.int(num_images)
+                self.vals = np.zeros((self.dimensions[0]*num_images, self.dimensions[1]*num_images))
+                for iR in range(num_images):
+                    for iC in range(num_images):
+                        cur_img_id = iR*num_images + iC
+                        cur_img = np.reshape(vals[0,dim*cur_img_id+np.array(range(dim))], self.dimensions, order=self.order)
+                        first_row = iR*self.dimensions[0]
+                        last_row = (iR+1)*self.dimensions[0]
+                        first_col = iC*self.dimensions[1]
+                        last_col = (iC+1)*self.dimensions[1]
+                        self.vals[first_row:last_row, first_col:last_col] = cur_img
             else:
-                self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F')
+                self.vals = np.reshape(vals[0,dim*self.select_image+np.array(range(dim))], self.dimensions, order=self.order)
             if self.transpose:
                 self.vals = self.vals.T
 #           if not self.scale:
@@ -296,8 +315,8 @@ class image_show(matplotlib_show):
             self.vals = -self.vals
 
         # un-normalizing, for visualisation purposes:
-        if self.presetSTD >= 0: # The Mean is assumed to be in the range (0,255)
-            self.vals = self.vals*self.presetSTD + self.presetMean
+        if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255)
+            self.vals = self.vals*self.preset_std + self.preset_mean
         # Clipping the values:
         self.vals[self.vals < 0] = 0
         self.vals[self.vals > 255] = 255
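
The weave code in std_norm_cdf above evaluates the Gaussian CDF elementwise
via a rational approximation of erf in the Abramowitz & Stegun style. As a
rough pure-numpy sketch of the same idea (std_norm_cdf_approx is a
hypothetical name, and the coefficients are the standard ones from A&S
equation 7.1.26, not values taken from this diff):

    import numpy as np

    def std_norm_cdf_approx(x):
        # Phi(x) = 0.5*(1 + erf(x/sqrt(2))), with erf from A&S 7.1.26
        z = np.asarray(x, dtype=float) / np.sqrt(2.0)
        sign = np.sign(z)
        z = np.abs(z)
        t = 1.0 / (1.0 + 0.3275911 * z)
        poly = t * (0.254829592 + t * (-0.284496736 + t * (1.421413741
                    + t * (-1.453152027 + t * 1.061405429))))
        erf = 1.0 - poly * np.exp(-z * z)
        return 0.5 * (1.0 + sign * erf)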
diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index 4fd3528f..288ff631 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -20,6 +20,14 @@ GPy.examples.dimensionality_reduction module
     :undoc-members:
     :show-inheritance:
 
+GPy.examples.laplace_approximations module
+------------------------------------------
+
+.. automodule:: GPy.examples.laplace_approximations
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.examples.regression module
 ------------------------------
diff --git a/doc/GPy.kern.parts.rst b/doc/GPy.kern.parts.rst
index ec0661b4..650fe5cb 100644
--- a/doc/GPy.kern.parts.rst
+++ b/doc/GPy.kern.parts.rst
@@ -28,6 +28,14 @@ GPy.kern.parts.Matern52 module
     :undoc-members:
     :show-inheritance:
 
+GPy.kern.parts.ODE_1 module
+---------------------------
+
+.. automodule:: GPy.kern.parts.ODE_1
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.kern.parts.bias module
 --------------------------
 
@@ -44,6 +52,14 @@ GPy.kern.parts.coregionalize module
     :undoc-members:
     :show-inheritance:
 
+GPy.kern.parts.eq_ode1 module
+-----------------------------
+
+.. automodule:: GPy.kern.parts.eq_ode1
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.kern.parts.exponential module
 ---------------------------------
diff --git a/doc/GPy.likelihoods.noise_models.rst b/doc/GPy.likelihoods.noise_models.rst
index d1a4f451..6fec5aff 100644
--- a/doc/GPy.likelihoods.noise_models.rst
+++ b/doc/GPy.likelihoods.noise_models.rst
@@ -4,10 +4,10 @@ GPy.likelihoods.noise_models package
 Submodules
 ----------
 
-GPy.likelihoods.noise_models.binomial_noise module
---------------------------------------------------
+GPy.likelihoods.noise_models.bernoulli_noise module
+---------------------------------------------------
 
-.. automodule:: GPy.likelihoods.noise_models.binomial_noise
+.. automodule:: GPy.likelihoods.noise_models.bernoulli_noise
     :members:
     :undoc-members:
     :show-inheritance:
@@ -60,6 +60,14 @@ GPy.likelihoods.noise_models.poisson_noise module
     :undoc-members:
     :show-inheritance:
 
+GPy.likelihoods.noise_models.student_t_noise module
+---------------------------------------------------
+
+.. automodule:: GPy.likelihoods.noise_models.student_t_noise
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Module contents
 ---------------
diff --git a/doc/GPy.likelihoods.rst b/doc/GPy.likelihoods.rst
index c3da2650..34d98739 100644
--- a/doc/GPy.likelihoods.rst
+++ b/doc/GPy.likelihoods.rst
@@ -43,6 +43,14 @@ GPy.likelihoods.gaussian_mixed_noise module
     :undoc-members:
     :show-inheritance:
 
+GPy.likelihoods.laplace module
+------------------------------
+
+.. automodule:: GPy.likelihoods.laplace
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.likelihoods.likelihood module
 ---------------------------------
diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst
index bd5258b7..98b001c0 100644
--- a/doc/GPy.testing.rst
+++ b/doc/GPy.testing.rst
@@ -4,6 +4,14 @@ GPy.testing package
 Submodules
 ----------
 
+GPy.testing.bcgplvm_tests module
+--------------------------------
+
+.. automodule:: GPy.testing.bcgplvm_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.testing.bgplvm_tests module
 -------------------------------
 
@@ -28,6 +36,14 @@ GPy.testing.examples_tests module
     :undoc-members:
     :show-inheritance:
 
+GPy.testing.gp_transformation_tests module
+------------------------------------------
+
+.. automodule:: GPy.testing.gp_transformation_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.testing.gplvm_tests module
 ------------------------------
 
@@ -44,6 +60,14 @@ GPy.testing.kernel_tests module
     :undoc-members:
     :show-inheritance:
 
+GPy.testing.likelihoods_tests module
+------------------------------------
+
+.. automodule:: GPy.testing.likelihoods_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.testing.mapping_tests module
 --------------------------------
diff --git a/doc/GPy.util.rst b/doc/GPy.util.rst
index c86280a7..f2aaed7f 100644
--- a/doc/GPy.util.rst
+++ b/doc/GPy.util.rst
@@ -27,6 +27,14 @@ GPy.util.classification module
     :undoc-members:
     :show-inheritance:
 
+GPy.util.config module
+----------------------
+
+.. automodule:: GPy.util.config
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.util.datasets module
 ------------------------
 
@@ -43,6 +51,14 @@ GPy.util.decorators module
     :undoc-members:
     :show-inheritance:
 
+GPy.util.erfcx module
+---------------------
+
+.. automodule:: GPy.util.erfcx
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.util.linalg module
 ----------------------
 
@@ -51,6 +67,14 @@ GPy.util.linalg module
     :undoc-members:
     :show-inheritance:
 
+GPy.util.ln_diff_erfs module
+----------------------------
+
+.. automodule:: GPy.util.ln_diff_erfs
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.util.misc module
 --------------------
 
@@ -75,6 +99,14 @@ GPy.util.multioutput module
     :undoc-members:
     :show-inheritance:
 
+GPy.util.netpbmfile module
+--------------------------
+
+.. automodule:: GPy.util.netpbmfile
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.util.plot module
 --------------------
 
@@ -99,6 +131,14 @@ GPy.util.squashers module
     :undoc-members:
     :show-inheritance:
 
+GPy.util.symbolic module
+------------------------
+
+.. automodule:: GPy.util.symbolic
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 GPy.util.univariate_Gaussian module
 -----------------------------------