diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py
index 7a87b98d..eac00fec 100644
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@@ -140,7 +140,6 @@ class FITC(SparseGP):
         dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
         dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
-        dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
         dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
         alpha = mdot(LBiLmipsi1,self.V_star)
diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 278ddc74..68d4376e 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -19,9 +19,6 @@ class GP(GPBase):
     :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_X: False|True
     :rtype: model object
-    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
-    :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
-    :type powerep: list

     .. Note:: Multiple independent outputs are allowed using columns of Y

@@ -105,7 +102,12 @@ class GP(GPBase):

         Note, we use the chain rule: dL_dtheta = dL_dK * dK_dtheta
         """
-        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        # EP likelihoods now expose their noise-model gradients through the same
+        # _gradients interface, so a single code path covers all likelihoods.
+        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X),
+                          self.likelihood._gradients(partial=np.diag(self.dL_dK))))
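The chain rule in that docstring reduces, for each kernel parameter, to an elementwise product followed by a sum. A minimal numpy sketch of the same computation, with toy matrices standing in for GPy's dL_dK and dK_dtheta:

import numpy as np

N = 4
rng = np.random.RandomState(0)
dL_dK = rng.randn(N, N)      # gradient of the log marginal w.r.t. the kernel matrix
dK_dtheta = rng.randn(N, N)  # gradient of the kernel matrix w.r.t. one parameter theta

# dL/dtheta = sum_ij (dL/dK)_ij * (dK/dtheta)_ij
dL_dtheta = np.sum(dL_dK * dK_dtheta)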
    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
        """
@@ -136,7 +138,7 @@ class GP(GPBase):
        :type Xnew: np.ndarray, Nnew x self.input_dim
        :param which_parts: specifies which outputs kernel(s) to use in prediction
        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -153,5 +155,71 @@ class GP(GPBase):

        # now push through likelihood
        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
-        return mean, var, _025pm, _975pm
+        return mean, var, _025pm, _975pm
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        For a specific output, predict the function at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+        .. Note:: For multiple output models only
+        """
+        assert hasattr(self,'multioutput')
+        # append an index column identifying the requested output
+        index = np.ones_like(Xnew)*output
+        Xnew = np.hstack((Xnew,index))
+
+        # normalize X values
+        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+        # now push through likelihood
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model=output)
+        return mean, var, _025pm, _975pm
+
+    def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False, stop=False):
+        """
+        Internal helper for making predictions for a specific output;
+        does not account for normalization or the likelihood.
+
+        :param _Xnew: The points at which to make a prediction
+        :type _Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+        .. Note:: For multiple output models only
+        """
+        assert hasattr(self,'multioutput')
+
+        # creates an index column and appends it to _Xnew
+        index = np.ones_like(_Xnew)*output
+        _Xnew = np.hstack((_Xnew,index))
+
+        Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
+        KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
+        mu = np.dot(KiKx.T, self.likelihood.Y)
+        if full_cov:
+            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            var = Kxx - np.dot(KiKx.T, Kx)
+        else:
+            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
+            var = var[:, None]
+        if stop:
+            raise RuntimeError("prediction stopped for debugging")
+        return mu, var
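The Cholesky-based prediction in _raw_predict_single_output is the standard GP posterior; as a self-contained reference, the same computation in plain numpy with a toy RBF kernel (in place of GPy's dpotrs wrapper):

import numpy as np

def rbf(A, B, ell=1.0, sf2=1.0):
    # squared-exponential kernel between the rows of A and B
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return sf2 * np.exp(-0.5 * d2 / ell ** 2)

X = np.linspace(0, 5, 20)[:, None]
Y = np.sin(X)
K = rbf(X, X) + 1e-2 * np.eye(len(X))   # kernel matrix plus noise
L = np.linalg.cholesky(K)

Xnew = np.linspace(0, 5, 50)[:, None]
Kx = rbf(X, Xnew)
KiKx = np.linalg.solve(L.T, np.linalg.solve(L, Kx))  # K^{-1} Kx

mu = np.dot(KiKx.T, Y)  # posterior mean
var = rbf(Xnew, Xnew).diagonal()[:, None] - (KiKx * Kx).sum(0)[:, None]  # posterior variance (diagonal)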
diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 6935fc6f..4513ddac 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -57,34 +57,30 @@ class GPBase(Model):
        self.X = state.pop()
        Model.setstate(self, state)

-    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
+    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None, output=None):
        """
-        Plot the GP's view of the world, where the data is normalized and the
-        likelihood is Gaussian.
+        Plot the GP's view of the world, where the data is normalized and the
+        likelihood is Gaussian.
+
+        - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+        - In two dimensions, a contour-plot shows the mean predicted function.
+        - Not implemented in higher dimensions.

-        Plot the posterior of the GP.
-        - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-        - In two dimsensions, a contour-plot shows the mean predicted function
-        - In higher dimensions, we've no implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions
-        using which_data and which_functions
-
-        :param samples: the number of a posteriori samples to plot
-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :param which_data: which if the training data to plot (default all)
-        :type which_data: 'all' or a slice object to slice self.X, self.Y
-        :param which_parts: which of the kernel functions to plot (additively)
-        :type which_parts: 'all', or list of bools
-        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-        :type resolution: int
-        :param full_cov:
-        :type full_cov: bool
-        :param fignum: figure to plot on.
-        :type fignum: figure number
-        :param ax: axes to plot on.
-        :type ax: axes handle
+        :param samples: the number of a posteriori samples to plot
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+        :param which_data: which of the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param which_parts: which of the kernel functions to plot (additively)
+        :type which_parts: 'all', or list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param full_cov: whether posterior samples should use the full covariance
+        :type full_cov: bool
+        :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on.
+        :type ax: axes handle
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
        """
        if which_data == 'all':
            which_data = slice(None)
@@ -93,7 +89,7 @@ class GPBase(Model):
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)

-        if self.X.shape[1] == 1:
+        if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
            if samples == 0:
                m, v = self._raw_predict(Xnew, which_parts=which_parts)
@@ -111,7 +107,7 @@ class GPBase(Model):
            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
            ax.set_ylim(ymin, ymax)

-        elif self.X.shape[1] == 2:
+        elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
            resolution = resolution or 50
            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
            m, v = self._raw_predict(Xnew, which_parts=which_parts)
@@ -120,17 +116,51 @@ class GPBase(Model):
            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
            ax.set_xlim(xmin[0], xmax[0])
            ax.set_ylim(xmin[1], xmax[1])
+
+        elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+            # the last column of X is the output index; the first output is 0
+            assert self.num_outputs > output, 'The model has only %s outputs.' % self.num_outputs
+            Xu = self.X[self.X[:,-1]==output, 0:1]
+            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+
+            if samples == 0:
+                m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
+                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+                ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
+            else:
+                m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
+                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None], axes=ax)
+                for i in range(samples):
+                    ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+            ax.set_xlim(xmin, xmax)
+            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            ax.set_ylim(ymin, ymax)
+
+            if hasattr(self,'Z'):
+                # NOTE: assumes the index column is unaffected by normalization
+                Zu = self.Z[self.Z[:,-1]==output, :]  # inducing inputs for this output
+                Zu = Zu[:, 0:1]                       # drop the index column
+                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+        elif self.X.shape[1] == 3 and hasattr(self,'multioutput'):
+            raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

-    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue']):
        """
        Plot the GP with noise where the likelihood is Gaussian.

        Plot the posterior of the GP.
        - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
        - In two dimensions, a contour-plot shows the mean predicted function
-        - In higher dimensions, we've no implemented this yet !TODO!
+        - Not implemented in higher dimensions

        Can plot only part of the data and part of the posterior functions
        using which_data and which_functions
@@ -151,15 +181,13 @@ class GPBase(Model):
        :type fignum: figure number
        :param ax: axes to plot on.
        :type ax: axes handle
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
        :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
        :type fixed_inputs: a list of tuples
        :param linecol: color of line to plot.
        :type linecol: color specification
        :param fillcol: color of fill
-        :type fillcol:
-        :param levels: for 2D plotting, the number of contour levels to use
-        is ax is None, create a new figure
-
+        :type fillcol: color specification
+        :param levels: for 2D plotting, the number of contour levels to use
+
+        If ax is None, a new figure is created.
        """
        # TODO include samples
        if which_data == 'all':
            which_data = slice(None)

        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)

-        plotdims = self.input_dim - len(fixed_inputs)
+        if not hasattr(self,'multioutput'):

-        if plotdims == 1:
+            plotdims = self.input_dim - len(fixed_inputs)
+            if plotdims == 1:
+                resolution = resolution or 200

-            Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now

-            fixed_dims = np.array([i for i,v in fixed_inputs])
-            freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
+                fixed_dims = np.array([i for i,v in fixed_inputs])
+                freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)

-            Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
-            Xgrid = np.empty((Xnew.shape[0],self.input_dim))
-            Xgrid[:,freedim] = Xnew
-            for i,v in fixed_inputs:
-                Xgrid[:,i] = v
+                Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
+                Xgrid = np.empty((Xnew.shape[0],self.input_dim))
+                Xgrid[:,freedim] = Xnew
+                for i,v in fixed_inputs:
+                    Xgrid[:,i] = v

-            m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
-            for d in range(m.shape[1]):
-                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
-                ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
-            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
-            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-            ax.set_xlim(xmin, xmax)
-            ax.set_ylim(ymin, ymax)
+                m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
+                for d in range(m.shape[1]):
+                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
+                    ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
+                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_xlim(xmin, xmax)
+                ax.set_ylim(ymin, ymax)

-        elif self.X.shape[1] == 2: # FIXME
-            resolution = resolution or 50
-            Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
-            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
-            m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
-            m = m.reshape(resolution, resolution).T
-            ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-            Yf = self.likelihood.data.flatten()
-            ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
-            ax.set_xlim(xmin[0], xmax[0])
-            ax.set_ylim(xmin[1], xmax[1])
+            elif self.X.shape[1] == 2:
+                resolution = resolution or 50
+                Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
+                x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
+                m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
+                m = m.reshape(resolution, resolution).T
+                ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+                Yf = self.likelihood.Y.flatten()
+                ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
+                ax.set_xlim(xmin[0], xmax[0])
+                ax.set_ylim(xmin[1], xmax[1])
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

        else:
-            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+            assert self.num_outputs > output, 'The model has only %s outputs.' % self.num_outputs
+            if self.X.shape[1] == 2:
+                resolution = resolution or 200
+                # NOTE: assumes the index column is unaffected by normalization
+                Xu = self.X[self.X[:,-1]==output, :]    # keep the output of interest
+                Xu = Xu * self._Xscale + self._Xoffset  # denormalize
+                Xu = Xu[:, 0:1]                         # get rid of the index column
+
+                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+                m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts, output=output)
+
+                for d in range(m.shape[1]):
+                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+                    ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
+                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_xlim(xmin, xmax)
+                ax.set_ylim(ymin, ymax)
+
+            elif self.X.shape[1] == 3:
+                raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
diff --git a/GPy/core/model.py b/GPy/core/model.py
index 2965cf94..6832d4e0 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -458,7 +458,7 @@ class Model(Parameterized):
            numerical_gradient = (f1 - f2) / (2 * dx)
            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))

-            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance
+            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
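The ratio test fixed above compares analytic gradients against central differences; the same check in isolation, on a toy objective:

import numpy as np

def f(x):
    return np.sum(x ** 3)

def grad_f(x):
    return 3 * x ** 2

x = np.random.randn(5)
step, tolerance = 1e-6, 1e-4

# central differences, one parameter at a time
numerical = np.array([(f(x + step * e) - f(x - step * e)) / (2 * step)
                      for e in np.eye(len(x))])
assert np.abs(grad_f(x) - numerical).mean() < tolerance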
""" - assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods" + assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods" ll_change = epsilon + 1. iteration = 0 last_ll = -np.inf diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index abcdf72c..7085c2fc 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -5,7 +5,7 @@ import numpy as np import pylab as pb from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri from scipy import linalg -from ..likelihoods import Gaussian +from ..likelihoods import Gaussian, EP,EP_Mixed_Noise from gp_base import GPBase class SparseGP(GPBase): @@ -109,7 +109,6 @@ class SparseGP(GPBase): tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1) self._A = tdot(tmp) - # factor B self.B = np.eye(self.num_inducing) + self._A self.LB = jitchol(self.B) @@ -139,6 +138,7 @@ class SparseGP(GPBase): dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) if self.likelihood.is_heteroscedastic: + if self.has_uncertain_inputs: self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :] else: @@ -160,9 +160,23 @@ class SparseGP(GPBase): # save computation here. self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: - raise NotImplementedError, "heteroscedatic derivates not implemented" + + if self.has_uncertain_inputs: + raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" + + else: + Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0) + _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0) + _Bi_Lmi_psi1, _ = dtrtrs(self.LB.T, np.asfortranarray(_LBi_Lmi_psi1), lower=1, trans=0) + + self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2 + self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2 + self.partial_for_likelihood += 0.5*np.sum(_Bi_Lmi_psi1*Lmi_psi1,0)[:,None]*self.likelihood.precision**2 #NOTE this term has numerical issues + self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2 + self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2 + else: - # likelihood is not heterscedatic + # likelihood is not heteroscedatic self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit) @@ -298,7 +312,7 @@ class SparseGP(GPBase): :type X_variance_new: np.ndarray, Nnew x self.input_dim :param which_parts: specifies which outputs kernel(s) to use in prediction :type which_parts: ('all', list of bools) - :param full_cov: whether to return the folll covariance matrix, or just the diagonal + :param full_cov: whether to return the full covariance matrix, or just the diagonal :type full_cov: bool :rtype: posterior mean, a Numpy array, 
        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -322,18 +336,15 @@ class SparseGP(GPBase):

        return mean, var, _025pm, _975pm

-    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
-
+    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
        if which_data is 'all':
            which_data = slice(None)

-        GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
-
-        # add the inducing inputs and some errorbars
-        if self.X.shape[1] == 1:
+        # pass the user's arguments through instead of resetting them to the defaults
+        GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data=which_data, which_parts=which_parts, resolution=resolution, levels=levels, ax=ax, output=output)
+
+        # add the inducing inputs and some errorbars
+        if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
            if self.has_uncertain_inputs:
                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
@@ -342,6 +353,109 @@ class SparseGP(GPBase):
            Zu = self.Z * self._Xscale + self._Xoffset
            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)

-        elif self.X.shape[1] == 2:
+        elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
            Zu = self.Z * self._Xscale + self._Xoffset
            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+        elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+            # NOTE: assumes the index column is unaffected by normalization
+            if self.has_uncertain_inputs:
+                Xu = self.X[self.X[:,-1]==output, :]    # data for this output
+                Xu = Xu * self._Xscale + self._Xoffset  # denormalize
+                Xu = Xu[:, 0:1]                         # drop the index column
+                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+
+            Zu = self.Z[self.Z[:,-1]==output, :]        # inducing inputs for this output
+            Zu = Zu * self._Xscale + self._Xoffset
+            Zu = Zu[:, 0:1]
+            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        For a specific output, predict the function at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+        ..
Note:: For multiple output models only + """ + + assert hasattr(self,'multioutput') + index = np.ones_like(Xnew)*output + Xnew = np.hstack((Xnew,index)) + + # normalize X values + Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale + mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) + + # now push through likelihood + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output) + return mean, var, _025pm, _975pm + + def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False): + """ + Internal helper function for making predictions for a specific output, + does not account for normalization or likelihood + --------- + + :param Xnew: The points at which to make a prediction + :type Xnew: np.ndarray, Nnew x self.input_dim + :param output: output to predict + :type output: integer in {0,..., num_outputs-1} + :param which_parts: specifies which outputs kernel(s) to use in prediction + :type which_parts: ('all', list of bools) + :param full_cov: whether to return the full covariance matrix, or just the diagonal + + .. Note:: For multiple output models only + """ + Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! + symmetrify(Bi) + Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi) + + if self.Cpsi1V is None: + psi1V = np.dot(self.psi1.T,self.likelihood.V) + tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0) + tmp, _ = dpotrs(self.LB, tmp, lower=1) + self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1) + + assert hasattr(self,'multioutput') + index = np.ones_like(_Xnew)*output + _Xnew = np.hstack((_Xnew,index)) + + if X_variance_new is None: + Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts) + mu = np.dot(Kx.T, self.Cpsi1V) + if full_cov: + Kxx = self.kern.K(_Xnew, which_parts=which_parts) + var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting + else: + Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) + var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) + else: + Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new) + mu = np.dot(Kx, self.Cpsi1V) + if full_cov: + raise NotImplementedError, "TODO" + else: + Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new) + psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new) + var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) + + return mu, var[:, None] diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 994e3b48..005b131f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -298,7 +298,7 @@ def bgplvm_simulation(optimize='scg', return m def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): - D1, D2, D3, N, num_inducing, Q = 30, 10, 15, 60, 3, 10 + D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) likelihood_list = [Gaussian(x, normalize=True) for x in Ylist] @@ -321,7 +321,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): if optimize: print "Optimizing Model:" - m.optimize(messages=1, max_iters=8e3, max_f_eval=8e3, gtol=.1) + m.optimize(messages=1, max_iters=8e3, gtol=.1) if plot: m.plot_X_1d("MRD Latent Space 1D") m.plot_scales("MRD Scales") diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index c7849a46..df7b92b8 100644 --- 
a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -22,8 +22,8 @@ def coregionalisation_toy2(max_iters=100):
     Y = np.vstack((Y1, Y2))

     k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
-    k2 = GPy.kern.coregionalise(2, 1)
-    k = k1**k2
+    k2 = GPy.kern.coregionalise(2, 1)
+    k = k1**k2 # equivalently: k = k1.prod(k2, tensor=True)
     m = GPy.models.GPRegression(X, Y, kernel=k)
     m.constrain_fixed('.*rbf_var', 1.) # m.constrain_positive('.*kappa')
@@ -46,32 +46,23 @@ def coregionalisation_toy(max_iters=100):
     """
     X1 = np.random.rand(50, 1) * 8
     X2 = np.random.rand(30, 1) * 5
-    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
-    X = np.hstack((np.vstack((X1, X2)), index))
+    X = np.vstack((X1, X2))
     Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
     Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
     Y = np.vstack((Y1, Y2))

     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2, 2)
-    k = k1**k2 #k1.prod(k2, tensor=True)
-    m = GPy.models.GPRegression(X, Y, kernel=k)
+    m = GPy.models.GPMultioutputRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel_list=[k1])
     m.constrain_fixed('.*rbf_var', 1.)
-    # m.constrain_positive('kappa')
     m.optimize(max_iters=max_iters)

-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
+    fig, axes = pb.subplots(2, 1)
+    m.plot(output=0, ax=axes[0])
+    m.plot(output=1, ax=axes[1])
+    axes[0].set_title('Output 0')
+    axes[1].set_title('Output 1')
     return m
-
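Internally the multioutput models stack the per-output data and append an index column, exactly the layout the removed lines built by hand:

import numpy as np

X1 = np.random.rand(50, 1) * 8   # inputs for output 0
X2 = np.random.rand(30, 1) * 5   # inputs for output 1

index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
# X[:, 0] holds the input value, X[:, 1] the output index (0 or 1);
# the coregionalise kernel part operates on that index column.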
 def coregionalisation_sparse(max_iters=100):
     """
     A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
     """
@@ -86,31 +77,39 @@ def coregionalisation_sparse(max_iters=100):
     num_inducing = 40
     Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))

     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2, 2)
-    k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
-    m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
+    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel_list=[k1], num_inducing=20)
     m.constrain_fixed('.*rbf_var', 1.)
-    m.constrain_fixed('iip')
-    m.constrain_bounded('noise_variance', 1e-3, 1e-1)
     # m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
     m.optimize(max_iters=max_iters)

+    fig, axes = pb.subplots(2, 1)
+    m.plot(output=0, ax=axes[0])
+    m.plot(output=1, ax=axes[1])
+    axes[0].set_title('Output 0')
+    axes[1].set_title('Output 1')
     # plotting:
-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
-    y = pb.ylim()[0]
-    pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
-    pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
     return m

 def epomeo_gpx(max_iters=100):
@@ -401,8 +400,6 @@ def silhouette(max_iters=100):
     print(m)
     return m

-
-
 def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 0ef487af..589ec4c7 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -130,7 +130,7 @@ class opt_lbfgsb(Optimizer):
             opt_dict['pgtol'] = self.gtol

         opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
-                                            maxfun=self.max_f_eval, **opt_dict)
+                                            maxfun=self.max_iters, **opt_dict)
         self.x_opt = opt_result[0]
         self.f_opt = f_fp(self.x_opt)[0]
         self.funct_eval = opt_result[2]['funcalls']
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index d788b4f5..943965ed 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -73,9 +73,9 @@ def gibbs(input_dim,variance=1., mapping=None):
     Gibbs and MacKay non-stationary covariance function.

     .. math::

        r = \sqrt{(x - x')^\top (x - x')}

        k(x, x') = \sigma^2 Z \exp\left(-\frac{r^2}{l(x)^2 + l(x')^2}\right)

        Z = \sqrt{\frac{2\,l(x)\,l(x')}{l(x)^2 + l(x')^2}}

     The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x),
     which is a function that can be specified by the user; by default a multi-layer perceptron is used.

@@ -89,7 +89,7 @@ def gibbs(input_dim,variance=1., mapping=None):
     :param input_dim: the number of input dimensions
     :type input_dim: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param mapping: the mapping that gives the lengthscale across the input space.
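A direct transcription of the Gibbs covariance above, for scalar inputs and an illustrative, hand-picked lengthscale function l(x) (GPy instead learns l through the supplied mapping):

import numpy as np

def l(x):
    # stand-in lengthscale function; any positive function works
    return 1.0 + 0.5 * np.abs(x)

def gibbs_k(x, y, sigma2=1.0):
    denom = l(x) ** 2 + l(y) ** 2
    Z = np.sqrt(2.0 * l(x) * l(y) / denom)
    return sigma2 * Z * np.exp(-(x - y) ** 2 / denom)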
@@ -346,29 +346,30 @@ def symmetric(k):
     k_.parts = [symmetric.Symmetric(p) for p in k.parts]
     return k_

-def coregionalise(output_dim, rank=1, W=None, kappa=None):
+def coregionalise(num_outputs, W_columns=1, W=None, kappa=None):
     """
-    Coregionalisation kernel.
-
-    Used for computing covariance functions of the form
-    .. math::
-       k_2(x, y)=\mathbf{B} k(x, y)
-    where
-    .. math::
-       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + diag(kappa)
-
-    :param output_dim: the number of output dimensions
-    :type output_dim: int
-    :param rank: the rank of the coregionalisation matrix.
-    :type rank: int
-    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
-    :type W: ndarray
-    :param kappa: a diagonal term which allows the outputs to behave independently.
+    Coregionalization matrix B, of the form:
+
+    .. math::
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)
+
+    An intrinsic/linear coregionalization kernel of the form
+
+    .. math::
+       k_2(x, y) = \mathbf{B} k(x, y)
+
+    is obtained as the tensor product between a kernel k(x, y) and B.
+
+    :param num_outputs: the number of outputs to coregionalise
+    :type num_outputs: int
+    :param W_columns: number of columns of the W matrix (this parameter is ignored if W is not None)
+    :type W_columns: int
+    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
+    :type W: numpy array of dimensionality (num_outputs, W_columns)
+    :param kappa: a vector which allows the outputs to behave independently
+    :type kappa: numpy array of dimensionality (num_outputs,)
     :rtype: kernel object

     .. Note: see coregionalisation examples in GPy.examples.regression for some usage.
     """
-    p = parts.coregionalise.Coregionalise(output_dim,rank,W,kappa)
+    p = parts.coregionalise.Coregionalise(num_outputs, W_columns, W, kappa)
     return kern(1,[p])

@@ -427,3 +428,31 @@ def hierarchical(k):
#     assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
(TODO)" _parts = [parts.hierarchical.Hierarchical(k.parts)] return kern(k.input_dim+len(k.parts),_parts) + +def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa=None): + """ + Builds a kernel of a linear coregionalization model + + :input_dim: Input dimensionality + :num_outputs: Number of outputs + :kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different corregionalization matrix + :type kernel_list: list of GPy kernels + :param W_columns: number tuples of the corregionalization parameters 'coregion_W' + :type W_columns: integer + + ..Note the kernels dimensionality is overwritten to fit input_dim + """ + + for k in kernel_list: + if k.input_dim <> input_dim: + k.input_dim = input_dim + warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") + + k_coreg = coregionalise(num_outputs,W_columns,W,kappa) + kernel = kernel_list[0]**k_coreg.copy() + + for k in kernel_list[1:]: + k_coreg = coregionalise(num_outputs,W_columns,W,kappa) + kernel += k**k_coreg.copy() + + return kernel diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 9143854e..e7b3e30a 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -421,19 +421,19 @@ class kern(Parameterized): # TODO: input_slices needed crossterms = 0 - for p1, p2 in itertools.combinations(self.parts, 2): + for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2): + if i_s1 == i_s2: + # TODO psi1 this must be faster/better/precached/more nice + tmp1 = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) + tmp2 = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) + + prod = np.multiply(tmp1, tmp2) + crossterms += prod[:, :, None] + prod[:, None, :] - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z, mu, S, tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - target += crossterms - return target + # target += crossterms + return target + crossterms def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): """Gradient of the psi2 statistics with respect to the parameters.""" diff --git a/GPy/kern/parts/coregionalise.py b/GPy/kern/parts/coregionalise.py index 94179088..66e14052 100644 --- a/GPy/kern/parts/coregionalise.py +++ b/GPy/kern/parts/coregionalise.py @@ -9,42 +9,45 @@ from scipy import weave class Coregionalise(Kernpart): """ - Coregionalisation kernel. + Kernel for intrinsic/linear coregionalization models - Used for computing covariance functions of the form + This kernel has the form .. math:: - k_2(x, y)=B k(x, y) - where - .. math:: - B = WW^\top + diag(kappa) + \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I} - :param output_dim: the number of output dimensions - :type output_dim: int - :param rank: the rank of the coregionalisation matrix. - :type rank: int - :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B. - :type W: ndarray - :param kappa: a diagonal term which allows the outputs to behave independently. - :rtype: kernel object + An intrinsic/linear coregionalization kernel of the form + .. math:: + k_2(x, y)=\mathbf{B} k(x, y) + + it is obtainded as the tensor product between a kernel k(x,y) and B. 
diff --git a/GPy/kern/parts/coregionalise.py b/GPy/kern/parts/coregionalise.py
index 94179088..66e14052 100644
--- a/GPy/kern/parts/coregionalise.py
+++ b/GPy/kern/parts/coregionalise.py
@@ -9,42 +9,45 @@ from scipy import weave

class Coregionalise(Kernpart):
    """
-    Coregionalisation kernel.
+    Kernel for intrinsic/linear coregionalization models.

-    Used for computing covariance functions of the form
+    This kernel builds the coregionalization matrix
    .. math::
-       k_2(x, y)=B k(x, y)
-    where
-    .. math::
-       B = WW^\top + diag(kappa)
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)

-    :param output_dim: the number of output dimensions
-    :type output_dim: int
-    :param rank: the rank of the coregionalisation matrix.
-    :type rank: int
-    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
-    :type W: ndarray
-    :param kappa: a diagonal term which allows the outputs to behave independently.
-    :rtype: kernel object
+    An intrinsic/linear coregionalization kernel of the form
+    .. math::
+       k_2(x, y) = \mathbf{B} k(x, y)
+
+    is obtained as the tensor product between a kernel k(x, y) and B.
+
+    :param num_outputs: number of outputs to coregionalize
+    :type num_outputs: int
+    :param W_columns: number of columns of the W matrix (this parameter is ignored if W is not None)
+    :type W_columns: int
+    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
+    :type W: numpy array of dimensionality (num_outputs, W_columns)
+    :param kappa: a vector which allows the outputs to behave independently
+    :type kappa: numpy array of dimensionality (num_outputs,)

    .. Note: see coregionalisation examples in GPy.examples.regression for some usage.
    """
-    def __init__(self,output_dim,rank=1, W=None, kappa=None):
+    def __init__(self, num_outputs, W_columns=1, W=None, kappa=None):
        self.input_dim = 1
        self.name = 'coregion'
-        self.output_dim = output_dim
-        self.rank = rank
+        self.num_outputs = num_outputs
+        self.W_columns = W_columns
        if W is None:
-            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
+            self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns)
        else:
-            assert W.shape==(self.output_dim,self.rank)
+            assert W.shape==(self.num_outputs,self.W_columns)
            self.W = W
        if kappa is None:
-            kappa = 0.5*np.ones(self.output_dim)
+            kappa = 0.5*np.ones(self.num_outputs)
        else:
-            assert kappa.shape==(self.output_dim,)
+            assert kappa.shape==(self.num_outputs,)
        self.kappa = kappa
-        self.num_params = self.output_dim*(self.rank + 1)
+        self.num_params = self.num_outputs*(self.W_columns + 1)
        self._set_params(np.hstack([self.W.flatten(),self.kappa]))

    def _get_params(self):
@@ -52,12 +55,12 @@ class Coregionalise(Kernpart):

    def _set_params(self,x):
        assert x.size == self.num_params
-        self.kappa = x[-self.output_dim:]
-        self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
+        self.kappa = x[-self.num_outputs:]
+        self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
        self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)

    def _get_param_names(self):
-        return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]
+        return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]

    def K(self,index,index2,target):
        index = np.asarray(index,dtype=np.int)
@@ -75,26 +78,26 @@ class Coregionalise(Kernpart):
        if index2 is None:
            code="""
            for(int i=0;i' + k2.name
+        self.name = '['+k1.name + '(x)' + k2.name +']'
        self.k1 = k1
        self.k2 = k2
        if tensor:
diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py
index 1b65133f..855e2b71 100644
--- a/GPy/kern/parts/rbf.py
+++ b/GPy/kern/parts/rbf.py
@@ -242,13 +242,14 @@ class RBF(Kernpart):

    def _psi_computations(self, Z, mu, S):
        # here are the "statistics" for psi1 and psi2
-        if not fast_array_equal(Z, self._Z):
+        Z_changed = not fast_array_equal(Z, self._Z)
+        if Z_changed:
            # Z has changed, compute Z specific stuff
            self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
            self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
            self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q

-        if not fast_array_equal(Z, self._Z) or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
+        if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
            # something's changed.
recompute EVERYTHING # psi1 diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 99e88b6d..0cb62eb0 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,4 +1,7 @@ from ep import EP +from ep_mixed_noise import EP_Mixed_Noise from gaussian import Gaussian +from gaussian_mixed_noise import Gaussian_Mixed_Noise +from noise_model_constructors import * # TODO: from Laplace import Laplace -import likelihood_functions as functions + diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index 94f760e9..c9f23839 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -4,23 +4,23 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs from likelihood import likelihood class EP(likelihood): - def __init__(self,data,LikelihoodFunction,epsilon=1e-3,power_ep=[1.,1.]): + def __init__(self,data,noise_model,epsilon=1e-3,power_ep=[1.,1.]): """ Expectation Propagation Arguments --------- epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - LikelihoodFunction : a likelihood function (see likelihood_functions.py) + noise_model : a likelihood function (see likelihood_functions.py) """ - self.LikelihoodFunction = LikelihoodFunction + self.noise_model = noise_model self.epsilon = epsilon self.eta, self.delta = power_ep self.data = data self.N, self.output_dim = self.data.shape self.is_heteroscedastic = True self.Nparams = 0 - self._transf_data = self.LikelihoodFunction._preprocess_values(data) + self._transf_data = self.noise_model._preprocess_values(data) #Initial values - Likelihood approximation parameters: #p(y|f) = t(f|tau_tilde,v_tilde) @@ -52,16 +52,23 @@ class EP(likelihood): def predictive_values(self,mu,var,full_cov): if full_cov: raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" - return self.LikelihoodFunction.predictive_values(mu,var) + return self.noise_model.predictive_values(mu,var) def _get_params(self): - return np.zeros(0) + #return np.zeros(0) + return self.noise_model._get_params() + def _get_param_names(self): - return [] + #return [] + return self.noise_model._get_param_names() + def _set_params(self,p): - pass # TODO: the EP likelihood might want to take some parameters... + #pass # TODO: the EP likelihood might want to take some parameters... + self.noise_model._set_params(p) + def _gradients(self,partial): - return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... + #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... 
+ return self.noise_model._gradients(partial) def _compute_GP_variables(self): #Variables to be called from GP @@ -116,7 +123,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) @@ -206,7 +213,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) @@ -301,7 +308,7 @@ class EP(likelihood): self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) #Site parameters update Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) diff --git a/GPy/likelihoods/ep_mixed_noise.py b/GPy/likelihoods/ep_mixed_noise.py new file mode 100644 index 00000000..ffc8cb51 --- /dev/null +++ b/GPy/likelihoods/ep_mixed_noise.py @@ -0,0 +1,385 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs +from likelihood import likelihood + +class EP_Mixed_Noise(likelihood): + def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]): + """ + Expectation Propagation + + Arguments + --------- + :param data_list: list of outputs + :param noise_model_list: a list of noise models + :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations + :type epsilon: float + :param power_ep: list of power ep parameters + """ + assert len(data_list) == len(noise_model_list) + self.noise_model_list = noise_model_list + n_list = [data.size for data in data_list] + self.n_models = len(data_list) + self.n_params = [noise_model._get_params().size for noise_model in noise_model_list] + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)]) + self.epsilon = epsilon + self.eta, self.delta = power_ep + self.data = np.vstack(data_list) + self.N, self.output_dim = self.data.shape + self.is_heteroscedastic = True + self.Nparams = 0#FIXME + self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)]) + #TODO non-gaussian index + + #Initial values - Likelihood 
approximation parameters: + #p(y|f) = t(f|tau_tilde,v_tilde) + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + + #initial values for the GP variables + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. + + def restart(self): + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N)[:,None] + self.Z = 0 + self.YYT = None + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. + + def predictive_values(self,mu,var,full_cov,noise_model): + """ + Predicts the output given the GP + + :param mu: GP's mean + :param var: GP's variance + :param full_cov: whether to return the full covariance matrix, or just the diagonal + :type full_cov: False|True + :param noise_model: noise model to use + :type noise_model: integer + """ + if full_cov: + raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" + #_mu = [] + #_var = [] + #_q1 = [] + #_q2 = [] + #for m,v,o in zip(mu,var,output.flatten()): + # a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v) + # _mu.append(a) + # _var.append(b) + # _q1.append(c) + # _q2.append(d) + #return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2) + return self.noise_model_list[noise_model].predictive_values(mu,var) + + def _get_params(self): + return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list]) + + def _get_param_names(self): + names = [] + for noise_model in self.noise_model_list: + names += noise_model._get_param_names() + return names + + def _set_params(self,p): + cs_params = np.cumsum([0]+self.n_params) + for i in range(len(self.n_params)): + self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]]) + + def _gradients(self,partial): + #NOTE this is not tested + return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list]) + + def _compute_GP_variables(self): + #Variables to be called from GP + mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model + sigma_sum = 1./self.tau_ + 1./self.tau_tilde + mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 + self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep + + self.Y = mu_tilde[:,None] + self.YYT = np.dot(self.Y,self.Y.T) + self.covariance_matrix = np.diag(1./self.tau_tilde) + self.precision = self.tau_tilde[:,None] + self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = np.trace(self.YYT) + + def fit_full(self,K): + """ + The expectation-propagation algorithm. + For nomenclature see Rasmussen & Williams 2006. 
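One site update of the loop below, in isolation and with eta = delta = 1 (scalar toy numbers; moments_match is replaced by fixed stand-in moments):

# cavity: remove site i from the current posterior marginal
Sigma_ii, mu_i = 1.0, 0.3
tau_tilde_i, v_tilde_i = 0.5, 0.1
tau_cav = 1.0 / Sigma_ii - tau_tilde_i
v_cav = mu_i / Sigma_ii - v_tilde_i

# moments of cavity * exact likelihood term (normally from moments_match)
mu_hat, sigma2_hat = 0.25, 0.8

# site update so the approximate marginal matches those moments
Delta_tau = 1.0 / sigma2_hat - 1.0 / Sigma_ii
Delta_v = mu_hat / sigma2_hat - mu_i / Sigma_ii
tau_tilde_i += Delta_tau
v_tilde_i += Delta_v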
+ """ + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) + mu = np.zeros(self.N) + Sigma = K.copy() + + """ + Initial values - Cavity distribution parameters: + q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = self.epsilon + 1. + epsilon_np2 = self.epsilon + 1. + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + self.tau_tilde[i] += Delta_tau + self.v_tilde[i] += Delta_v + #Posterior distribution parameters update + DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) + mu = np.dot(Sigma,self.v_tilde) + self.iterations += 1 + #Sigma recomptutation with Cholesky decompositon + Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K + B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K + L = jitchol(B) + V,info = dtrtrs(L,Sroot_tilde_K,lower=1) + Sigma = K - np.dot(V.T,V) + mu = np.dot(Sigma,self.v_tilde) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() + + def fit_DTC(self, Kmm, Kmn): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see ... 2013. + """ + num_inducing = Kmm.shape[0] + + #TODO: this doesn't work with uncertain inputs! 
+ + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = Qnn = Knm*Kmmi*Kmn + """ + KmnKnm = np.dot(Kmn,Kmn.T) + Lm = jitchol(Kmm) + Lmi = chol_inv(Lm) + Kmmi = np.dot(Lmi.T,Lmi) + KmmiKmn = np.dot(Kmmi,Kmn) + Qnn_diag = np.sum(Kmn*KmmiKmn,-2) + LLT0 = Kmm.copy() + + #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) + #KmnKnm = np.dot(Kmn, Kmn.T) + #KmmiKmn = np.dot(Kmmi,Kmn) + #Qnn_diag = np.sum(Kmn*KmmiKmn,-2) + #LLT0 = Kmm.copy() + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*Gamma + """ + mu = np.zeros(self.N) + LLT = Kmm.copy() + Sigma_diag = Qnn_diag.copy() + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + np1 = [self.tau_tilde.copy()] + np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + self.tau_tilde[i] += Delta_tau + self.v_tilde[i] += Delta_v + #Posterior distribution parameters update + DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau + L = jitchol(LLT) + #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau)) + V,info = dtrtrs(L,Kmn,lower=1) + Sigma_diag = np.sum(V*V,-2) + si = np.sum(V.T*V[:,i],-1) + mu += (Delta_v-Delta_tau*mu[i])*si + self.iterations += 1 + #Sigma recomputation with Cholesky decompositon + LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T) + L = jitchol(LLT) + V,info = dtrtrs(L,Kmn,lower=1) + V2,info = dtrtrs(L.T,V,lower=0) + Sigma_diag = np.sum(V*V,-2) + Knmv_tilde = np.dot(Kmn,self.v_tilde) + mu = np.dot(V2.T,Knmv_tilde) + epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N + np1.append(self.tau_tilde.copy()) + np2.append(self.v_tilde.copy()) + + self._compute_GP_variables() + + def fit_FITC(self, Kmm, Kmn, Knn_diag): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see Naish-Guzman and Holden, 2008. 
+ """ + num_inducing = Kmm.shape[0] + + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn + """ + Lm = jitchol(Kmm) + Lmi = chol_inv(Lm) + Kmmi = np.dot(Lmi.T,Lmi) + P0 = Kmn.T + KmnKnm = np.dot(P0.T, P0) + KmmiKmn = np.dot(Kmmi,P0.T) + Qnn_diag = np.sum(P0.T*KmmiKmn,-2) + Diag0 = Knn_diag - Qnn_diag + R0 = jitchol(Kmmi).T + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*Gamma + """ + self.w = np.zeros(self.N) + self.Gamma = np.zeros(num_inducing) + mu = np.zeros(self.N) + P = P0.copy() + R = R0.copy() + Diag = Diag0.copy() + Sigma_diag = Knn_diag + RPT0 = np.dot(R0,P0.T) + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + self.tau_tilde[i] += Delta_tau + self.v_tilde[i] += Delta_v + #Posterior distribution parameters update + dtd1 = Delta_tau*Diag[i] + 1. + dii = Diag[i] + Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 + pi_ = P[i,:].reshape(1,num_inducing) + P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ + Rp_i = np.dot(R,pi_.T) + RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) + R = jitchol(RTR).T + self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1 + self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + mu = self.w + np.dot(P,self.Gamma) + self.iterations += 1 + #Sigma recomptutation with Cholesky decompositon + Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) + Diag = Diag0 * Iplus_Dprod_i + P = Iplus_Dprod_i[:,None] * P0 + safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. 
- Iplus_Dprod_i)/Diag0) + L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) + R,info = dtrtrs(L,R0,lower=1) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + self.w = Diag * self.v_tilde + self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) + mu = self.w + np.dot(P,self.Gamma) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 7665e608..cdf54dd6 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -7,9 +7,9 @@ class Gaussian(likelihood): """ Likelihood class for doing Expectation propagation - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood_function used - :param variance : + :param data: observed output + :type data: Nx1 numpy.darray + :param variance: noise parameter :param normalize: whether to normalize the data before computing (predictions will be in original scales) :type normalize: False|True """ diff --git a/GPy/likelihoods/gaussian_mixed_noise.py b/GPy/likelihoods/gaussian_mixed_noise.py new file mode 100644 index 00000000..e7df2584 --- /dev/null +++ b/GPy/likelihoods/gaussian_mixed_noise.py @@ -0,0 +1,106 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs +from likelihood import likelihood +from . import Gaussian + + +class Gaussian_Mixed_Noise(likelihood): + """ + Gaussian Likelihood for multiple outputs + + This is a wrapper around likelihood.Gaussian class + + :param data_list: data observations + :type data_list: list of numpy arrays (num_data_output_i x 1), one array per output + :param noise_params: noise parameters of each output + :type noise_params: list of floats, one per output + :param normalize: whether to normalize the data before computing (predictions will be in original scales) + :type normalize: False|True + """ + def __init__(self, data_list, noise_params=None, normalize=True): + self.Nparams = len(data_list) + self.n_list = [data.size for data in data_list] + self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)]) + + if noise_params is None: + noise_params = [1.] * self.Nparams + else: + assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.' + + self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)] + self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list] + self.data = np.vstack(data_list) + self.N, self.output_dim = self.data.shape + self._offset = np.zeros((1, self.output_dim)) + self._scale = np.ones((1, self.output_dim)) + + self.is_heteroscedastic = True + self.Z = 0. 
# a correction factor which accounts for the approximation made + + self.set_data(data_list) + self._set_params(np.asarray(noise_params)) + + def set_data(self, data_list): + self.data = np.vstack(data_list) + self.N, D = self.data.shape + assert D == self.output_dim + self.Y = (self.data - self._offset) / self._scale + if D > self.N: + raise NotImplementedError + #self.YYT = np.dot(self.Y, self.Y.T) + #self.trYYT = np.trace(self.YYT) + #self.YYT_factor = jitchol(self.YYT) + else: + self.YYT = None + self.trYYT = np.sum(np.square(self.Y)) + self.YYT_factor = self.Y + + def predictive_values(self,mu,var,full_cov,noise_model): + """ + Predicts the output given the GP + + :param mu: GP's mean + :param var: GP's variance + :param full_cov: whether to return the full covariance matrix, or just the diagonal + :type full_cov: False|True + :param noise_model: noise model to use + :type noise_model: integer + """ + if full_cov: + raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" + return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov) + + def _get_params(self): + return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list]) + + def _get_param_names(self): + if len(self.noise_model_list) == 1: + names = self.noise_model_list[0]._get_param_names() + else: + names = [] + for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))): + names.append(''.join(noise_model._get_param_names() + ['_%s' %i])) + return names + + def _set_params(self,p): + cs_params = np.cumsum([0]+self.n_params) + + for i in range(len(self.n_params)): + self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]]) + self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None] + + self.V = self.precision * self.Y + self.VVT_factor = self.precision * self.YYT_factor + self.covariance_matrix = np.eye(self.N) * 1./self.precision + + def _gradients(self,partial): + gradients = [] + aux = np.cumsum([0]+self.n_list) + for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list): + gradients += [noise_model._gradients(partial[ai:af])] + return np.hstack(gradients) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py deleted file mode 100644 index 7b9b8982..00000000 --- a/GPy/likelihoods/likelihood_functions.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats -import scipy as sp -import pylab as pb -from ..util.plot import gpplot -from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf -import link_functions - -class LikelihoodFunction(object): - """ - Likelihood class for doing Expectation propagation - - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the LikelihoodFunction used - """ - def __init__(self,link): - if link == self._analytical: - self.moments_match = self._moments_match_analytical - else: - assert isinstance(link,link_functions.LinkFunction) - self.link = link - self.moments_match = self._moments_match_numerical - - def _preprocess_values(self,Y): - return Y - - def _product(self,gp,obs,mu,sigma): - return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs) - - def _nlog_product(self,gp,obs,mu,sigma): - return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs)) - - def 
_locate(self,obs,mu,sigma): - """ - Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration - """ - golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit - golden_B = np.array([np.log(obs),mu]).max() #Upper limit - return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product - - def _moments_match_numerical(self,obs,tau,v): - """ - Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input. - """ - mu = v/tau - sigma = np.sqrt(1./tau) - opt = self._locate(obs,mu,sigma) - width = 3./np.log(max(obs,2)) - A = opt - width #Grid's lower limit - B = opt + width #Grid's Upper limit - K = 10*int(np.log(max(obs,150))) #Number of points in the grid - h = (B-A)/K # length of the intervals - grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) - x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier - _aux1 = self._product(A,obs,mu,sigma) - _aux2 = self._product(B,obs,mu,sigma) - _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma) - _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma) - zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged - first = zeroth*x - second = first*x - Z_hat = sum(zeroth)*h/3 # Zero-th moment - mu_hat = sum(first)*h/(3*Z_hat) # First moment - m2 = sum(second)*h/(3*Z_hat) # Second moment - sigma2_hat = m2 - mu_hat**2 # Second central moment - return float(Z_hat), float(mu_hat), float(sigma2_hat) - -class Binomial(LikelihoodFunction): - """ - Probit likelihood - Y is expected to take values in {-1,1} - ----- - $$ - L(x) = \\Phi (Y_i*f_i) - $$ - """ - def __init__(self,link=None): - self._analytical = link_functions.Probit - if not link: - link = self._analytical - super(Binomial, self).__init__(link) - - def _distribution(self,gp,obs): - pass - - def _log_distribution(self,gp,obs): - pass - - def _preprocess_values(self,Y): - """ - Check if the values of the observations correspond to the values - assumed by the likelihood function. - - ..Note:: Binary classification algorithm works better with classes {-1,1} - """ - Y_prep = Y.copy() - Y1 = Y[Y.flatten()==1].size - Y2 = Y[Y.flatten()==0].size - assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.' 
- Y_prep[Y.flatten() == 0] = -1 - return Y_prep - - def _moments_match_analytical(self,data_i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = std_norm_cdf(z) - phi = std_norm_pdf(z) - mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) - return Z_hat, mu_hat, sigma2_hat - - def predictive_values(self,mu,var): - """ - Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction - :param mu: mean of the latent variable - :param var: variance of the latent variable - """ - mu = mu.flatten() - var = var.flatten() - mean = stats.norm.cdf(mu/np.sqrt(1+var)) - norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)] - norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)] - p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var)) - p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) - return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var - -class Poisson(LikelihoodFunction): - """ - Poisson likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! - $$ - """ - def __init__(self,link=None): - self._analytical = None - if not link: - link = link_functions.Log() - super(Poisson, self).__init__(link) - - def _distribution(self,gp,obs): - return stats.poisson.pmf(obs,self.link.inv_transf(gp)) - - def _log_distribution(self,gp,obs): - return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp) - - def predictive_values(self,mu,var): - """ - Compute mean, and conficence interval (percentiles 5 and 95) of the prediction - """ - mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location) - tmp = stats.poisson.ppf(np.array([.025,.975]),mean) - p_025 = tmp[:,0] - p_975 = tmp[:,1] - return mean,np.nan*mean,p_025,p_975 # better variance here TODO diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py deleted file mode 100644 index 3b9a55b2..00000000 --- a/GPy/likelihoods/link_functions.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2012, 2013 Ricardo Andrade -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats -import scipy as sp -import pylab as pb -from ..util.plot import gpplot -from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf - -class LinkFunction(object): - """ - Link function class for doing non-Gaussian likelihoods approximation - - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood_function used - """ - def __init__(self): - pass - -class Probit(LinkFunction): - """ - Probit link function: Squashes a likelihood between 0 and 1 - """ - def transf(self,mu): - pass - - def inv_transf(self,f): - pass - - def log_inv_transf(self,f): - pass diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py new file mode 100644 index 00000000..158f1674 --- /dev/null +++ b/GPy/likelihoods/noise_model_constructors.py @@ -0,0 +1,88 @@ +# Copyright (c) 2013, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import noise_models
+
+def binomial(gp_link=None):
+    """
+    Construct a binomial likelihood
+
+    :param gp_link: a GPy gp_link function
+    """
+    if gp_link is None:
+        gp_link = noise_models.gp_transformations.Probit()
+    #else:
+    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
+
+    if isinstance(gp_link,noise_models.gp_transformations.Probit):
+        analytical_mean = True
+        analytical_variance = False
+
+    elif isinstance(gp_link,noise_models.gp_transformations.Step):
+        analytical_mean = True
+        analytical_variance = True
+
+    else:
+        analytical_mean = False
+        analytical_variance = False
+
+    return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
+
+def exponential(gp_link=None):
+    """
+    Construct an exponential likelihood
+
+    :param gp_link: a GPy gp_link function
+    """
+    if gp_link is None:
+        gp_link = noise_models.gp_transformations.Identity()
+
+    analytical_mean = False
+    analytical_variance = False
+    return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)
+
+def gaussian(gp_link=None,variance=1.):
+    """
+    Construct a Gaussian likelihood
+
+    :param gp_link: a GPy gp_link function
+    :param variance: scalar
+    """
+    if gp_link is None:
+        gp_link = noise_models.gp_transformations.Identity()
+    #else:
+    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
+
+    analytical_mean = False
+    analytical_variance = False
+    return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)
+
+def poisson(gp_link=None):
+    """
+    Construct a Poisson likelihood
+
+    :param gp_link: a GPy gp_link function
+    """
+    if gp_link is None:
+        gp_link = noise_models.gp_transformations.Log_ex_1()
+    #else:
+    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
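+    # Log_ex_1 maps f -> log(1 + exp(f)) (softplus), keeping the Poisson rate positive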
+ analytical_mean = False + analytical_variance = False + return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance) + +def gamma(gp_link=None,beta=1.): + """ + Construct a Gamma likelihood + + :param gp_link: a GPy gp_link function + :param beta: scalar + """ + if gp_link is None: + gp_link = noise_models.gp_transformations.Log_ex_1() + analytical_mean = False + analytical_variance = False + return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta) + + diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py new file mode 100644 index 00000000..b47702a7 --- /dev/null +++ b/GPy/likelihoods/noise_models/__init__.py @@ -0,0 +1,7 @@ +import noise_distributions +import binomial_noise +import exponential_noise +import gaussian_noise +import gamma_noise +import poisson_noise +import gp_transformations diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/binomial_noise.py new file mode 100644 index 00000000..9ceacf79 --- /dev/null +++ b/GPy/likelihoods/noise_models/binomial_noise.py @@ -0,0 +1,118 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy import stats,special +import scipy as sp +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf +import gp_transformations +from noise_distributions import NoiseDistribution + +class Binomial(NoiseDistribution): + """ + Probit likelihood + Y is expected to take values in {-1,1} + ----- + $$ + L(x) = \\Phi (Y_i*f_i) + $$ + """ + def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): + super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance) + + def _preprocess_values(self,Y): + """ + Check if the values of the observations correspond to the values + assumed by the likelihood function. + + ..Note:: Binary classification algorithm works better with classes {-1,1} + """ + Y_prep = Y.copy() + Y1 = Y[Y.flatten()==1].size + Y2 = Y[Y.flatten()==0].size + assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.' 
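+        # remap class 0 to -1: the probit moment expressions below assume labels in {-1,1}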
+        Y_prep[Y.flatten() == 0] = -1
+        return Y_prep
+
+    def _moments_match_analytical(self,data_i,tau_i,v_i):
+        """
+        Moments match of the marginal approximation in EP algorithm
+
+        :param data_i: ith observation (float)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: mean/variance of the cavity distribution (float)
+        """
+        if isinstance(self.gp_link,gp_transformations.Probit):
+            z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
+            Z_hat = std_norm_cdf(z)
+            phi = std_norm_pdf(z)
+            mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
+            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
+
+        elif isinstance(self.gp_link,gp_transformations.Step):
+            Z_hat = None
+            mu_hat = None
+            sigma2_hat = None
+
+        return Z_hat, mu_hat, sigma2_hat
+
+    def _predictive_mean_analytical(self,mu,sigma):
+        return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
+
+    def _mass(self,gp,obs):
+        #NOTE obs must be in {0,1}
+        p = self.gp_link.transf(gp)
+        return p**obs * (1.-p)**(1.-obs)
+
+    def _nlog_mass(self,gp,obs):
+        p = self.gp_link.transf(gp)
+        return -(obs*np.log(p) + (1.-obs)*np.log(1.-p))
+
+    def _dnlog_mass_dgp(self,gp,obs):
+        p = self.gp_link.transf(gp)
+        dp = self.gp_link.dtransf_df(gp)
+        return ((1.-obs)/(1.-p) - obs/p) * dp
+
+    def _d2nlog_mass_dgp2(self,gp,obs):
+        p = self.gp_link.transf(gp)
+        return ((1.-obs)/(1.-p) - obs/p)*self.gp_link.d2transf_df2(gp) + (obs/p**2 + (1.-obs)/(1.-p)**2)*self.gp_link.dtransf_df(gp)**2
+
+    def _mean(self,gp):
+        """
+        Expected value of the output: E(Y|f) = g(f)
+        """
+        return self.gp_link.transf(gp)
+
+    def _dmean_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)
+
+    def _d2mean_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)
+
+    def _variance(self,gp):
+        """
+        Variance of the output: V(Y|f) = g(f)*(1-g(f))
+        """
+        p = self.gp_link.transf(gp)
+        return p*(1.-p)
+
+    def _dvariance_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
+
+    def _d2variance_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
+
+    """
+    def predictive_values(self,mu,var): #TODO remove
+        mu = mu.flatten()
+        var = var.flatten()
+        #mean = stats.norm.cdf(mu/np.sqrt(1+var))
+        mean = self._predictive_mean_analytical(mu,np.sqrt(var))
+        norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
+        norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
+        #p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
+        #p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
+        p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var))
+        p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var))
+        return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
+    """
diff --git a/GPy/likelihoods/noise_models/exponential_noise.py b/GPy/likelihoods/noise_models/exponential_noise.py
new file mode 100644
index 00000000..e72b8c22
--- /dev/null
+++ b/GPy/likelihoods/noise_models/exponential_noise.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+from noise_distributions import NoiseDistribution
+
+class Exponential(NoiseDistribution):
+    """
+    Exponential likelihood
+    Y is expected to take values in (0,+inf)
+    -----
+    $$
+    L(x) = \exp(-Y_i/\theta) / \theta,  with  \theta = g(f_i)
+    $$
+    """
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
+        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
+
+    def _preprocess_values(self,Y):
+        return Y
+
+    def _mass(self,gp,obs):
+        """
+        Mass (or density) function
+        """
+        return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)
+
+    def _nlog_mass(self,gp,obs):
+        """
+        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
+        """
+        return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))
+
+    def _dnlog_mass_dgp(self,gp,obs):
+        return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)
+
+    def _d2nlog_mass_dgp2(self,gp,obs):
+        fgp = self.gp_link.transf(gp)
+        return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)
+
+    def _mean(self,gp):
+        """
+        Expected value of the output: E(Y|f) = g(f)
+        """
+        return self.gp_link.transf(gp)
+
+    def _dmean_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)
+
+    def _d2mean_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)
+
+    def _variance(self,gp):
+        """
+        Variance of the output: V(Y|f) = g(f)**2
+        """
+        return self.gp_link.transf(gp)**2
+
+    def _dvariance_dgp(self,gp):
+        return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)
+
+    def _d2variance_dgp2(self,gp):
+        return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
diff --git a/GPy/likelihoods/noise_models/gamma_noise.py b/GPy/likelihoods/noise_models/gamma_noise.py
new file mode 100644
index 00000000..6bf0dd7b
--- /dev/null
+++ b/GPy/likelihoods/noise_models/gamma_noise.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+from noise_distributions import NoiseDistribution
+
+class Gamma(NoiseDistribution):
+    """
+    Gamma likelihood
+    Y is expected to take values in (0,+inf)
+    -----
+    $$
+    L(x) = \beta^\alpha * Y_i^{\alpha-1} * \exp(-\beta*Y_i) / \Gamma(\alpha),  with  \alpha = \beta*g(f_i)
+    $$
+    """
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
+        self.beta = beta
+        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
+
+    def _preprocess_values(self,Y):
+        return Y
+
+    def _mass(self,gp,obs):
+        """
+        Mass (or density) function
+        """
+        #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
+        alpha = self.gp_link.transf(gp)*self.beta
+        return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
+
+    def _nlog_mass(self,gp,obs):
+        """
+        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
+        """
+        alpha = self.gp_link.transf(gp)*self.beta
+        return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
+
+    def _dnlog_mass_dgp(self,gp,obs):
+        alpha = self.gp_link.transf(gp)*self.beta
+        return self.gp_link.dtransf_df(gp)*self.beta*(special.psi(alpha) - np.log(obs) - np.log(self.beta))
+
+    def _d2nlog_mass_dgp2(self,gp,obs):
+        alpha = self.gp_link.transf(gp)*self.beta
+        return self.gp_link.d2transf_df2(gp)*self.beta*(special.psi(alpha) - np.log(obs) - np.log(self.beta)) + special.polygamma(1,alpha)*(self.gp_link.dtransf_df(gp)*self.beta)**2
+
+    def _mean(self,gp):
+        """
+        Expected value of the output: E(Y|f) = g(f)
+        """
+        return self.gp_link.transf(gp)
+
+    def _dmean_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)
+
+    def _d2mean_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)
+
+    def _variance(self,gp):
+        """
+        Variance of the output: V(Y|f) = g(f)/beta
+        """
+        return self.gp_link.transf(gp)/self.beta
+
+    def _dvariance_dgp(self,gp):
+        return self.gp_link.dtransf_df(gp)/self.beta
+
+    def _d2variance_dgp2(self,gp):
+        return self.gp_link.d2transf_df2(gp)/self.beta
diff --git a/GPy/likelihoods/noise_models/gaussian_noise.py b/GPy/likelihoods/noise_models/gaussian_noise.py
new file mode 100644
index 00000000..398ed32a
--- /dev/null
+++ b/GPy/likelihoods/noise_models/gaussian_noise.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+from noise_distributions import NoiseDistribution
+
+class Gaussian(NoiseDistribution):
+    """
+    Gaussian likelihood
+
+    :param gp_link: transformation function from latent to output space
+    :param variance: variance of the Gaussian distribution
+    """
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
+        self.variance = variance
+        super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
+
+    def _get_params(self):
+        return np.array([self.variance])
+
+    def _get_param_names(self):
+        return ['noise_model_variance']
+
+    def _set_params(self,p):
+        self.variance = p
+
+    def _gradients(self,partial):
+        return np.zeros(1)
+        #return np.sum(partial)
+
+    def _preprocess_values(self,Y):
+        """
+        Check if the values of the observations correspond to the values
+        assumed by the likelihood function.
+        """
+        return Y
+
+    def _moments_match_analytical(self,data_i,tau_i,v_i):
+        """
+        Moments match of the marginal approximation in EP algorithm
+
+        :param data_i: ith observation (float)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: mean/variance of the cavity distribution (float)
+        """
+        sigma2_hat = 1./(1./self.variance + tau_i)
+        mu_hat = sigma2_hat*(data_i/self.variance + v_i)
+        sum_var = self.variance + 1./tau_i
+        Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
+        return Z_hat, mu_hat, sigma2_hat
+
+    def _predictive_mean_analytical(self,mu,sigma):
+        new_sigma2 = self.predictive_variance(mu,sigma)
+        return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
+
+    def _predictive_variance_analytical(self,mu,sigma,*args): # *args absorbs the predictive_mean that predictive_values passes in
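+        # harmonic sum of the noise variance and the latent variance, i.e. the
+        # variance of the product of the two Gaussian densities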
+ return 1./(1./self.variance + 1./sigma**2) + + def _mass(self,gp,obs): + #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) + return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)) #FIXME + + def _nlog_mass(self,gp,obs): + return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance)) + + def _dnlog_mass_dgp(self,gp,obs): + return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp) + + def _d2nlog_mass_dgp2(self,gp,obs): + return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _dmean_dgp(self,gp): + return self.gp_link.dtransf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.gp_link.d2transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.variance + + def _dvariance_dgp(self,gp): + return 0 + + def _d2variance_dgp2(self,gp): + return 0 diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py new file mode 100644 index 00000000..5f337aa4 --- /dev/null +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -0,0 +1,125 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +import scipy as sp +import pylab as pb +from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf + +class GPTransformation(object): + """ + Link function class for doing non-Gaussian likelihoods approximation + + :param Y: observed output (Nx1 numpy.darray) + ..Note:: Y values allowed depend on the likelihood_function used + """ + def __init__(self): + pass + +class Identity(GPTransformation): + """ + $$ + g(f) = f + $$ + """ + #def transf(self,mu): + # return mu + + def transf(self,f): + return f + + def dtransf_df(self,f): + return 1. 
+
+    def d2transf_df2(self,f):
+        return 0
+
+
+class Probit(GPTransformation):
+    """
+    $$
+    g(f) = \Phi(f)
+    $$
+    """
+    #def transf(self,mu):
+    #    return inv_std_norm_cdf(mu)
+
+    def transf(self,f):
+        return std_norm_cdf(f)
+
+    def dtransf_df(self,f):
+        return std_norm_pdf(f)
+
+    def d2transf_df2(self,f):
+        return -f * std_norm_pdf(f)
+
+class Log(GPTransformation):
+    """
+    $$
+    g(f) = \exp(f)
+    $$
+    """
+    #def transf(self,mu):
+    #    return np.log(mu)
+
+    def transf(self,f):
+        return np.exp(f)
+
+    def dtransf_df(self,f):
+        return np.exp(f)
+
+    def d2transf_df2(self,f):
+        return np.exp(f)
+
+class Log_ex_1(GPTransformation):
+    """
+    $$
+    g(f) = \log(1 + \exp(f))
+    $$
+    """
+    #def transf(self,mu):
+    #    """
+    #    function: output space -> latent space
+    #    """
+    #    return np.log(np.exp(mu) - 1)
+
+    def transf(self,f):
+        """
+        function: latent space -> output space
+        """
+        return np.log(1.+np.exp(f))
+
+    def dtransf_df(self,f):
+        return np.exp(f)/(1.+np.exp(f))
+
+    def d2transf_df2(self,f):
+        aux = np.exp(f)/(1.+np.exp(f))
+        return aux*(1.-aux)
+
+class Reciprocal(GPTransformation):
+    def transf(self,f):
+        return 1./f
+
+    def dtransf_df(self,f):
+        return -1./f**2
+
+    def d2transf_df2(self,f):
+        return 2./f**3
+
+class Step(GPTransformation):
+    """
+    $$
+    g(f) = 1 if f > 0, else -1
+    $$
+    """
+    def transf(self,f):
+        #transformation goes here
+        return np.where(f>0, 1, -1)
+
+    def dtransf_df(self,f):
+        pass
+
+    def d2transf_df2(self,f):
+        pass
diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py
new file mode 100644
index 00000000..9d4eedfb
--- /dev/null
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@@ -0,0 +1,387 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+import pylab as pb
+from GPy.util.plot import gpplot
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+
+
+class NoiseDistribution(object):
+    """
+    Base class for the noise models used with Expectation Propagation
+
+    :param gp_link: transformation function from latent to output space
+    ..Note:: subclasses provide the likelihood p(y|f) and its derivatives
+    """
+    def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
+        #assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME
+        self.gp_link = gp_link
+        self.analytical_mean = analytical_mean
+        self.analytical_variance = analytical_variance
+        if self.analytical_mean:
+            self.moments_match = self._moments_match_analytical
+            self.predictive_mean = self._predictive_mean_analytical
+        else:
+            self.moments_match = self._moments_match_numerical
+            self.predictive_mean = self._predictive_mean_numerical
+        if self.analytical_variance:
+            self.predictive_variance = self._predictive_variance_analytical
+        else:
+            self.predictive_variance = self._predictive_variance_numerical
+
+    def _get_params(self):
+        return np.zeros(0)
+
+    def _get_param_names(self):
+        return []
+
+    def _set_params(self,p):
+        pass
+
+    def _gradients(self,partial):
+        return np.zeros(0)
+
+    def _preprocess_values(self,Y):
+        """
+        In case it is needed, this function assesses the output values or makes any pertinent transformation on them.
+
+        :param Y: observed output (Nx1 numpy.darray)
+        """
+        return Y
+
+    def _product(self,gp,obs,mu,sigma):
+        """
+        Product between the cavity distribution and a likelihood factor.
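+        This is the integrand of the EP tilted distribution,
+        q_(f_i)*p(y_i|f_i), whose moments are matched at each site update.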
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
+        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
+
+    def _nlog_product_scaled(self,gp,obs,mu,sigma):
+        """
+        Negative log-product between the cavity distribution and a likelihood factor.
+        ..Note:: The constant term in the Gaussian distribution is ignored.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
+        return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
+
+    def _dnlog_product_dgp(self,gp,obs,mu,sigma):
+        """
+        Derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
+        return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
+
+    def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
+        """
+        Second derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.
+
+        :param gp: latent variable
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
+        return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
+
+    def _product_mode(self,obs,mu,sigma):
+        """
+        Newton's CG method to find the mode in _product (cavity x likelihood factor).
+
+        :param obs: observed output
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
+        return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma))
+
+    def _moments_match_analytical(self,obs,tau,v):
+        """
+        If available, this function computes the moments analytically.
+        """
+        pass
+
+    def _moments_match_numerical(self,obs,tau,v):
+        """
+        Laplace approximation to calculate the moments.
+
+        :param obs: observed output
+        :param tau: cavity distribution 1st natural parameter (precision)
+        :param v: cavity distribution 2nd natural parameter (mu*precision)
+        """
+        mu = v/tau
+        mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
+        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
+        Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
+        return Z_hat,mu_hat,sigma2_hat
+
+    def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
+        """
+        Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v.
+
+        :param gp: latent variable
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+
+        ..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
+        """
+        return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
+
+    def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
+        """
+        Derivative of _nlog_conditional_mean_scaled wrt. l.v.
+
+        :param gp: latent variable
+        :param mu: cavity distribution mean
+        :param sigma: cavity distribution standard deviation
+        """
+        return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
+
+    def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
+        """
+        Second derivative of _nlog_conditional_mean_scaled wrt. l.v.
+ + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2 + + def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E(V(Y_star|f_star)) + """ + return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp)) + + def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) + + def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2 + + def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma): + """ + Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + + ..Note:: This function helps computing E( E(Y_star|f_star)**2 ) + """ + return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)) + + def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma): + """ + Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp) + + def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma): + """ + Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v. + + :param gp: latent variable + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 ) + + def _predictive_mean_analytical(self,mu,sigma): + """ + If available, this function computes the predictive mean analytically. + """ + pass + + def _predictive_variance_analytical(self,mu,sigma): + """ + If available, this function computes the predictive variance analytically. 
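+        Noise models that provide a closed form advertise it through the
+        analytical_variance flag passed to __init__.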
+ """ + pass + + def _predictive_mean_numerical(self,mu,sigma): + """ + Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma)) + mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) + """ + + pb.figure() + x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) + f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) + pb.plot(x,f,'b-') + sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma) + f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) + k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) + pb.plot(x,f2*mean,'r-') + pb.vlines(maximum,0,f.max()) + """ + return mean + + def _predictive_mean_sq(self,mu,sigma): + """ + Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + """ + maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma)) + mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) + return mean_squared + + def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None): + """ + Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. 
+ """ + # E( V(Y_star|f_star) ) + maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma)) + exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma) + + """ + pb.figure() + x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)]) + f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x]) + pb.plot(x,f,'b-') + sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma) + f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2) + k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2) + pb.plot(x,f2*exp_var,'r--') + pb.vlines(maximum,0,f.max()) + """ + + #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star)**2 ) + exp_exp2 = self._predictive_mean_sq(mu,sigma) + if predictive_mean is None: + predictive_mean = self.predictive_mean(mu,sigma) + var_exp = exp_exp2 - predictive_mean**2 + return exp_var + var_exp + + def _predictive_percentiles(self,p,mu,sigma): + """ + Percentiles of the predictive distribution + + :parm p: lower tail probability + :param mu: cavity distribution mean + :param sigma: cavity distribution standard deviation + :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. + """ + qf = stats.norm.ppf(p,mu,sigma) + return self.gp_link.transf(qf) + + def _nlog_joint_predictive_scaled(self,x,mu,sigma): + """ + Negative logarithm of the joint predictive distribution (latent variable and output). + + :param x: tuple (latent variable,output) + :param mu: latent variable's predictive mean + :param sigma: latent variable's predictive standard deviation + """ + return self._nlog_product_scaled(x[0],x[1],mu,sigma) + + def _gradient_nlog_joint_predictive(self,x,mu,sigma): + """ + Gradient of _nlog_joint_predictive_scaled. + + :param x: tuple (latent variable,output) + :param mu: latent variable's predictive mean + :param sigma: latent variable's predictive standard deviation + ..Note: Only avilable when the output is continuous + """ + assert not self.discrete, "Gradient not available for discrete outputs." + return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0]))) + + def _hessian_nlog_joint_predictive(self,x,mu,sigma): + """ + Hessian of _nlog_joint_predictive_scaled. + + :param x: tuple (latent variable,output) + :param mu: latent variable's predictive mean + :param sigma: latent variable's predictive standard deviation + ..Note: Only avilable when the output is continuous + """ + assert not self.discrete, "Hessian not available for discrete outputs." + cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1]) + return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2) + + def _joint_predictive_mode(self,mu,sigma): + """ + Negative logarithm of the joint predictive distribution (latent variable and output). 
+
+        :param x: tuple (latent variable,output)
+        :param mu: latent variable's predictive mean
+        :param sigma: latent variable's predictive standard deviation
+        """
+        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma))
+
+    def predictive_values(self,mu,var):
+        """
+        Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction
+        :param mu: mean of the latent variable
+        :param var: variance of the latent variable
+        """
+        if isinstance(mu,float) or isinstance(mu,int):
+            mu = [mu]
+            var = [var]
+        pred_mean = []
+        pred_var = []
+        q1 = []
+        q3 = []
+        for m,s in zip(mu,np.sqrt(var)):
+            pred_mean.append(self.predictive_mean(m,s))
+            pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
+            q1.append(self._predictive_percentiles(.025,m,s))
+            q3.append(self._predictive_percentiles(.975,m,s))
+        pred_mean = np.vstack(pred_mean)
+        pred_var = np.vstack(pred_var)
+        q1 = np.vstack(q1)
+        q3 = np.vstack(q3)
+        return pred_mean, pred_var, q1, q3
diff --git a/GPy/likelihoods/noise_models/poisson_noise.py b/GPy/likelihoods/noise_models/poisson_noise.py
new file mode 100644
index 00000000..e4ce90d3
--- /dev/null
+++ b/GPy/likelihoods/noise_models/poisson_noise.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats,special
+import scipy as sp
+from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import gp_transformations
+from noise_distributions import NoiseDistribution
+
+class Poisson(NoiseDistribution):
+    """
+    Poisson likelihood
+    Y is expected to take values in {0,1,2,...}
+    -----
+    $$
+    L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
+    $$
+    """
+    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
+        #self.discrete = True
+        #self.support_limits = (0,np.inf)
+
+        #self.analytical_mean = False
+        super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
+
+    def _preprocess_values(self,Y): #TODO
+        #self.scale = .5*Y.max()
+        #self.shift = Y.mean()
+        return Y #(Y - self.shift)/self.scale
+
+    def _mass(self,gp,obs):
+        """
+        Mass (or density) function
+        """
+        #obs = obs*self.scale + self.shift
+        return stats.poisson.pmf(obs,self.gp_link.transf(gp))
+
+    def _nlog_mass(self,gp,obs):
+        """
+        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
+        """
+        return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))
+
+    def _dnlog_mass_dgp(self,gp,obs):
+        return self.gp_link.dtransf_df(gp) * (1.
- obs/self.gp_link.transf(gp)) + + def _d2nlog_mass_dgp2(self,gp,obs): + d2_df = self.gp_link.d2transf_df2(gp) + transf = self.gp_link.transf(gp) + return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + + def _dnlog_mass_dobs(self,obs,gp): #TODO not needed + return special.psi(obs+1) - np.log(self.gp_link.transf(gp)) + + def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed + return special.polygamma(1,obs) + + def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed + return -self.gp_link.dtransf_df(gp)/self.gp_link.transf(gp) + + def _mean(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _dmean_dgp(self,gp): + return self.gp_link.dtransf_df(gp) + + def _d2mean_dgp2(self,gp): + return self.gp_link.d2transf_df2(gp) + + def _variance(self,gp): + """ + Mass (or density) function + """ + return self.gp_link.transf(gp) + + def _dvariance_dgp(self,gp): + return self.gp_link.dtransf_df(gp) + + def _d2variance_dgp2(self,gp): + return self.gp_link.d2transf_df2(gp) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index e535ed62..10ce577b 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -14,3 +14,5 @@ from warped_gp import WarpedGP from bayesian_gplvm import BayesianGPLVM from mrd import MRD from gradient_checker import GradientChecker +from gp_multioutput_regression import GPMultioutputRegression +from sparse_gp_multioutput_regression import SparseGPMultioutputRegression diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py index f4cf4e8d..ee92a1b4 100644 --- a/GPy/models/fitc_classification.py +++ b/GPy/models/fitc_classification.py @@ -31,8 +31,8 @@ class FITCClassification(FITC): kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) if likelihood is None: - distribution = likelihoods.likelihood_functions.Binomial() - likelihood = likelihoods.EP(Y, distribution) + noise_model = likelihoods.binomial() + likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index c6012988..73d492fe 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -31,8 +31,8 @@ class GPClassification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - distribution = likelihoods.likelihood_functions.Binomial() - likelihood = likelihoods.EP(Y, distribution) + noise_model = likelihoods.binomial() + likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/models/gp_multioutput_regression.py new file mode 100644 index 00000000..d51f3bae --- /dev/null +++ b/GPy/models/gp_multioutput_regression.py @@ -0,0 +1,59 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from ..core import GP +from .. import likelihoods +from .. 
import kern
+#from ..util import multioutput
+
+class GPMultioutputRegression(GP):
+    """
+    Multiple output Gaussian process with Gaussian noise
+
+    This is a wrapper around the models.GP class, with a set of sensible defaults
+
+    :param X_list: input observations
+    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
+    :param Y_list: observed values
+    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
+    :param kernel_list: GPy kernels, defaults to rbf
+    :type kernel_list: list of GPy kernels
+    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
+    :type noise_variance_list: list of floats
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :param W_columns: number of columns of the coregionalization matrix 'coregion_W' (see coregionalize kernel documentation)
+    :type W_columns: integer
+    """
+
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,W_columns=1):
+
+        self.num_outputs = len(Y_list)
+        assert len(X_list) == self.num_outputs, 'Number of outputs does not match the length of the inputs list.'
+
+        #Inputs indexing
+        i = 0
+        index = []
+        for x,y in zip(X_list,Y_list):
+            assert x.shape[0] == y.shape[0]
+            index.append(np.repeat(i,x.size)[:,None])
+            i += 1
+        index = np.vstack(index)
+        X = np.hstack([np.vstack(X_list),index])
+        original_dim = X.shape[1] - 1
+
+        #Mixed noise likelihood definition
+        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
+
+        #Coregionalization kernel definition
+        if kernel_list is None:
+            kernel_list = [kern.rbf(original_dim)]
+        mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
+
+        self.multioutput = True
+        GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
+        self.ensure_default_constraints()
diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py
index 5f36ebe1..50c2f935 100644
--- a/GPy/models/sparse_gp_classification.py
+++ b/GPy/models/sparse_gp_classification.py
@@ -28,11 +28,11 @@ class SparseGPClassification(SparseGP):
     def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
+            kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)

         if likelihood is None:
-            distribution = likelihoods.likelihood_functions.Binomial()
-            likelihood = likelihoods.EP(Y, distribution)
+            noise_model = likelihoods.binomial()
+            likelihood = likelihoods.EP(Y, noise_model)
         elif Y is not None:
             if not all(Y.flatten() == likelihood.data.flatten()):
                 raise Warning, 'likelihood.data and Y are different.'
diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/models/sparse_gp_multioutput_regression.py
new file mode 100644
index 00000000..041204b6
--- /dev/null
+++ b/GPy/models/sparse_gp_multioutput_regression.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2013, Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from ..core import SparseGP
+from .. import likelihoods
+from ..
import kern
+from ..util import multioutput
+
+class SparseGPMultioutputRegression(SparseGP):
+    """
+    Sparse multiple output Gaussian process with Gaussian noise
+
+    This is a wrapper around the models.SparseGP class, with a set of sensible defaults
+
+    :param X_list: input observations
+    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
+    :param Y_list: observed values
+    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
+    :param kernel_list: GPy kernels, defaults to rbf
+    :type kernel_list: list of GPy kernels
+    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
+    :type noise_variance_list: list of floats
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :param Z_list: inducing inputs (optional)
+    :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | None
+    :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is given)
+    :type num_inducing: integer
+    :param W_columns: number of columns of the coregionalization matrix 'coregion_W' (see coregionalize kernel documentation)
+    :type W_columns: integer
+    """
+    #NOTE not tested with uncertain inputs
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=None,num_inducing=10,W_columns=1):
+
+        self.num_outputs = len(Y_list)
+        assert len(X_list) == self.num_outputs, 'Number of outputs does not match the length of the inputs list.'
+
+        #Inducing inputs list (a mutable default argument is avoided by defaulting Z_list to None)
+        if Z_list:
+            assert len(Z_list) == self.num_outputs, 'Number of outputs does not match the length of the inducing inputs list.'
+        else:
+            Z_list = []
+            if isinstance(num_inducing,np.int):
+                num_inducing = [num_inducing] * self.num_outputs
+            num_inducing = np.asarray(num_inducing)
+            assert num_inducing.size == self.num_outputs, 'Number of outputs does not match the length of the inducing inputs list.'
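+            # default inducing inputs: a random subset of each output's observed inputs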
diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/models/sparse_gp_multioutput_regression.py
new file mode 100644
index 00000000..041204b6
--- /dev/null
+++ b/GPy/models/sparse_gp_multioutput_regression.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2013, Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from ..core import SparseGP
+from .. import likelihoods
+from .. import kern
+from ..util import multioutput
+
+class SparseGPMultioutputRegression(SparseGP):
+    """
+    Sparse multiple output Gaussian process with Gaussian noise
+
+    This is a wrapper around the models.SparseGP class, with a set of sensible defaults
+
+    :param X_list: input observations
+    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
+    :param Y_list: observed values
+    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
+    :param kernel_list: GPy kernels, defaults to rbf
+    :type kernel_list: list of GPy kernels
+    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
+    :type noise_variance_list: list of floats
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :param Z_list: inducing inputs (optional)
+    :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | None
+    :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is given)
+    :type num_inducing: integer
+    :param W_columns: number of columns of the coregionalization matrix 'coregion_W' (see coregionalize kernel documentation)
+    :type W_columns: integer
+    """
+    #NOTE not tested with uncertain inputs
+    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=None,num_inducing=10,W_columns=1):
+
+        self.num_outputs = len(Y_list)
+        assert len(X_list) == self.num_outputs, 'Number of outputs does not match length of inputs list.'
+
+        #Inducing inputs list (default is None rather than [], so a shared default list is never mutated below)
+        if Z_list is None:
+            Z_list = []
+        if len(Z_list):
+            assert len(Z_list) == self.num_outputs, 'Number of outputs does not match length of inducing inputs list.'
+        else:
+            if isinstance(num_inducing,np.int):
+                num_inducing = [num_inducing] * self.num_outputs
+            num_inducing = np.asarray(num_inducing)
+            assert num_inducing.size == self.num_outputs, 'Number of outputs does not match length of num_inducing list.'
+            for ni,X in zip(num_inducing,X_list):
+                i = np.random.permutation(X.shape[0])[:ni]
+                Z_list.append(X[i].copy())
+
+        #Inputs and inducing inputs indexing: one index entry per data row
+        i = 0
+        index = []
+        index_z = []
+        for x,y,z in zip(X_list,Y_list,Z_list):
+            assert x.shape[0] == y.shape[0]
+            index.append(np.repeat(i,x.shape[0])[:,None]) # x.size over-counts when input_dim > 1
+            index_z.append(np.repeat(i,z.shape[0])[:,None])
+            i += 1
+        index = np.vstack(index)
+        index_z = np.vstack(index_z)
+        X = np.hstack([np.vstack(X_list),index])
+        Z = np.hstack([np.vstack(Z_list),index_z])
+        original_dim = X.shape[1] - 1
+
+        #Mixed noise likelihood definition
+        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)
+
+        #Coregionalization kernel definition
+        if kernel_list is None:
+            kernel_list = [kern.rbf(original_dim)]
+        mkernel = kern.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, kernel_list = kernel_list, W_columns=W_columns)
+
+        self.multioutput = True
+        SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)
+        self.constrain_fixed('.*iip_\d+_1')
+        self.ensure_default_constraints()
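A minimal usage sketch for the sparse variant, analogous to the dense example above (toy data; inducing inputs are drawn at random from each X_i when Z_list is omitted):

    import numpy as np
    import GPy

    X1 = np.random.rand(100, 1) * 8
    X2 = np.random.rand(80, 1) * 5
    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05

    # 10 inducing inputs per output, selected automatically
    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2],
                                                 kernel_list=[GPy.kern.rbf(1)],
                                                 num_inducing=10)
    m.optimize()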
unittest.main()
diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py
index 64674f4a..d2e23887 100644
--- a/GPy/models/sparse_gp_regression.py
+++ b/GPy/models/sparse_gp_regression.py
@@ -20,7 +20,11 @@ class SparseGPRegression(SparseGP):
     :type normalize_X: False|True
     :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_Y: False|True
+    :param Z: inducing inputs (optional, see note)
+    :type Z: np.ndarray (num_inducing x input_dim) | None
+    :param X_variance: the uncertainty in the measurements of X (Gaussian variance)
+    :type X_variance: np.ndarray (num_data x input_dim) | None
     :rtype: model object
 
     .. Note:: Multiple independent outputs are allowed using columns of Y
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 3f81f472..9329aba0 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -4,15 +4,14 @@ import unittest
 import numpy as np
 import GPy
 
 verbose = False
 
 class KernelTests(unittest.TestCase):
     def test_kerneltie(self):
         K = GPy.kern.rbf(5, ARD=True)
         K.tie_params('.*[01]')
         K.constrain_fixed('2')
-
         X = np.random.rand(5,5)
         Y = np.ones((5,1))
         m = GPy.models.GPRegression(X,Y,K)
@@ -95,7 +94,6 @@ class KernelTests(unittest.TestCase):
         self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
 
-
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
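The kernel gradient checks these tests rely on can be reproduced interactively; a minimal sketch using the same calls (the tie/constrain patterns are regular expressions over parameter names):

    import numpy as np
    import GPy

    K = GPy.kern.rbf(5, ARD=True)
    K.tie_params('.*[01]')   # tie the parameters matching lengthscales 0 and 1
    K.constrain_fixed('2')   # fix parameter 2

    X = np.random.rand(5, 5)
    Y = np.ones((5, 1))
    m = GPy.models.GPRegression(X, Y, K)
    assert m.checkgrad()     # finite-difference check of the analytic gradients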
diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py
index da71754b..30ca14d6 100644
--- a/GPy/testing/psi_stat_expectation_tests.py
+++ b/GPy/testing/psi_stat_expectation_tests.py
@@ -7,9 +7,14 @@ import unittest
 import GPy
 import numpy as np
 from GPy import testing
+import sys
+import numpy
+from GPy.kern.parts.rbf import RBF
+from GPy.kern.parts.linear import Linear
+from copy import deepcopy
 
-__test__ = False
-np.random.seed(0)
+__test__ = lambda: 'deep' in sys.argv
+# np.random.seed(0)
 
 def ard(p):
     try:
@@ -19,24 +24,37 @@ def ard(p):
         pass
     return ""
 
-@testing.deepTest(__test__)
+@testing.deepTest(__test__())
 class Test(unittest.TestCase):
     input_dim = 9
     num_inducing = 4
     N = 3
-    Nsamples = 6e6
+    Nsamples = 5e6
 
     def setUp(self):
+        i_s_dim_list = [2,4,3]
+        indices = numpy.cumsum(i_s_dim_list).tolist()
+        input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)]
+        #input_slices[2] = deepcopy(input_slices[1])
+        input_slice_kern = GPy.kern.kern(9,
+                                         [
+                                          RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True),
+                                          RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True),
+                                          Linear(i_s_dim_list[2], np.random.rand(i_s_dim_list[2]), ARD=True)
+                                         ],
+                                         input_slices=input_slices
+                                         )
         self.kerns = (
+                      input_slice_kern,
                       # (GPy.kern.rbf(self.input_dim, ARD=True) +
                       # GPy.kern.linear(self.input_dim, ARD=True) +
                       # GPy.kern.bias(self.input_dim) +
                       # GPy.kern.white(self.input_dim)),
-                      (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-                       GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-                       GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
-                       GPy.kern.bias(self.input_dim) +
-                       GPy.kern.white(self.input_dim)),
+#                      (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
+#                       GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
+#                       GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
+#                       GPy.kern.bias(self.input_dim) +
+#                       GPy.kern.white(self.input_dim)),
                       # GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
                       # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
                       # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
@@ -61,22 +79,22 @@ class Test(unittest.TestCase):
 
     def test_psi1(self):
         for kern in self.kerns:
-            Nsamples = 100
+            Nsamples = int(self.Nsamples // 300) # keep integral: used as an array shape and slice bound below
             psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
             K_ = np.zeros((Nsamples, self.num_inducing))
             diffs = []
             for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
-                K = kern.K(q_x_sample_stripe, self.Z)
+                K = kern.K(q_x_sample_stripe[:Nsamples], self.Z)
                 K_ += K
-                diffs.append(((psi1 - (K_ / (i + 1)))).mean())
+                diffs.append((np.abs(psi1 - (K_ / (i + 1)))**2).mean())
             K_ /= self.Nsamples / Nsamples
             msg = "psi1: " + "+".join([p.name + ard(p) for p in kern.parts])
             try:
                 import pylab
                 pylab.figure(msg)
                 pylab.plot(diffs)
-                self.assertTrue(np.allclose(psi1.squeeze(), K_,
-                                            rtol=1e-1, atol=.1),
+#                print msg, ((psi1.squeeze() - K_)**2).mean() < .01
+                self.assertTrue(((psi1.squeeze() - K_)**2).mean() < .01,
                                 msg=msg + ": not matching")
                 # sys.stdout.write(".")
             except:
@@ -87,7 +105,7 @@ class Test(unittest.TestCase):
 
     def test_psi2(self):
         for kern in self.kerns:
-            Nsamples = 100
+            Nsamples = int(self.Nsamples // 300)
             psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
             K_ = np.zeros((self.num_inducing, self.num_inducing))
             diffs = []
@@ -95,13 +113,14 @@ class Test(unittest.TestCase):
                 K = kern.K(q_x_sample_stripe, self.Z)
                 K = (K[:, :, None] * K[:, None, :]).mean(0)
                 K_ += K
-                diffs.append(((psi2 - (K_ / (i + 1)))).mean())
+                diffs.append(((psi2 - (K_ / (i + 1)))**2).mean())
             K_ /= self.Nsamples / Nsamples
             msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
             try:
                 import pylab
                 pylab.figure(msg)
                 pylab.plot(diffs)
+#                print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
                 self.assertTrue(np.allclose(psi2.squeeze(), K_,
                                             rtol=1e-1, atol=.1),
                                 msg=msg + ": not matching")
@@ -114,10 +133,8 @@ class Test(unittest.TestCase):
                 pass
 
 if __name__ == "__main__":
-    import sys
-    __test__ = 'deep' in sys.argv
     sys.argv = ['',
-               'Test.test_psi0',
+               #'Test.test_psi0',
                'Test.test_psi1',
                'Test.test_psi2',
                ]
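In essence these tests check that psi1 is the expectation of the kernel under a Gaussian density over the inputs, psi1(Z, mu, S) ~ E_{N(x|mu,S)}[K(x, Z)]; a standalone Monte Carlo version of the check (a sketch with toy sizes and a diagonal variance; names follow the test above):

    import numpy as np
    import GPy

    input_dim, num_inducing = 2, 4
    kern = GPy.kern.rbf(input_dim, ARD=True)
    Z = np.random.randn(num_inducing, input_dim)
    q_x_mean = np.random.randn(1, input_dim)
    q_x_variance = np.random.rand(1, input_dim)

    psi1 = kern.psi1(Z, q_x_mean, q_x_variance)

    # Monte Carlo estimate of E[K(x, Z)] for x ~ N(q_x_mean, diag(q_x_variance))
    samples = q_x_mean + np.sqrt(q_x_variance) * np.random.randn(100000, input_dim)
    K_mc = kern.K(samples, Z).mean(0)
    print np.abs(psi1.squeeze() - K_mc).max()  # shrinks as the sample count grows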
unittest.main()
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 246d40d1..6bb624df 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -5,7 +5,6 @@
 import unittest
 import numpy as np
 import GPy
-from GPy.likelihoods.likelihood_functions import Binomial
 
 class GradientTests(unittest.TestCase):
     def setUp(self):
@@ -199,10 +198,7 @@ class GradientTests(unittest.TestCase):
         X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
         Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
         kernel = GPy.kern.rbf(1)
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        m = GPy.core.GP(X, likelihood, kernel)
-        m.ensure_default_constraints()
+        m = GPy.models.GPClassification(X,Y,kernel=kernel)
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
@@ -212,10 +208,11 @@ class GradientTests(unittest.TestCase):
         Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
         Z = np.linspace(0, 15, 4)[:, None]
         kernel = GPy.kern.rbf(1)
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        m = GPy.core.SparseGP(X, likelihood, kernel, Z)
-        m.ensure_default_constraints()
+        m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
+        #distribution = GPy.likelihoods.likelihood_functions.Binomial()
+        #likelihood = GPy.likelihoods.EP(Y, distribution)
+        #m = GPy.core.SparseGP(X, likelihood, kernel, Z)
+        #m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
@@ -224,10 +221,24 @@ class GradientTests(unittest.TestCase):
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)
         Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
-        m = GPy.models.FITCClassification(X, Y=Y)
+        m = GPy.models.FITCClassification(X, Y, kernel = k)
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
+    def multioutput_regression_1D(self):
+        X1 = np.random.rand(50, 1) * 8
+        X2 = np.random.rand(30, 1) * 5
+        X = np.vstack((X1, X2))
+        Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
+        Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
+        Y = np.vstack((Y1, Y2))
+
+        k1 = GPy.kern.rbf(1)
+        m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
+        m.constrain_fixed('.*rbf_var', 1.)
+        self.assertTrue(m.checkgrad())
+
+
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py
index 99548268..c205548b 100644
--- a/GPy/util/__init__.py
+++ b/GPy/util/__init__.py
@@ -14,3 +14,4 @@ import visualize
 import decorators
 import classification
 import latent_space_visualizations
+#import multioutput
diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py
new file mode 100644
index 00000000..2b06ba95
--- /dev/null
+++ b/GPy/util/multioutput.py
@@ -0,0 +1,37 @@
+import numpy as np
+import warnings
+from .. import kern
+
+def build_lcm(input_dim, num_outputs, CK=[], NC=[], W_columns=1, W=None, kappa=None):
+    #TODO build_icm or build_lcm
+    """
+    Builds a kernel for a linear coregionalization model
+
+    :param input_dim: input dimensionality
+    :param num_outputs: number of outputs
+    :param CK: list of coregionalized kernels (i.e., each will be multiplied by a coregionalise kernel)
+    :param NC: list of kernels that will be added up together with CK, but won't be multiplied by a coregionalise kernel
+    :param W_columns: number of columns of the coregionalization matrix 'coregion_W'
+    :type W_columns: integer
+    :param W: initial value of the coregionalization matrix (optional, passed to the coregionalise kernel)
+    :param kappa: initial value of the coregionalise kernel's kappa parameter (optional)
+    """
+
+    for k in CK:
+        if k.input_dim != input_dim:
+            k.input_dim = input_dim
+            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
+
+    for k in NC:
+        if k.input_dim != input_dim + 1:
+            k.input_dim = input_dim + 1
+            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
+
+    kernel = CK[0].prod(kern.coregionalise(num_outputs,W_columns,W,kappa),tensor=True)
+    for k in CK[1:]:
+        k_coreg = kern.coregionalise(num_outputs,W_columns,W,kappa)
+        kernel += k.prod(k_coreg,tensor=True)
+    for k in NC:
+        kernel += k
+
+    return kernel
diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py
index 28946894..5a5880d5 100644
--- a/GPy/util/univariate_Gaussian.py
+++ b/GPy/util/univariate_Gaussian.py
@@ -32,4 +32,15 @@ def std_norm_cdf(x):
     x = float(x)
     return weave.inline(code,arg_names=['x'],support_code=support_code)
 
+def inv_std_norm_cdf(x):
+    """
+    Inverse cumulative standard Gaussian distribution (quantile function),
+    based on Winitzki's (2008) approximation of the inverse error function
+    """
+    z = 2*x - 1
+    ln1z2 = np.log(1-z**2)
+    a = 8*(np.pi - 3)/(3*np.pi*(4 - np.pi))
+    b = 2/(np.pi*a) + ln1z2/2
+    inv_erf = np.sign(z) * np.sqrt( np.sqrt(b**2 - ln1z2/a) - b )
+    return np.sqrt(2) * inv_erf
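A quick sanity check of the Winitzki approximation against SciPy's exact quantile function (assuming SciPy is available; the approximation trades a few decimal places of accuracy for a cheap closed form):

    import numpy as np
    from scipy.stats import norm
    from GPy.util.univariate_Gaussian import inv_std_norm_cdf

    p = np.linspace(0.01, 0.99, 99)
    err = np.abs(inv_std_norm_cdf(p) - norm.ppf(p))
    print err.max()  # small, but not machine precision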