diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py
index ef171459..eac00fec 100644
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@@ -140,7 +140,6 @@ class FITC(SparseGP):
         dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
         dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
-        dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
         dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
         alpha = mdot(LBiLmipsi1,self.V_star)
@@ -174,7 +173,7 @@ class FITC(SparseGP):
     def dL_dZ(self):
         dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
-        dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
+        dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
         dL_dZ += self._dpsi1_dX
         dL_dZ += self._dKmm_dX
         return dL_dZ
diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 278ddc74..63903242 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -19,9 +19,6 @@ class GP(GPBase):
     :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_X: False|True
     :rtype: model object
-    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
-    :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
-    :type powerep: list

     .. Note:: Multiple independent outputs are allowed using columns of Y

@@ -105,7 +102,12 @@ class GP(GPBase):

         Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
         """
-        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        # EP and non-EP likelihoods currently share the same gradient expression,
+        # so no isinstance(self.likelihood, EP) branching is needed here.
+        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))

     def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
         """
@@ -136,7 +138,7 @@ class GP(GPBase):
         :type Xnew: np.ndarray, Nnew x self.input_dim
         :param which_parts: specifies which outputs kernel(s) to use in prediction
         :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
         :type full_cov: bool
         :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
         :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -153,5 +155,70 @@ class GP(GPBase):

         # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
         return mean, var, _025pm, _975pm
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        For a specific output, predict the function at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+        .. Note:: For multiple output models only
+        """
+        assert hasattr(self, 'multioutput'), 'This function is for multiple output models only.'
+        index = np.ones_like(Xnew) * output
+        Xnew = np.hstack((Xnew, index))
+
+        # normalize X values
+        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+        # now push through likelihood
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model=output)
+        return mean, var, _025pm, _975pm
+
+    def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False, stop=False):
+        """
+        Internal helper function for making predictions for a specific output,
+        does not account for normalization or likelihood
+        ---------
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+        .. Note:: For multiple output models only
+        """
+        assert hasattr(self, 'multioutput'), 'This function is for multiple output models only.'
+        # creates an index column and appends it to _Xnew
+        index = np.ones_like(_Xnew) * output
+        _Xnew = np.hstack((_Xnew, index))
+
+        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
+        KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
+        mu = np.dot(KiKx.T, self.likelihood.Y)
+        if full_cov:
+            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            var = Kxx - np.dot(KiKx.T, Kx)
+        else:
+            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
+            var = var[:, None]
+        if stop:
+            debug_this # @UndefinedVariable
+        return mu, var
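A minimal usage sketch of the new per-output prediction path. The data and model construction are hypothetical, assuming the list-based GPMultioutputRegression model used in the examples further down this patch:

    import numpy as np
    import GPy

    X1, X2 = np.random.rand(50, 1) * 8, np.random.rand(30, 1) * 5
    Y1, Y2 = np.sin(X1), -np.sin(X2)
    m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2],
                                           kernel_list=[GPy.kern.rbf(1)])
    Xnew = np.linspace(0, 8, 100)[:, None]   # raw inputs; no index column needed
    mean, var, lower, upper = m.predict_single_output(Xnew, output=1)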
diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 6935fc6f..e26deb0f 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -57,18 +57,12 @@ class GPBase(Model):
         self.X = state.pop()
         Model.setstate(self, state)

-    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
+    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None, output=None):
         """
         Plot the GP's view of the world, where the data is normalized and the
         likelihood is Gaussian.

         Plot the posterior of the GP.
         - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-        - In two dimsensions, a contour-plot shows the mean predicted function
-        - In higher dimensions, we've no implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions
-        using which_data and which_functions
+        - In two dimensions, a contour-plot shows the mean predicted function
+        - Not implemented in higher dimensions

         :param samples: the number of a posteriori samples to plot
         :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
@@ -85,6 +79,8 @@ class GPBase(Model):
         :param ax: axes to plot on.
         :type ax: axes handle
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
         """
         if which_data == 'all':
             which_data = slice(None)

@@ -93,44 +89,90 @@ class GPBase(Model):
             fig = pb.figure(num=fignum)
             ax = fig.add_subplot(111)

-        if self.X.shape[1] == 1:
-            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-            if samples == 0:
-                m, v = self._raw_predict(Xnew, which_parts=which_parts)
-                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
-                ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
-            else:
-                m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
-                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
-                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
-                for i in range(samples):
-                    ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
-                ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
-            ax.set_xlim(xmin, xmax)
-            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
-            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-            ax.set_ylim(ymin, ymax)
-
-        elif self.X.shape[1] == 2:
-            resolution = resolution or 50
-            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
-            m, v = self._raw_predict(Xnew, which_parts=which_parts)
-            m = m.reshape(resolution, resolution).T
-            ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
-            ax.set_xlim(xmin[0], xmax[0])
-            ax.set_ylim(xmin[1], xmax[1])
-        else:
-            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+        if not hasattr(self, 'multioutput'):
+
+            if self.X.shape[1] == 1:
+                Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+                if samples == 0:
+                    m, v = self._raw_predict(Xnew, which_parts=which_parts)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+                    ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+                else:
+                    m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
+                    v = v.reshape(m.size, -1) if len(v.shape) == 3 else v
+                    Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+                    for i in range(samples):
+                        ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+                    ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
+                ax.set_xlim(xmin, xmax)
+                ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_ylim(ymin, ymax)
+
+                if hasattr(self, 'Z'):
+                    Zu = self.Z * self._Xscale + self._Xoffset
+                    ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+            elif self.X.shape[1] == 2:
+                resolution = resolution or 50
+                Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
+                m, v = self._raw_predict(Xnew, which_parts=which_parts)
+                m = m.reshape(resolution, resolution).T
+                ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+                ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
+                ax.set_xlim(xmin[0], xmax[0])
+                ax.set_ylim(xmin[1], xmax[1])
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+        else:
+            assert self.num_outputs > output, 'The model has only %s outputs.' % self.num_outputs
+
+            if self.X.shape[1] == 2:
+                Xu = self.X[self.X[:, -1] == output, 0:1]
+                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+
+                if samples == 0:
+                    m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+                    ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index == output][:, None], 'kx', mew=1.5)
+                else:
+                    m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
+                    v = v.reshape(m.size, -1) if len(v.shape) == 3 else v
+                    Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+                    gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
+                    for i in range(samples):
+                        ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+                ax.set_xlim(xmin, xmax)
+                ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_ylim(ymin, ymax)
+
+            elif self.X.shape[1] == 3:
+                raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+            if hasattr(self, 'Z'):
+                Zu = self.Z[self.Z[:, -1] == output, :]   # keep the output of interest
+                Zu = Zu * self._Xscale + self._Xoffset
+                Zu = Zu[:, 0:1]                           # drop the index column
+                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)

-    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue']):
         """
         Plot the GP with noise where the likelihood is Gaussian.

         Plot the posterior of the GP.
         - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-        - In two dimsensions, a contour-plot shows the mean predicted function
-        - In higher dimensions, we've no implemented this yet !TODO!
+        - In two dimensions, a contour-plot shows the mean predicted function
+        - Not implemented in higher dimensions

         Can plot only part of the data and part of the posterior functions
         using which_data and which_functions
@@ -151,15 +193,13 @@ class GPBase(Model):
         :type fignum: figure number
         :param ax: axes to plot on.
         :type ax: axes handle
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
         :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
         :type fixed_inputs: a list of tuples
         :param linecol: color of line to plot.
         :type linecol:
         :param fillcol: color of fill
-        :type fillcol:
-        :param levels: for 2D plotting, the number of contour levels to use
-        is ax is None, create a new figure
-
+        :type fillcol:
+        :param levels: for 2D plotting, the number of contour levels to use; if ax is None, create a new figure
         """
         # TODO include samples
         if which_data == 'all':
@@ -169,41 +209,81 @@ class GPBase(Model):
             fig = pb.figure(num=fignum)
             ax = fig.add_subplot(111)

-        plotdims = self.input_dim - len(fixed_inputs)
-
-        if plotdims == 1:
-
-            Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
-
-            fixed_dims = np.array([i for i,v in fixed_inputs])
-            freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
-
-            Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
-            Xgrid = np.empty((Xnew.shape[0],self.input_dim))
-            Xgrid[:,freedim] = Xnew
-            for i,v in fixed_inputs:
-                Xgrid[:,i] = v
-
-            m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
-            for d in range(m.shape[1]):
-                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
-                ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
-            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
-            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
-            ax.set_xlim(xmin, xmax)
-            ax.set_ylim(ymin, ymax)
-
-        elif self.X.shape[1] == 2: # FIXME
-            resolution = resolution or 50
-            Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
-            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
-            m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
-            m = m.reshape(resolution, resolution).T
-            ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-            Yf = self.likelihood.data.flatten()
-            ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
-            ax.set_xlim(xmin[0], xmax[0])
-            ax.set_ylim(xmin[1], xmax[1])
-        else:
-            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+        if not hasattr(self, 'multioutput'):
+
+            plotdims = self.input_dim - len(fixed_inputs)
+            if plotdims == 1:
+                resolution = resolution or 200
+
+                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+
+                fixed_dims = np.array([i for i,v in fixed_inputs])
+                freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
+
+                Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
+                Xgrid = np.empty((Xnew.shape[0],self.input_dim))
+                Xgrid[:,freedim] = Xnew
+                for i,v in fixed_inputs:
+                    Xgrid[:,i] = v
+
+                m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
+                for d in range(m.shape[1]):
+                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
+                    ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
+                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_xlim(xmin, xmax)
+                ax.set_ylim(ymin, ymax)
+
+            elif self.X.shape[1] == 2:
+                resolution = resolution or 50
+                Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
+                x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
+                m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
+                m = m.reshape(resolution, resolution).T
+                ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+                Yf = self.likelihood.Y.flatten()
+                ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
+                ax.set_xlim(xmin[0], xmax[0])
+                ax.set_ylim(xmin[1], xmax[1])
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+        else:
+            assert self.num_outputs > output, 'The model has only %s outputs.' % self.num_outputs
+            if self.X.shape[1] == 2:
+                resolution = resolution or 200
+                Xu = self.X[self.X[:, -1] == output, :]   # keep the output of interest
+                Xu = Xu * self._Xscale + self._Xoffset
+                Xu = Xu[:, 0:1]                           # get rid of the index column
+
+                Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+                m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts, output=output)
+
+                for d in range(m.shape[1]):
+                    gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+                    ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
+                ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+                ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+                ax.set_xlim(xmin, xmax)
+                ax.set_ylim(ymin, ymax)
+
+            elif self.X.shape[1] == 3:
+                raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
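The per-output plotting added above is exercised by the examples later in this patch; the intended call pattern is simply to pass the output index, for instance:

    fig, axes = pb.subplots(2, 1)
    m.plot(output=0, ax=axes[0])   # first output of a multioutput model m
    m.plot(output=1, ax=axes[1])   # second output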
""" self.preferred_optimizer = state.pop() self.sampling_runs = state.pop() @@ -397,17 +397,20 @@ class Model(Parameterized): return np.nan return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld - def __str__(self, names=None): - if names is None: - names = self._get_print_names() - s = Parameterized.__str__(self, names=names).split('\n') + def __str__(self): + s = Parameterized.__str__(self).split('\n') + #def __str__(self, names=None): + # if names is None: + # names = self._get_print_names() + #s = Parameterized.__str__(self, names=names).split('\n') # add priors to the string if self.priors is not None: strs = [str(p) if p is not None else '' for p in self.priors] else: - strs = [''] * len(self._get_param_names()) - name_indices = self.grep_param_names("|".join(names)) - strs = np.array(strs)[name_indices] + strs = [''] * len(self._get_params()) + # strs = [''] * len(self._get_param_names()) + # name_indices = self.grep_param_names("|".join(names)) + # strs = np.array(strs)[name_indices] width = np.array(max([len(p) for p in strs] + [5])) + 4 log_like = self.log_likelihood() @@ -456,9 +459,9 @@ class Model(Parameterized): gradient = self.objective_function_gradients(x) numerical_gradient = (f1 - f2) / (2 * dx) - global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient)) - - return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance + global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient))) + + return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) else: # check the gradient of each parameter individually, and do some pretty printing try: @@ -496,7 +499,7 @@ class Model(Parameterized): gradient = self.objective_function_gradients(x)[i] numerical_gradient = (f1 - f2) / (2 * step) - ratio = (f1 - f2) / (2 * step * gradient) + ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient)) difference = np.abs((f1 - f2) / 2 / step - gradient) if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: @@ -549,7 +552,7 @@ class Model(Parameterized): :type optimzer: string TODO: valid strings? """ - assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods" + assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods" ll_change = epsilon + 1. iteration = 0 last_ll = -np.inf diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py index fe6eba62..45aa70d5 100644 --- a/GPy/core/parameterized.py +++ b/GPy/core/parameterized.py @@ -27,9 +27,9 @@ class Parameterized(object): def _get_param_names(self): raise NotImplementedError, "this needs to be implemented to use the Parameterized class" - def _get_print_names(self): - """ Override for which names to print out, when using print m """ - return self._get_param_names() + #def _get_print_names(self): + # """ Override for which names to print out, when using print m """ + # return self._get_param_names() def pickle(self, filename, protocol=None): if protocol is None: @@ -63,10 +63,10 @@ class Parameterized(object): """ Get the current state of the class, here just all the indices, rest can get recomputed - For inheriting from Parameterized: - Allways append the state of the inherited object - and call down to the inherited object in setstate!! 
diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py
index fe6eba62..45aa70d5 100644
--- a/GPy/core/parameterized.py
+++ b/GPy/core/parameterized.py
@@ -27,9 +27,9 @@ class Parameterized(object):
     def _get_param_names(self):
         raise NotImplementedError, "this needs to be implemented to use the Parameterized class"

-    def _get_print_names(self):
-        """ Override for which names to print out, when using print m """
-        return self._get_param_names()

     def pickle(self, filename, protocol=None):
         if protocol is None:
@@ -63,10 +63,10 @@ class Parameterized(object):
         """
         Get the current state of the class,
         here just all the indices, rest can get recomputed

-        For inheriting from Parameterized:
-        Allways append the state of the inherited object
-        and call down to the inherited object in setstate!!
+
+        Always append the state of the inherited object
+        and call down to the inherited object in setstate!!
         """
         return [self.tied_indices,
                 self.fixed_indices,
@@ -336,26 +336,30 @@ class Parameterized(object):
             n = [nn for i, nn in enumerate(n) if not i in remove]
         return n

-    @property
-    def all(self):
-        return self.__str__(self._get_param_names())
-
-    def __str__(self, names=None, nw=30):
+    def __str__(self, nw=30):
         """
         Return a string describing the parameter names and their ties and constraints
         """
-        if names is None:
-            names = self._get_print_names()
-        name_indices = self.grep_param_names("|".join(names))
+        names = self._get_param_names()
         N = len(names)

         if not N:
             return "This object has no free parameters."
         header = ['Name', 'Value', 'Constraints', 'Ties']
-        values = self._get_params()[name_indices] # map(str,self._get_params())
+        values = self._get_params() # map(str,self._get_params())
         # sort out the constraints
-        constraints = [''] * len(self._get_param_names())
+        constraints = [''] * len(names)
         for i, t in zip(self.constrained_indices, self.constraints):
             for ii in i:
                 constraints[ii] = t.__str__()
@@ -368,7 +372,10 @@ class Parameterized(object):
             for j in tie:
                 ties[j] = '(' + str(i) + ')'

-        values = ['%.4f' % float(v) for v in values]
+        if values.size == 1:
+            values = ['%.4f' % float(values)]
+        else:
+            values = ['%.4f' % float(v) for v in values]
         max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
         max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
         max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
@@ -383,3 +390,77 @@ class Parameterized(object):

         return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
+
+    def grep_model(self, regexp):
+        regexp_indices = self.grep_param_names(regexp)
+        all_names = self._get_param_names()
+
+        names = [all_names[pj] for pj in regexp_indices]
+        N = len(names)
+
+        if not N:
+            return "Match not found."
+
+        header = ['Name', 'Value', 'Constraints', 'Ties']
+        all_values = self._get_params()
+        values = np.array([all_values[pj] for pj in regexp_indices])
+        constraints = [''] * len(names)
+
+        _constrained_indices, aux = self._pick_elements(regexp_indices, self.constrained_indices)
+        _constraints = [self.constraints[pj] for pj in aux]
+
+        for i, t in zip(_constrained_indices, _constraints):
+            for ii in i:
+                iii = regexp_indices.tolist().index(ii)
+                constraints[iii] = t.__str__()
+
+        _fixed_indices, aux = self._pick_elements(regexp_indices, self.fixed_indices)
+        for i in _fixed_indices:
+            for ii in i:
+                iii = regexp_indices.tolist().index(ii)
+                constraints[iii] = 'Fixed'
+
+        _tied_indices, aux = self._pick_elements(regexp_indices, self.tied_indices)
+        ties = [''] * len(names)
+        for i, ti in zip(_tied_indices, aux):
+            for ii in i:
+                iii = regexp_indices.tolist().index(ii)
+                ties[iii] = '(' + str(ti) + ')'
+
+        if values.size == 1:
+            values = ['%.4f' % float(values)]
+        else:
+            values = ['%.4f' % float(v) for v in values]
+
+        max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
+        max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
+        max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
+        max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
+        cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
+
+        header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
+        header_string = map(lambda x: '|'.join(x), [header_string])
+        separator = '-' * len(header_string[0])
+        param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
+
+        print header_string[0]
+        print separator
+        for string in param_string:
+            print string
+
+    def _pick_elements(self, regexp_ind, array_list):
+        """Removes from array_list the elements different from regexp_ind"""
+        new_array_list = [] # new list with elements matching regexp_ind
+        array_indices = [] # indices that match the arrays in new_array_list and array_list
+
+        array_index = 0
+        for array in array_list:
+            _new = []
+            for ai in array:
+                if ai in regexp_ind:
+                    _new.append(ai)
+            if len(_new):
+                new_array_list.append(np.array(_new))
+                array_indices.append(array_index)
+            array_index += 1
+        return new_array_list, array_indices
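A short interactive sketch of the new grep_model helper (model and parameter names are hypothetical; it prints a Name/Value/Constraints/Ties table for the matching subset):

    m = GPy.models.GPRegression(X, Y)   # any Parameterized model
    m.grep_model('rbf')                 # show only parameters whose names match 'rbf'
    m.grep_model('.*variance')          # regular expressions, as in grep_param_names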
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 1b5fd814..32ceea62 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -5,7 +5,7 @@ import numpy as np
 import pylab as pb
 from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
 from scipy import linalg
-from ..likelihoods import Gaussian
+from ..likelihoods import Gaussian, EP, EP_Mixed_Noise
 from gp_base import GPBase

 class SparseGP(GPBase):
@@ -109,7 +109,6 @@ class SparseGP(GPBase):
             tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
             self._A = tdot(tmp)

-        # factor B
         self.B = np.eye(self.num_inducing) + self._A
         self.LB = jitchol(self.B)
@@ -139,6 +138,7 @@ class SparseGP(GPBase):
             dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)

         if self.likelihood.is_heteroscedastic:
+
             if self.has_uncertain_inputs:
                 self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
             else:
@@ -160,9 +160,27 @@ class SparseGP(GPBase):
                 # save computation here.
                 self.partial_for_likelihood = None
             elif self.likelihood.is_heteroscedastic:
-                raise NotImplementedError, "heteroscedatic derivates not implemented"
+
+                if self.has_uncertain_inputs:
+                    raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
+
+                else:
+
+                    LBi = chol_inv(self.LB)
+                    Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
+                    _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+
+                    self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
+                    self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
+
+                    self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
+
+                    self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
+                    self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
+
             else:
-                # likelihood is not heterscedatic
+                # likelihood is not heteroscedastic
                 self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
                 self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
                 self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
@@ -194,8 +212,8 @@ class SparseGP(GPBase):
         return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
             + self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def _get_print_names(self):
-        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

     def update_likelihood_approximation(self):
         """
@@ -240,7 +258,7 @@ class SparseGP(GPBase):
         """
         The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
             dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
             dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
@@ -298,7 +316,7 @@ class SparseGP(GPBase):
         :type X_variance_new: np.ndarray, Nnew x self.input_dim
         :param which_parts: specifies which outputs kernel(s) to use in prediction
         :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
         :type full_cov: bool
         :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
         :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -322,26 +340,133 @@ class SparseGP(GPBase):

         return mean, var, _025pm, _975pm

-    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
-
+    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
         if ax is None:
             fig = pb.figure(num=fignum)
             ax = fig.add_subplot(111)

         if which_data is 'all':
             which_data = slice(None)

-        GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
+        GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)

-        # add the inducing inputs and some errorbars
-        if self.X.shape[1] == 1:
-            if self.has_uncertain_inputs:
-                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
-                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
-                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
-                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
-            Zu = self.Z * self._Xscale + self._Xoffset
-            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+        if not hasattr(self, 'multioutput'):

-        elif self.X.shape[1] == 2:
-            Zu = self.Z * self._Xscale + self._Xoffset
-            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+            if self.X.shape[1] == 1:
+                if self.has_uncertain_inputs:
+                    Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                    ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+                Zu = self.Z * self._Xscale + self._Xoffset
+                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+            elif self.X.shape[1] == 2:
+                Zu = self.Z * self._Xscale + self._Xoffset
+                ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+        else:
+            pass
+            """
+            if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+                Xu = self.X[self.X[:,-1]==output,:]
+                if self.has_uncertain_inputs:
+                    Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+
+                    Xu = self.X[self.X[:,-1]==output ,0:1] #??
+
+                    ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                                xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                                ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+
+                Zu = self.Z[self.Z[:,-1]==output,:]
+                Zu = self.Z * self._Xscale + self._Xoffset
+                Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
+                ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+                #ax.set_ylim(ax.get_ylim()[0],)
+
+            else:
+                raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+            """
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        For a specific output, predict the function at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+        .. Note:: For multiple output models only
+        """
+
+        assert hasattr(self, 'multioutput')
+        index = np.ones_like(Xnew) * output
+        Xnew = np.hstack((Xnew, index))
+
+        # normalize X values
+        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+        # now push through likelihood
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model=output)
+        return mean, var, _025pm, _975pm
+
+    def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False, stop=False):
+        """
+        Internal helper function for making predictions for a specific output,
+        does not account for normalization or likelihood
+        ---------
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts: specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+        .. Note:: For multiple output models only
+        """
+        Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
+        symmetrify(Bi)
+        Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
+
+        if self.Cpsi1V is None:
+            psi1V = np.dot(self.psi1.T, self.likelihood.V)
+            tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+            tmp, _ = dpotrs(self.LB, tmp, lower=1)
+            self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
+
+        assert hasattr(self, 'multioutput')
+        index = np.ones_like(_Xnew) * output
+        _Xnew = np.hstack((_Xnew, index))
+
+        if X_variance_new is None:
+            Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
+            mu = np.dot(Kx.T, self.Cpsi1V)
+            if full_cov:
+                Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+                var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
+            else:
+                Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+                var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
+        else:
+            Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
+            mu = np.dot(Kx, self.Cpsi1V)
+            if full_cov:
+                raise NotImplementedError, "TODO"
+            else:
+                Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
+                psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
+                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
+
+        return mu, var[:, None]
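Both single-output helpers rely on the same trick: the requested output index is appended to the raw inputs as an extra column, which the coregionalize kernel part later consumes. A standalone NumPy illustration with made-up values:

    import numpy as np

    Xnew = np.linspace(0, 5, 4)[:, None]    # Nnew x 1 raw inputs
    output = 1
    index = np.ones_like(Xnew) * output     # constant index column
    Xaug = np.hstack((Xnew, index))         # Nnew x 2; the last column selects the output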
diff --git a/GPy/core/transformations.py b/GPy/core/transformations.py
index eeba3e51..d1964440 100644
--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@@ -18,9 +18,11 @@ class Transformation(object):
     def gradfactor(self, f):
         """ df_dx evaluated at self.f(x)=f"""
         raise NotImplementedError
+
     def initialize(self, f):
         """ produce a sensible initial value for f(x)"""
         raise NotImplementedError
+
     def __str__(self):
         raise NotImplementedError

@@ -47,8 +49,6 @@ class Negative_logexp(Transformation):
         return Logexp.finv(-f) # np.log(np.exp(-f) - 1.)
     def gradfactor(self, f):
         return -Logexp.gradfactor(-f)
-        #ef = np.exp(-f)
-        #return -(ef - 1.) / ef
     def initialize(self, f):
         return -Logexp.initialize(f) # np.abs(f)
     def __str__(self):
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 77d1982c..88582351 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -166,3 +166,35 @@ def FITC_crescent_data(num_inducing=10, seed=default_seed):
     print(m)
     m.plot()
     return m
+
+
+def toy_heaviside(seed=default_seed):
+    """
+    Simple 1D classification example using a Heaviside GP transformation.
+
+    :param seed: seed value for data generation (default is 4).
+    :type seed: int
+    """
+
+    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
+    Y = data['Y'][:, 0:1]
+    Y[Y.flatten() == -1] = 0
+
+    # Model definition
+    noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
+    likelihood = GPy.likelihoods.EP(Y, noise_model)
+    m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
+
+    # Optimize
+    m.update_likelihood_approximation()
+    # Parameters optimization:
+    m.optimize()
+    #m.pseudo_EM()
+
+    # Plot
+    fig, axes = pb.subplots(2, 1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
+    print(m)
+
+    return m
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index f969bbe4..3e46b566 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -9,9 +9,9 @@ import pylab as pb
 import numpy as np
 import GPy

-def coregionalisation_toy2(max_iters=100):
+def coregionalization_toy2(max_iters=100):
     """
-    A simple demonstration of coregionalisation on two sinusoidal functions.
+    A simple demonstration of coregionalization on two sinusoidal functions.
     """
     X1 = np.random.rand(50, 1) * 8
     X2 = np.random.rand(30, 1) * 5
@@ -22,8 +22,8 @@
     Y = np.vstack((Y1, Y2))

     k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
-    k2 = GPy.kern.coregionalise(2, 1)
-    k = k1**k2
+    k2 = GPy.kern.coregionalize(2, 1)
+    k = k1**k2 #k = k1.prod(k2,tensor=True)
     m = GPy.models.GPRegression(X, Y, kernel=k)
     m.constrain_fixed('.*rbf_var', 1.) # m.constrain_positive('.*kappa')
     m.optimize(max_iters=max_iters)
     pb.figure()
     Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
     Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
     mean, var, low, up = m.predict(Xtest1)
     GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
     mean, var, low, up = m.predict(Xtest2)
     GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
     pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
     pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
     return m

-def coregionalisation_toy(max_iters=100):
+def coregionalization_toy(max_iters=100):
     """
-    A simple demonstration of coregionalisation on two sinusoidal functions.
+    A simple demonstration of coregionalization on two sinusoidal functions.
     """
     X1 = np.random.rand(50, 1) * 8
     X2 = np.random.rand(30, 1) * 5
-    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
-    X = np.hstack((np.vstack((X1, X2)), index))
+    X = np.vstack((X1, X2))
     Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
     Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
     Y = np.vstack((Y1, Y2))

     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2, 2)
-    k = k1**k2 #k1.prod(k2, tensor=True)
-    m = GPy.models.GPRegression(X, Y, kernel=k)
+    m = GPy.models.GPMultioutputRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel_list=[k1])
     m.constrain_fixed('.*rbf_var', 1.)
-    # m.constrain_positive('kappa')
     m.optimize(max_iters=max_iters)

-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
+    fig, axes = pb.subplots(2, 1)
+    m.plot(output=0, ax=axes[0])
+    m.plot(output=1, ax=axes[1])
+    axes[0].set_title('Output 0')
+    axes[1].set_title('Output 1')
     return m
-

-def coregionalisation_sparse(max_iters=100):
+def coregionalization_sparse(max_iters=100):
     """
-    A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
+    A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
     """
     X1 = np.random.rand(500, 1) * 8
     X2 = np.random.rand(300, 1) * 5
@@ -84,33 +75,18 @@
     Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
     Y = np.vstack((Y1, Y2))

-    num_inducing = 40
-    Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2, 2)
-    k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
-    m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
-    m.constrain_fixed('.*rbf_var', 1.)
-    m.constrain_fixed('iip')
-    m.constrain_bounded('noise_variance', 1e-3, 1e-1)
-#    m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
-    m.optimize(max_iters=max_iters)
+    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel_list=[k1], num_inducing=20)
+    m.constrain_fixed('.*rbf_var', 1.)
+    m.optimize(messages=1)
+    #m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')

-    # plotting:
-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
-    y = pb.ylim()[0]
-    pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
-    pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
+    fig, axes = pb.subplots(2, 1)
+    m.plot(output=0, ax=axes[0])
+    m.plot(output=1, ax=axes[1])
+    axes[0].set_title('Output 0')
+    axes[1].set_title('Output 1')
     return m

 def epomeo_gpx(max_iters=100):
@@ -136,8 +112,8 @@
                    np.random.randint(0, 4, num_inducing)[:, None]))

     k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(output_dim=5, rank=5)
-    k = k1**k2
+    k2 = GPy.kern.coregionalize(num_outputs=5, W_columns=5)
+    k = k1**k2
     m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
     m.constrain_fixed('.*rbf_var', 1.)
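For reference, the rename maps the old constructor arguments onto the new names; a minimal sketch assuming the signature introduced in GPy/kern/constructors.py below:

    # old: GPy.kern.coregionalise(output_dim=5, rank=5)
    k2 = GPy.kern.coregionalize(num_outputs=5, W_columns=5)
    k = GPy.kern.rbf(1)**k2   # tensor product, as in the examples above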
@@ -401,8 +377,6 @@ def silhouette(max_iters=100):
     print(m)
     return m

-
-
 def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 0ef487af..589ec4c7 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -130,7 +130,7 @@ class opt_lbfgsb(Optimizer):
             opt_dict['pgtol'] = self.gtol

         opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
-                                            maxfun=self.max_f_eval, **opt_dict)
+                                            maxfun=self.max_iters, **opt_dict)
         self.x_opt = opt_result[0]
         self.f_opt = f_fp(self.x_opt)[0]
         self.funct_eval = opt_result[2]['funcalls']
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index f7c0fd67..046b0205 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -5,7 +5,6 @@ import numpy as np
 from kern import kern

 import parts
-
+import warnings
 def rbf_inv(input_dim, variance=1., inv_lengthscale=None, ARD=False):
     """
     Construct an RBF kernel
@@ -74,9 +73,9 @@ def gibbs(input_dim, variance=1., mapping=None):
     Gibbs and MacKay non-stationary covariance function.

     .. math::
-
+
       r = sqrt((x_i - x_j)'*(x_i - x_j))
-
+
       k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))

       Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
@@ -90,7 +89,7 @@
     The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.

     :param input_dim: the number of input dimensions
-    :type input_dim: int
+    :type input_dim: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param mapping: the mapping that gives the lengthscale across the input space.
@@ -103,6 +102,12 @@
     part = parts.gibbs.Gibbs(input_dim, variance, mapping)
     return kern(input_dim, [part])

+def hetero(input_dim, mapping=None, transform=None):
+    """
+    """
+    part = parts.hetero.Hetero(input_dim, mapping, transform)
+    return kern(input_dim, [part])
+
 def poly(input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
     """
     Construct a polynomial kernel
@@ -135,6 +140,7 @@
     part = parts.white.White(input_dim, variance)
     return kern(input_dim, [part])

+
 def exponential(input_dim, variance=1., lengthscale=None, ARD=False):
     """
     Construct an exponential kernel
@@ -340,29 +346,30 @@ def symmetric(k):
     k_.parts = [symmetric.Symmetric(p) for p in k.parts]
     return k_

-def coregionalise(output_dim, rank=1, W=None, kappa=None):
+def coregionalize(num_outputs, W_columns=1, W=None, kappa=None):
     """
-    Coregionalisation kernel.
-
-    Used for computing covariance functions of the form
-    .. math::
-       k_2(x, y)=\mathbf{B} k(x, y)
-    where
+    Coregionalization matrix B, of the form:
+
     .. math::
+
       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)

-    :param output_dim: the number of output dimensions
-    :type output_dim: int
-    :param rank: the rank of the coregionalisation matrix.
-    :type rank: int
-    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
-    :type W: ndarray
-    :param kappa: a diagonal term which allows the outputs to behave independently.
+    An intrinsic/linear coregionalization kernel of the form
+
+    .. math::
+
+       k_2(x, y)=\mathbf{B} k(x, y)
+
+    It is obtained as the tensor product between a kernel k(x, y) and B.
+
+    :param num_outputs: the number of outputs to coregionalize
+    :type num_outputs: int
+    :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
+    :type W_columns: int
+    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
+    :type W: numpy array of dimensionality (num_outputs, W_columns)
+    :param kappa: a vector which allows the outputs to behave independently
+    :type kappa: numpy array of dimensionality (num_outputs,)
     :rtype: kernel object

-    .. Note: see coregionalisation examples in GPy.examples.regression for some usage.
     """
-    p = parts.coregionalise.Coregionalise(output_dim,rank,W,kappa)
+    p = parts.coregionalize.Coregionalize(num_outputs, W_columns, W, kappa)
     return kern(1,[p])
@@ -421,3 +428,31 @@ def hierarchical(k):
     # assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
     _parts = [parts.hierarchical.Hierarchical(k.parts)]
     return kern(k.input_dim+len(k.parts),_parts)
+
+def build_lcm(input_dim, num_outputs, kernel_list=[], W_columns=1, W=None, kappa=None):
+    """
+    Builds a kernel for a linear coregionalization model.
+
+    :param input_dim: input dimensionality
+    :param num_outputs: number of outputs
+    :param kernel_list: list of coregionalized kernels; each element in the list will be multiplied by a different coregionalization matrix
+    :type kernel_list: list of GPy kernels
+    :param W_columns: number of columns of each coregionalization matrix's W factor
+    :type W_columns: integer
+
+    .. Note:: each kernel's input dimensionality is overwritten to match input_dim.
+    """
+
+    for k in kernel_list:
+        if k.input_dim != input_dim:
+            k.input_dim = input_dim
+            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
+
+    k_coreg = coregionalize(num_outputs, W_columns, W, kappa)
+    kernel = kernel_list[0]**k_coreg.copy()
+
+    for k in kernel_list[1:]:
+        k_coreg = coregionalize(num_outputs, W_columns, W, kappa)
+        kernel += k**k_coreg.copy()
+
+    return kernel
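To make the coregionalization matrix concrete, a small NumPy-only sketch of B = WW^T + diag(kappa) with made-up numbers:

    import numpy as np

    W = np.array([[1.0], [0.5]])     # num_outputs=2, W_columns=1
    kappa = np.array([0.1, 0.2])
    B = np.dot(W, W.T) + np.diag(kappa)
    # B[i, j] scales the base kernel k(x, y) for the pair of outputs (i, j)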
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 2dc943bf..4a822758 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -13,7 +13,9 @@ import GPy
 class kern(Parameterized):
     def __init__(self, input_dim, parts=[], input_slices=None):
         """
-        This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
+        This is the main kernel class for GPy. It handles multiple
+        (additive) kernel functions, and keeps track of various things
+        like which parameters live where.

         The technical code for kernels is divided into _parts_ (see e.g. rbf.py).
         This object contains a list of parts, which are
@@ -34,6 +36,11 @@ class kern(Parameterized):

         self.input_dim = input_dim

+        part_names = [k.name for k in self.parts]
+        self.name = ''
+        for name in part_names:
+            self.name += name + '+'
+        self.name = self.name[:-1]
         # deal with input_slices
         if input_slices is None:
             self.input_slices = [slice(None) for p in self.parts]
@@ -334,10 +341,8 @@ class kern(Parameterized):
         :type X: np.ndarray (num_samples x input_dim)
         :param X2: Observed data inputs (optional, defaults to X)
         :type X2: np.ndarray (num_inducing x input_dim)"""
-        if X2 is None:
-            X2 = X
         target = np.zeros_like(X)
         if X2 is None:
             [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
         else:
             [p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@@ -654,17 +659,85 @@ def kern_test(kern, X=None, X2=None, verbose=False):
     :param X2: X2 input values to test the covariance function.
     :type X2: ndarray
     """
+    pass_checks = True
     if X==None:
         X = np.random.randn(10, kern.input_dim)
     if X2==None:
         X2 = np.random.randn(20, kern.input_dim)

-    result = [Kern_check_model(kern, X=X).is_positive_definite(),
-              Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
-              Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
-              Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
-              Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
-              Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
-    # Need to check
-    #Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
-    # but currently I think these aren't implemented.
-    return np.all(result)
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt X.")
+    result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    return pass_checks
diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/parts/Matern32.py
index 60f0b6e9..40da79f0 100644
--- a/GPy/kern/parts/Matern32.py
+++ b/GPy/kern/parts/Matern32.py
@@ -98,9 +98,13 @@ class Matern32(Kernpart):

     def dK_dX(self, dL_dK, X, X2, target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
-        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+        if X2 is None:
+            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
+            # factor of two: when X2 is X, each point appears in both arguments of K
+            ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
+        else:
+            dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
+            ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
         dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
         target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
diff --git a/GPy/kern/parts/Matern52.py b/GPy/kern/parts/Matern52.py
index e02cb9bf..4bf4a1a8 100644
--- a/GPy/kern/parts/Matern52.py
+++ b/GPy/kern/parts/Matern52.py
@@ -98,9 +98,12 @@ class Matern52(Kernpart):

     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
-        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
+        if X2 is None:
+            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
+            ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
+        else:
+            dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
+            ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = -np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 053e280f..643483f5 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -1,10 +1,12 @@
 import bias
 import Brownian
-import coregionalise
+import coregionalize
 import exponential
 import finite_dimensional
 import fixed
 import gibbs
+#import hetero #hetero.py is not committed: omitting for now. JH.
+import hierarchical
 import independent_outputs
 import linear
 import Matern32
@@ -19,8 +21,7 @@
 import prod
 import rational_quadratic
 import rbfcos
 import rbf
+import rbf_inv
 import spline
 import symmetric
 import white
-import hierarchical
-import rbf_inv
diff --git a/GPy/kern/parts/coregionalise.py b/GPy/kern/parts/coregionalize.py
similarity index 56%
rename from GPy/kern/parts/coregionalise.py
rename to GPy/kern/parts/coregionalize.py
index 94179088..363d98c3 100644
--- a/GPy/kern/parts/coregionalise.py
+++ b/GPy/kern/parts/coregionalize.py
@@ -7,44 +7,48 @@ from GPy.util.linalg import mdot, pdinv
 import pdb
 from scipy import weave

-class Coregionalise(Kernpart):
+class Coregionalize(Kernpart):
     """
-    Coregionalisation kernel.
+    Covariance function for intrinsic/linear coregionalization models.

-    Used for computing covariance functions of the form
+    This covariance has the form
     .. math::
-       k_2(x, y)=B k(x, y)
-    where
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(\kappa)
+
+    An intrinsic/linear coregionalization covariance function of the form
     .. math::
-       B = WW^\top + diag(kappa)
+       k_2(x, y)=\mathbf{B} k(x, y)

-    :param output_dim: the number of output dimensions
-    :type output_dim: int
-    :param rank: the rank of the coregionalisation matrix.
-    :type rank: int
-    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
-    :type W: ndarray
-    :param kappa: a diagonal term which allows the outputs to behave independently.
-    :rtype: kernel object
+    It is obtained as the tensor product between a covariance function
+    k(x, y) and B.

-    .. Note: see coregionalisation examples in GPy.examples.regression for some usage.
+    :param num_outputs: number of outputs to coregionalize
+    :type num_outputs: int
+    :param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
+    :type W_columns: int
+    :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
+    :type W: numpy array of dimensionality (num_outputs, W_columns)
+    :param kappa: a vector which allows the outputs to behave independently
+    :type kappa: numpy array of dimensionality (num_outputs,)
+
+    .. Note: see coregionalization examples in GPy.examples.regression for some usage.
     """
-    def __init__(self,output_dim,rank=1, W=None, kappa=None):
+    def __init__(self, num_outputs, W_columns=1, W=None, kappa=None):
         self.input_dim = 1
         self.name = 'coregion'
-        self.output_dim = output_dim
-        self.rank = rank
+        self.num_outputs = num_outputs
+        self.W_columns = W_columns
         if W is None:
-            self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
+            self.W = 0.5*np.random.randn(self.num_outputs,self.W_columns)/np.sqrt(self.W_columns)
         else:
-            assert W.shape==(self.output_dim,self.rank)
+            assert W.shape==(self.num_outputs,self.W_columns)
             self.W = W
         if kappa is None:
-            kappa = 0.5*np.ones(self.output_dim)
+            kappa = 0.5*np.ones(self.num_outputs)
         else:
-            assert kappa.shape==(self.output_dim,)
+            assert kappa.shape==(self.num_outputs,)
         self.kappa = kappa
-        self.num_params = self.output_dim*(self.rank + 1)
+        self.num_params = self.num_outputs*(self.W_columns + 1)
         self._set_params(np.hstack([self.W.flatten(),self.kappa]))

     def _get_params(self):
@@ -52,12 +56,12 @@ class Coregionalize(Kernpart):

     def _set_params(self,x):
         assert x.size == self.num_params
-        self.kappa = x[-self.output_dim:]
-        self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
+        self.kappa = x[-self.num_outputs:]
+        self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
         self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)

     def _get_param_names(self):
-        return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]
+        return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]

     def K(self,index,index2,target):
         index = np.asarray(index,dtype=np.int)
@@ -75,26 +79,26 @@ class Coregionalize(Kernpart):
         if index2 is None:
             code="""
             for(int i=0;i