diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 083f9980..12e71c93 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -9,7 +9,9 @@ from ..likelihoods import Gaussian, Gaussian_Mixed_Noise class GPBase(Model): """ Gaussian process base model for holding shared behaviour between - sparse_GP and GP models. + sparse_GP and GP models, and potentially other models in the future. + + Here we define some functions that are use """ def __init__(self, X, likelihood, kernel, normalize_X=False): self.X = X @@ -34,29 +36,6 @@ class GPBase(Model): # All leaf nodes should call self._set_params(self._get_params()) at # the end - def getstate(self): - """ - Get the current state of the class, here we return everything that is needed to recompute the model. - """ - return Model.getstate(self) + [self.X, - self.num_data, - self.input_dim, - self.kern, - self.likelihood, - self.output_dim, - self._Xoffset, - self._Xscale] - - def setstate(self, state): - self._Xscale = state.pop() - self._Xoffset = state.pop() - self.output_dim = state.pop() - self.likelihood = state.pop() - self.kern = state.pop() - self.input_dim = state.pop() - self.num_data = state.pop() - self.X = state.pop() - Model.setstate(self, state) def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True): """ @@ -269,152 +248,32 @@ class GPBase(Model): else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): + def getstate(self): """ - For a specific output, in a multioutput model, this function works just as plot_f on single output models. - - :param output: which output to plot (for multiple output models only) - :type output: integer (first output is 0) - :param samples: the number of a posteriori samples to plot - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - :type resolution: int - :param full_cov: - :type full_cov: bool - :param fignum: figure to plot on. - :type fignum: figure number - :param ax: axes to plot on. - :type ax: axes handle + Get the curent state of the class. This is only used to efficiently + pickle the model. See also self.setstate """ - assert output is not None, "An output must be specified." - assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1) + return Model.getstate(self) + [self.X, + self.num_data, + self.input_dim, + self.kern, + self.likelihood, + self.output_dim, + self._Xoffset, + self._Xscale] - if which_data == 'all': - which_data = slice(None) - - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - - if self.X.shape[1] == 2: - Xu = self.X[self.X[:,-1]==output ,0:1] - Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - Xnew_indexed = self._add_output_index(Xnew,output) - - m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts) - - if samples: - Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True) - for yi in Ysim.T: - ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) - - gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax) - ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5) - ax.set_xlim(xmin, xmax) - ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None]))) - ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) - ax.set_ylim(ymin, ymax) - - elif self.X.shape[1] == 3: - raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet" - #if samples: - # warnings.warn("Samples only implemented for 1 dimensional inputs.") - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - - - def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): + def setstate(self, state): """ - For a specific output, in a multioutput model, this function works just as plot_f on single output models. - - :param output: which output to plot (for multiple output models only) - :type output: integer (first output is 0) - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :type plot_limits: np.array - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - :type resolution: int - :param levels: number of levels to plot in a contour plot. - :type levels: int - :param samples: the number of a posteriori samples to plot - :type samples: int - :param fignum: figure to plot on. - :type fignum: figure number - :param ax: axes to plot on. - :type ax: axes handle - :type output: integer (first output is 0) - :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. - :type fixed_inputs: a list of tuples - :param linecol: color of line to plot. - :type linecol: - :param fillcol: color of fill - :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure + Set the state of the model. Used for efficient pickling """ - assert output is not None, "An output must be specified." - assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1) - if which_data == 'all': - which_data = slice(None) - - if ax is None: - fig = pb.figure(num=fignum) - ax = fig.add_subplot(111) - - if self.X.shape[1] == 2: - resolution = resolution or 200 - - Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest - Xu = self.X * self._Xscale + self._Xoffset - Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column - - Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - Xnew_indexed = self._add_output_index(Xnew,output) + self._Xscale = state.pop() + self._Xoffset = state.pop() + self.output_dim = state.pop() + self.likelihood = state.pop() + self.kern = state.pop() + self.input_dim = state.pop() + self.num_data = state.pop() + self.X = state.pop() + Model.setstate(self, state) - m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output) - - if samples: #NOTE not tested with fixed_inputs - Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output) - for yi in Ysim.T: - ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) - - for d in range(m.shape[1]): - gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) - ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5) - ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) - ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) - ax.set_xlim(xmin, xmax) - ax.set_ylim(ymin, ymax) - - elif self.X.shape[1] == 3: - raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet" - #if samples: - # warnings.warn("Samples only implemented for 1 dimensional inputs.") - - else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" - - - def _add_output_index(self,X,output): - """ - In a multioutput model, appends an index column to X to specify the output it is related to. - - :param X: Input data - :type X: np.ndarray, N x self.input_dim - :param output: output X is related to - :type output: integer in {0,..., output_dim-1} - - .. Note:: For multiple non-independent outputs models only. - """ - - assert hasattr(self,'multioutput'), 'This function is for multiple output models only.' - - index = np.ones((X.shape[0],1))*output - return np.hstack((X,index))