Integrated Laplace and merged Merge remote-tracking branch 'gpy_real/devel' into merge_branch

Conflicts:
	GPy/core/gp.py
	GPy/likelihoods/__init__.py
	GPy/likelihoods/likelihood_functions.py
	GPy/likelihoods/link_functions.py
This commit is contained in:
Alan Saul 2013-10-03 16:52:02 +01:00
commit 8343615098
106 changed files with 5841 additions and 1134 deletions

View file

@ -11,25 +11,27 @@ from sparse_gp import SparseGP
class FITC(SparseGP):
"""
sparse FITC approximation
Sparse FITC approximation
:param X: inputs
:type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel (covariance function). See link kernels
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self):
def update_likelihood_approximation(self, **kwargs):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation
@ -37,7 +39,7 @@ class FITC(SparseGP):
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
self._set_params(self._get_params())
def _compute_kernel_matrices(self):
@ -120,7 +122,7 @@ class FITC(SparseGP):
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
@ -140,7 +142,6 @@ class FITC(SparseGP):
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
@ -174,7 +175,7 @@ class FITC(SparseGP):
def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ

View file

@ -15,20 +15,17 @@ class GP(GPBase):
:param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white
:parm likelihood: a GPy likelihood
:param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
self.update_likelihood_approximation()
def getstate(self):
return GPBase.getstate(self)
@ -38,15 +35,19 @@ class GP(GPBase):
self._set_params(self._get_params())
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
self.likelihood._set_params(p[self.kern.num_params_transformed():])
new_kern_params = p[:self.kern.num_params_transformed()]
new_likelihood_params = p[self.kern.num_params_transformed():]
old_likelihood_params = self.likelihood._get_params()
#TODO: Need to get rid of this check and think of a nicer OO way
if isinstance(self.likelihood, Laplace):
self.likelihood.fit_full(self.kern.K(self.X))
self.likelihood._set_params(self.likelihood._get_params())
self.kern._set_params_transformed(new_kern_params)
self.likelihood._set_params_transformed(new_likelihood_params)
self.K = self.kern.K(self.X)
#Re fit likelihood approximation (if it is an approx), as parameters have changed
if isinstance(self.likelihood, Laplace):
self.likelihood.fit_full(self.K)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
@ -63,6 +64,10 @@ class GP(GPBase):
tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
#Adding dZ_dK (0 for a non-approximate likelihood, compensates for
#additional gradients of K when log-likelihood has non-zero Z term)
self.dL_dK += self.likelihood.dZ_dK
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@ -70,7 +75,7 @@ class GP(GPBase):
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
def update_likelihood_approximation(self, **kwargs):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
@ -78,7 +83,7 @@ class GP(GPBase):
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_full(self.kern.K(self.X))
self.likelihood.fit_full(self.kern.K(self.X), **kwargs)
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
@ -103,25 +108,13 @@ class GP(GPBase):
return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
"""
dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X)
#Think of OO way of doing this also
if isinstance(self.likelihood, Laplace):
#self.likelihood.fit_full(self.kern.K(self.X))
#self.likelihood._set_params(self.likelihood._get_params())
dK_dthetaK = self.kern.dK_dtheta
dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X.copy())
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
else:
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
return np.hstack((dL_dthetaK, dL_dthetaL))
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
"""
@ -146,17 +139,16 @@ class GP(GPBase):
def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
:returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
:returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
@ -169,5 +161,69 @@ class GP(GPBase):
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
"""
For a specific output, predict the function at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:returns: posterior mean, a Numpy array, Nnew x self.input_dim
:returns: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only
"""
assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
index = np.ones_like(Xnew)*output
Xnew = np.hstack((Xnew,index))
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions for a specific output,
does not account for normalization or likelihood
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
.. Note:: For multiple output models only
"""
assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
# creates an index column and appends it to _Xnew
index = np.ones_like(_Xnew)*output
_Xnew = np.hstack((_Xnew,index))
Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
debug_this # @UndefinedVariable
return mu, var

View file

@ -57,18 +57,12 @@ class GPBase(Model):
self.X = state.pop()
Model.setstate(self, state)
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None,output=None):
"""
Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
- Not implemented in higher dimensions
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
@ -85,6 +79,8 @@ class GPBase(Model):
:param ax: axes to plot on.
:type ax: axes handle
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
"""
if which_data == 'all':
which_data = slice(None)
@ -93,44 +89,89 @@ class GPBase(Model):
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict(Xnew, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
if not hasattr(self,'multioutput'):
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict(Xnew, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
if hasattr(self,'Z'):
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
if self.X.shape[1] == 2:
Xu = self.X[self.X[:,-1]==output ,0:1]
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
else:
m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
if hasattr(self,'Z'):
Zu = self.Z[self.Z[:,-1]==output,:]
Zu = self.Z * self._Xscale + self._Xoffset
Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
"""
Plot the GP with noise where the likelihood is Gaussian.
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO!
- Not implemented in higher dimensions
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
@ -151,15 +192,13 @@ class GPBase(Model):
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:type fillcol:
:param levels: for 2D plotting, the number of contour levels to use
is ax is None, create a new figure
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
"""
# TODO include samples
if which_data == 'all':
@ -169,41 +208,98 @@ class GPBase(Model):
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
plotdims = self.input_dim - len(fixed_inputs)
if not hasattr(self,'multioutput'):
if plotdims == 1:
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
resolution = resolution or 200
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.data.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.Y.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
if self.X.shape[1] == 2:
resolution = resolution or 200
Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
Xu = self.X * self._Xscale + self._Xoffset
Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
resolution = resolution or 50
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
"""
def samples_f(self,X,samples=10, which_data='all', which_parts='all',output=None):
if which_data == 'all':
which_data = slice(None)
if hasattr(self,'multioutput'):
np.hstack([X,np.ones((X.shape[0],1))*output])
m, v = self._raw_predict(X, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
#gpplot(X, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(X, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
"""

View file

@ -49,6 +49,7 @@ class Mapping(Parameterized):
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
"""
Plot the mapping.
Plots the mapping associated with the model.
@ -79,8 +80,7 @@ class Mapping(Parameterized):
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param levels: for 2D plotting, the number of contour levels to use
is ax is None, create a new figure
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
"""
# TODO include samples

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
@ -31,8 +31,8 @@ class Model(Parameterized):
def getstate(self):
"""
Get the current state of the class.
Inherited from Parameterized, so add those parameters to the state
:return: list of states from the model.
"""
@ -46,7 +46,8 @@ class Model(Parameterized):
call Parameterized with the rest of the state
:param state: the state of the model.
:type state: list as returned from getstate.
:type state: list as returned from getstate.
"""
self.preferred_optimizer = state.pop()
self.sampling_runs = state.pop()
@ -56,10 +57,11 @@ class Model(Parameterized):
def set_prior(self, regexp, what):
"""
Sets priors on the model parameters.
Notes
-----
**Notes**
Asserts that the prior is suitable for the constraint. If the
wrong constraint is in place, an error is raised. If no
constraint is in place, one is added (warning printed).
@ -185,8 +187,8 @@ class Model(Parameterized):
be handled silently. If _all_ runs fail, the model is reset to the
existing parameter values.
Notes
-----
**Notes**
:param num_restarts: number of restarts to use (default 10)
:type num_restarts: int
:param robust: whether to handle exceptions silently or not (default False)
@ -195,7 +197,9 @@ class Model(Parameterized):
:type parallel: bool
:param num_processes: number of workers in the multiprocessing pool
:type numprocesses: int
**kwargs are passed to the optimizer. They can be:
\*\*kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:param max_iters: maximum number of iterations
@ -203,9 +207,7 @@ class Model(Parameterized):
:param messages: whether to display during optimisation
:type messages: bool
..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically
set to the number of processors on the current machine.
.. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine.
"""
initial_parameters = self._get_params_transformed()
@ -249,7 +251,7 @@ class Model(Parameterized):
self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self):
"""
"""
Ensure that any variables which should clearly be positive
have been constrained somehow. The method performs a regular
expression search on parameter names looking for the terms
@ -272,7 +274,7 @@ class Model(Parameterized):
"""
The objective function passed to the optimizer. It combines
the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to
return the objective, and will raise the original exception if it
the objective cannot be computed.
@ -397,17 +399,20 @@ class Model(Parameterized):
return np.nan
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self, names=None):
if names is None:
names = self._get_print_names()
s = Parameterized.__str__(self, names=names).split('\n')
def __str__(self):
s = Parameterized.__str__(self).split('\n')
#def __str__(self, names=None):
# if names is None:
# names = self._get_print_names()
#s = Parameterized.__str__(self, names=names).split('\n')
# add priors to the string
if self.priors is not None:
strs = [str(p) if p is not None else '' for p in self.priors]
else:
strs = [''] * len(self._get_param_names())
name_indices = self.grep_param_names("|".join(names))
strs = np.array(strs)[name_indices]
strs = [''] * len(self._get_params())
# strs = [''] * len(self._get_param_names())
# name_indices = self.grep_param_names("|".join(names))
# strs = np.array(strs)[name_indices]
width = np.array(max([len(p) for p in strs] + [5])) + 4
log_like = self.log_likelihood()
@ -456,7 +461,7 @@ class Model(Parameterized):
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else:
@ -496,7 +501,7 @@ class Model(Parameterized):
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient)
ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
@ -535,22 +540,17 @@ class Model(Parameterized):
return k.variances
def pseudo_EM(self, epsilon=.1, **kwargs):
def pseudo_EM(self, stop_crit=.1, **kwargs):
"""
TODO: Should this not bein the GP class?
EM - like algorithm for Expectation Propagation and Laplace approximation
kwargs are passed to the optimize function. They can be:
:epsilon: convergence criterion
:max_f_eval: maximum number of function evaluations
:messages: whether to display during optimisation
:param optimzer: whice optimizer to use (defaults to self.preferred optimizer)
:type optimzer: string TODO: valid strings?
:param stop_crit: convergence criterion
:type stop_crit: float
.. Note: kwargs are passed to update_likelihood and optimize functions.
"""
assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
ll_change = epsilon + 1.
assert isinstance(self.likelihood, (likelihoods.EP, likelihoods.EP_Mixed_Noise, likelihoods.Laplace)), "pseudo_EM is only available for approximate likelihoods"
ll_change = stop_crit + 1.
iteration = 0
last_ll = -np.inf
@ -558,10 +558,25 @@ class Model(Parameterized):
alpha = 0
stop = False
#Handle **kwargs
ep_args = {}
for arg in kwargs.keys():
if arg in ('epsilon','power_ep'):
ep_args[arg] = kwargs[arg]
del kwargs[arg]
while not stop:
last_approximation = self.likelihood.copy()
last_params = self._get_params()
self.update_likelihood_approximation()
if len(ep_args) == 2:
self.update_likelihood_approximation(epsilon=ep_args['epsilon'],power_ep=ep_args['power_ep'])
elif len(ep_args) == 1:
if ep_args.keys()[0] == 'epsilon':
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
elif ep_args.keys()[0] == 'power_ep':
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
else:
self.update_likelihood_approximation()
new_ll = self.log_likelihood()
ll_change = new_ll - last_ll
@ -573,7 +588,7 @@ class Model(Parameterized):
else:
self.optimize(**kwargs)
last_ll = self.log_likelihood()
if ll_change < epsilon:
if ll_change < stop_crit:
stop = True
iteration += 1
if stop:

View file

@ -27,9 +27,9 @@ class Parameterized(object):
def _get_param_names(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _get_print_names(self):
""" Override for which names to print out, when using print m """
return self._get_param_names()
#def _get_print_names(self):
# """ Override for which names to print out, when using print m """
# return self._get_param_names()
def pickle(self, filename, protocol=None):
if protocol is None:
@ -63,10 +63,10 @@ class Parameterized(object):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
For inheriting from Parameterized:
Allways append the state of the inherited object
and call down to the inherited object in setstate!!
Allways append the state of the inherited object
and call down to the inherited object in setstate!!
"""
return [self.tied_indices,
self.fixed_indices,
@ -231,17 +231,19 @@ class Parameterized(object):
def constrain_fixed(self, regexp, value=None):
"""
Arguments
---------
:param regexp: which parameters need to be fixed.
:type regexp: ndarray(dtype=int) or regular expression object or string
:param value: the vlaue to fix the parameters to. If the value is not specified,
the parameter is fixed to the current value
:type value: float
Notes
-----
**Notes**
Fixing a parameter which is tied to another, or constrained in some way will result in an error.
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes.
"""
matches = self.grep_param_names(regexp)
overlap = set(matches).intersection(set(self.all_constrained_indices()))
@ -336,26 +338,30 @@ class Parameterized(object):
n = [nn for i, nn in enumerate(n) if not i in remove]
return n
@property
def all(self):
return self.__str__(self._get_param_names())
#@property
#def all(self):
# return self.__str__(self._get_param_names())
def __str__(self, names=None, nw=30):
#def __str__(self, names=None, nw=30):
def __str__(self, nw=30):
"""
Return a string describing the parameter names and their ties and constraints
"""
if names is None:
names = self._get_print_names()
name_indices = self.grep_param_names("|".join(names))
names = self._get_param_names()
#if names is None:
# names = self._get_print_names()
#name_indices = self.grep_param_names("|".join(names))
N = len(names)
if not N:
return "This object has no free parameters."
header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params()[name_indices] # map(str,self._get_params())
values = self._get_params() # map(str,self._get_params())
#values = self._get_params()[name_indices] # map(str,self._get_params())
# sort out the constraints
constraints = [''] * len(self._get_param_names())
constraints = [''] * len(names)
#constraints = [''] * len(self._get_param_names())
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
constraints[ii] = t.__str__()
@ -368,7 +374,10 @@ class Parameterized(object):
for j in tie:
ties[j] = '(' + str(i) + ')'
values = ['%.4f' % float(v) for v in values]
if values.size == 1:
values = ['%.4f' %float(values)]
else:
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
@ -383,3 +392,77 @@ class Parameterized(object):
return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
def grep_model(self,regexp):
regexp_indices = self.grep_param_names(regexp)
all_names = self._get_param_names()
names = [all_names[pj] for pj in regexp_indices]
N = len(names)
if not N:
return "Match not found."
header = ['Name', 'Value', 'Constraints', 'Ties']
all_values = self._get_params()
values = np.array([all_values[pj] for pj in regexp_indices])
constraints = [''] * len(names)
_constrained_indices,aux = self._pick_elements(regexp_indices,self.constrained_indices)
_constraints = [self.constraints[pj] for pj in aux]
for i, t in zip(_constrained_indices, _constraints):
for ii in i:
iii = regexp_indices.tolist().index(ii)
constraints[iii] = t.__str__()
_fixed_indices,aux = self._pick_elements(regexp_indices,self.fixed_indices)
for i in _fixed_indices:
for ii in i:
iii = regexp_indices.tolist().index(ii)
constraints[ii] = 'Fixed'
_tied_indices,aux = self._pick_elements(regexp_indices,self.tied_indices)
ties = [''] * len(names)
for i,ti in zip(_tied_indices,aux):
for ii in i:
iii = regexp_indices.tolist().index(ii)
ties[iii] = '(' + str(ti) + ')'
if values.size == 1:
values = ['%.4f' %float(values)]
else:
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
print header_string[0]
print separator
for string in param_string:
print string
def _pick_elements(self,regexp_ind,array_list):
"""Removes from array_list the elements different from regexp_ind"""
new_array_list = [] #New list with elements matching regexp_ind
array_indices = [] #Indices that matches the arrays in new_array_list and array_list
array_index = 0
for array in array_list:
_new = []
for ai in array:
if ai in regexp_ind:
_new.append(ai)
if len(_new):
new_array_list.append(np.array(_new))
array_indices.append(array_index)
array_index += 1
return new_array_list, array_indices

View file

@ -5,7 +5,7 @@ import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
from scipy import linalg
from ..likelihoods import Gaussian
from ..likelihoods import Gaussian, EP,EP_Mixed_Noise
from gp_base import GPBase
class SparseGP(GPBase):
@ -16,16 +16,17 @@ class SparseGP(GPBase):
:type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel (covariance function). See link kernels
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
:param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type num_inducing: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
@ -109,7 +110,6 @@ class SparseGP(GPBase):
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
self._A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self._A
self.LB = jitchol(self.B)
@ -139,6 +139,7 @@ class SparseGP(GPBase):
dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
@ -160,9 +161,27 @@ class SparseGP(GPBase):
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else:
LBi = chol_inv(self.LB)
Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
else:
# likelihood is not heterscedatic
# likelihood is not heteroscedatic
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
@ -194,10 +213,10 @@ class SparseGP(GPBase):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
+ self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def _get_print_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
#def _get_print_names(self):
# return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
def update_likelihood_approximation(self, **kwargs):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
@ -211,10 +230,10 @@ class SparseGP(GPBase):
Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi, **kwargs) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1.T)
self.likelihood.fit_DTC(self.Kmm, self.psi1.T, **kwargs)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
@ -240,7 +259,7 @@ class SparseGP(GPBase):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
@ -274,7 +293,7 @@ class SparseGP(GPBase):
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
# assert which_p.Tarts=='all', "swithching out parts of variational kernels is not implemented"
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
@ -288,17 +307,18 @@ class SparseGP(GPBase):
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
**Arguments**
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param X_variance_new: The uncertainty in the prediction points
:type X_variance_new: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -322,26 +342,131 @@ class SparseGP(GPBase):
return mean, var, _025pm, _975pm
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if which_data is 'all':
which_data = slice(None)
GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)
# add the inducing inputs and some errorbars
if self.X.shape[1] == 1:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
if not hasattr(self,'multioutput'):
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
if self.X.shape[1] == 1:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
"""
Xu = self.X[self.X[:,-1]==output,:]
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
Xu = self.X[self.X[:,-1]==output ,0:1] #??
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
"""
Zu = self.Z[self.Z[:,-1]==output,:]
Zu = self.Z * self._Xscale + self._Xoffset
Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
#ax.set_ylim(ax.get_ylim()[0],)
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
"""
For a specific output, predict the function at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only
"""
assert hasattr(self,'multioutput')
index = np.ones_like(Xnew)*output
Xnew = np.hstack((Xnew,index))
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False):
"""
Internal helper function for making predictions for a specific output,
does not account for normalization or likelihood
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict
:type output: integer in {0,..., num_outputs-1}
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
.. Note:: For multiple output models only
"""
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
if self.Cpsi1V is None:
psi1V = np.dot(self.psi1.T,self.likelihood.V)
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
assert hasattr(self,'multioutput')
index = np.ones_like(_Xnew)*output
_Xnew = np.hstack((_Xnew,index))
if X_variance_new is None:
Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]

View file

@ -14,6 +14,7 @@ import sys
class SVIGP(GPBase):
"""
Stochastic Variational inference in a Gaussian Process
:param X: inputs
@ -22,25 +23,26 @@ class SVIGP(GPBase):
:type Y: np.ndarray of observations (N x D)
:param batchsize: the size of a h
Additional kwargs are used as for a sparse GP. They include
Additional kwargs are used as for a sparse GP. They include:
:param q_u: canonical parameters of the distribution squasehd into a 1D array
:type q_u: np.ndarray
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param kernel : the kernel/covariance function. See link kernels
:param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""

View file

@ -18,9 +18,11 @@ class transformation(object):
def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f"""
raise NotImplementedError
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
raise NotImplementedError
def __str__(self):
raise NotImplementedError
@ -42,15 +44,13 @@ class logexp(transformation):
class negative_logexp(transformation):
domain = NEGATIVE
def f(self, x):
return -logexp.f(x) #np.log(1. + np.exp(x))
return -logexp.f(x)
def finv(self, f):
return logexp.finv(-f) #np.log(np.exp(-f) - 1.)
return logexp.finv(-f)
def gradfactor(self, f):
return -logexp.gradfactor(-f)
#ef = np.exp(-f)
#return -(ef - 1.) / ef
def initialize(self, f):
return -logexp.initialize(f) #np.abs(f)
return -logexp.initialize(f)
def __str__(self):
return '(-ve)'
@ -82,7 +82,6 @@ class logexp_clipped(logexp):
return '(+ve_c)'
class exponent(transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
domain = POSITIVE
def f(self, x):
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))